Ejemplo n.º 1
0
def give_cluster(url):
    """Return the k-means cluster label predicted for the page at *url*.

    The text obtained from ``get_text_content(url)`` is passed through
    ``preprocess``, turned into a vector by ``sent_vectorizer`` using the
    module-level ``modelg``, and handed to ``kmeans.predict`` as a
    one-row batch; the first element of the prediction is returned.

    Errors are not raised to the caller: if any step raises an
    ``Exception``, that exception object is returned instead, so callers
    must check the type of the result.
    """
    try:
        raw_text = get_text_content(url)
        cleaned_text = preprocess(raw_text)
        vector = sent_vectorizer(cleaned_text, modelg)
        # predict() receives a single-row batch, so take its only result.
        labels = kmeans.predict([vector])
        return labels[0]
    except Exception as err:
        return err
Ejemplo n.º 2
0
def giveUrlInfo(url):
    """Return the cluster number and vector for *url* as a dict.

    ``checkUrlInDb(url)`` is consulted first. When the record it returns
    has a non-empty ``'new_url_vector'``, the stored ``'cluster_no'`` and
    vector are reused. Otherwise the vector is computed from the page
    text (``get_text_content`` -> ``preprocess`` -> ``sent_vectorizer``
    with ``modelg``) and the cluster comes from ``kmeans.predict``.

    The result has the keys ``"cluster_no"`` and ``"urlvector"``.

    Errors are not raised to the caller: if any step raises an
    ``Exception``, it is printed and the exception object is returned
    instead, so callers must check the type of the result.
    """
    try:
        stored = checkUrlInDb(url)
        if len(stored['new_url_vector']) == 0:
            # Nothing cached for this URL: vectorise and classify it now.
            print("url not in database")
            cleaned_text = preprocess(get_text_content(url))
            vector = sent_vectorizer(cleaned_text, modelg)
            label = kmeans.predict([vector])[0]
            print("cluster", label)
        else:
            print("url in databse")
            label = stored['cluster_no']
            vector = stored['new_url_vector']
        return {"cluster_no": label, "urlvector": vector}
    except Exception as err:
        print('giveUrlInfo error', err)
        return err
Ejemplo n.º 3
0
def getVectorOfUrl(url):
    """Return the vector computed for the text of the page at *url*.

    The text obtained from ``get_text_content(url)`` is passed through
    ``preprocess`` and then vectorised by ``sent_vectorizer`` using the
    module-level ``modelg``.

    Errors are not raised to the caller: if any step raises an
    ``Exception``, that exception object is returned instead, so callers
    must check the type of the result.
    """
    try:
        raw_text = get_text_content(url)
        cleaned_text = preprocess(raw_text)
        vector = sent_vectorizer(cleaned_text, modelg)
        return vector
    except Exception as err:
        return err