예제 #1
0
 def ClusterTweetSim(self, cluster, tweet):
     """Return how similar a tweet is to the tweets already in a cluster.

     The tweet's text is compared with the ``tweet_text`` value of every
     row in ``cluster.data`` via ``Cosine_Sim.get_cosine``.

     Args:
         cluster: Object whose ``data`` attribute is a table with an
             ``index`` and a ``tweet_text`` column (presumably a pandas
             DataFrame -- confirm against the caller).
         tweet: Object exposing the tweet body as ``tweet.text``.

     Returns:
         ``1`` as soon as one row's similarity, rounded to two decimals,
         is at least 0.99 (treated as a duplicate). Otherwise the mean
         similarity over all rows, rounded to two decimals. ``0.0`` when
         the cluster has no rows.
     """
     row_count = len(cluster.data.index)
     if row_count == 0:
         # Nothing to compare against; bail out before the averaging
         # step below would divide by zero.
         return 0.0

     tweet_txt = tweet.text
     total_sim = 0
     for cluster_txt in cluster.data['tweet_text']:
         curr_sim = Cosine_Sim.get_cosine(tweet_txt, cluster_txt)
         if round(curr_sim, 2) >= 0.99:  # duplicate tweet
             return 1  # stop at the first duplicate
         total_sim += curr_sim

     # float() keeps true division even if every similarity is an int.
     average_sim = float(total_sim) / row_count
     return round(average_sim, 2)