def ClusterTweetSim(self, cluster, tweet):
    """Return the similarity between ``tweet`` and the tweets in ``cluster``.

    The tweet's text is compared against the ``'tweet_text'`` value of every
    row in ``cluster.data`` using ``Cosine_Sim.get_cosine``.

    Args:
        cluster: Object whose ``data`` attribute supports ``iterrows()`` and
            ``index`` (presumably a pandas DataFrame -- confirm against the
            caller) and has a ``'tweet_text'`` column.
        tweet: Object exposing the tweet text as ``tweet.text``.

    Returns:
        ``1`` as soon as any row's similarity, rounded to two decimals, is
        at least 0.99 (treated as a duplicate tweet); ``0.0`` when the
        cluster has no rows; otherwise the mean similarity over all rows,
        rounded to two decimals.
    """
    row_count = len(cluster.data.index)
    if row_count == 0:
        # Nothing to compare against; avoid dividing by zero below.
        return 0.0

    tweet_txt = tweet.text
    # Float accumulator keeps the final division a true division.
    total_sim = 0.0
    for _, row in cluster.data.iterrows():
        curr_sim = Cosine_Sim.get_cosine(tweet_txt, row['tweet_text'])
        if round(curr_sim, 2) >= 0.99:
            # Duplicate tweet: stop at the first match.
            return 1
        total_sim += curr_sim

    # Average similarity across every tweet in the cluster.
    return round(total_sim / row_count, 2)