def compare(self, text_a, text_b):
    """Return the Jaccard similarity of two texts.

    Each text is passed through the instance's ``remove_spec_chars``,
    ``remove_punctuation``, ``lower`` and ``split`` helpers, the resulting
    tokens are stemmed with ``Stemmer.stem_words``, and the two sets of
    distinct stems are compared.

    Args:
        text_a: First text to compare.
        text_b: Second text to compare.

    Returns:
        ``len(A & B) / len(A | B)`` for the two stem sets, or ``0.0`` when
        neither text yields any token.
    """
    tokens_a = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_a))))
    tokens_b = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_b))))

    stemmer = Stemmer()
    set_a = set(stemmer.stem_words(tokens_a))
    set_b = set(stemmer.stem_words(tokens_b))

    union = set_a | set_b
    if not union:
        # Both texts are empty after preprocessing; the ratio is undefined
        # (0 / 0), so report "no similarity" instead of raising
        # ZeroDivisionError.
        return 0.0
    return len(set_a & set_b) / len(union)
def compare(self, text_a, text_b):
    """Return the cosine similarity of two texts' term-frequency vectors.

    Each text is passed through the instance's ``remove_spec_chars``,
    ``remove_punctuation``, ``lower`` and ``split`` helpers, filtered with
    ``remove_stopwords`` and stemmed with ``Stemmer.stem_words``. The
    occurrences of every stem are counted per text and the two count
    vectors are compared.

    Args:
        text_a: First text to compare.
        text_b: Second text to compare.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` for the two count vectors, or ``0.0``
        when either text yields no token.
    """
    # Local import keeps this method self-contained.
    from collections import Counter

    tokens_a = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_a))))
    tokens_b = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_b))))
    tokens_a = self.remove_stopwords(tokens_a)
    tokens_b = self.remove_stopwords(tokens_b)

    stemmer = Stemmer()
    counts_a = Counter(stemmer.stem_words(tokens_a))
    counts_b = Counter(stemmer.stem_words(tokens_b))

    # Stems absent from one text contribute 0 to the dot product; Counter
    # returns 0 for missing keys, so iterating one side is sufficient.
    dot_product = sum(
        count * counts_b[stem] for stem, count in counts_a.items())
    squared_norm_a = sum(count ** 2 for count in counts_a.values())
    squared_norm_b = sum(count ** 2 for count in counts_b.values())

    if not squared_norm_a or not squared_norm_b:
        # A zero vector has no direction; report "no similarity" instead
        # of raising ZeroDivisionError.
        return 0.0
    return dot_product / (math.sqrt(squared_norm_a) * math.sqrt(squared_norm_b))
def compare(self, text_a, text_b):
    """Return the Dice similarity coefficient of two texts.

    Each text is passed through the instance's ``remove_spec_chars``,
    ``remove_punctuation``, ``lower`` and ``split`` helpers, filtered with
    ``remove_stopwords`` and stemmed with ``Stemmer.stem_words``. Only the
    presence of a stem in a text matters, not how often it occurs.

    Args:
        text_a: First text to compare.
        text_b: Second text to compare.

    Returns:
        ``2 * len(A & B) / (len(A) + len(B))`` for the two sets of distinct
        stems, or ``0.0`` when neither text yields any token.
    """
    tokens_a = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_a))))
    tokens_b = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_b))))
    tokens_a = self.remove_stopwords(tokens_a)
    tokens_b = self.remove_stopwords(tokens_b)

    stemmer = Stemmer()
    set_a = set(stemmer.stem_words(tokens_a))
    set_b = set(stemmer.stem_words(tokens_b))

    total_size = len(set_a) + len(set_b)
    if not total_size:
        # Both texts are empty after preprocessing; the ratio is undefined
        # (0 / 0), so report "no similarity" instead of raising
        # ZeroDivisionError.
        return 0.0
    return (2 * len(set_a & set_b)) / total_size
def compare(self, text_a, text_b):
    """Return the overlap ("matching") coefficient of two texts.

    Each text is passed through the instance's ``remove_spec_chars``,
    ``remove_punctuation``, ``lower`` and ``split`` helpers, the resulting
    tokens are stemmed with ``Stemmer.stem_words``, and the two sets of
    distinct stems are compared.

    Args:
        text_a: First text to compare.
        text_b: Second text to compare.

    Returns:
        ``len(A & B) / min(len(A), len(B))`` for the two stem sets, or
        ``0.0`` when either text yields no token.
    """
    tokens_a = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_a))))
    tokens_b = self.split(
        self.lower(self.remove_punctuation(self.remove_spec_chars(text_b))))

    stemmer = Stemmer()
    set_a = set(stemmer.stem_words(tokens_a))
    set_b = set(stemmer.stem_words(tokens_b))

    smaller_size = min(len(set_a), len(set_b))
    if not smaller_size:
        # An empty token set shares nothing with the other text; report
        # "no similarity" instead of raising ZeroDivisionError.
        return 0.0
    return len(set_a & set_b) / smaller_size