Exemple #1
0
    def __init__(self):
        """Create the similarity measures and the tokenizer held by this object.

        ``similarity_function`` is a list with one default-constructed
        instance of each ``sm`` measure class named below, in that order.
        ``alphanumeric_tokenizer`` is an ``sm.AlphanumericTokenizer`` built
        with ``return_set=True``.
        """
        measure_classes = (
            sm.BagDistance,
            sm.Cosine,
            sm.Dice,
            sm.Editex,
            sm.GeneralizedJaccard,
            sm.Jaccard,
            sm.Jaro,
            sm.JaroWinkler,
            sm.Levenshtein,
            sm.OverlapCoefficient,
            sm.TverskyIndex,
        )
        # Instantiate in declaration order so each measure keeps its list index.
        self.similarity_function = [measure() for measure in measure_classes]

        self.alphanumeric_tokenizer = sm.AlphanumericTokenizer(return_set=True)
Exemple #2
0
def _score_rows(frame, scorer, left_col, right_col):
    """Call ``scorer`` on the two named columns of every row of ``frame``.

    Returns whatever ``frame.apply(..., axis=1)`` yields for those calls.
    """
    return frame.apply(
        lambda row: scorer(row[left_col], row[right_col]), axis=1)


# Score each row's 'aTokens' / 'bTokens' pair with tfidf.get_sim_score.
df['TfIdf'] = _score_rows(df, tfidf.get_sim_score, 'aTokens', 'bTokens')
df.head()

# # Sequence-Based Similarities

# In[38]:

# Affine is scored with get_raw_score; the cells below use get_sim_score.
aff = sm.Affine()
df['Affine'] = _score_rows(df, aff.get_raw_score, 'Sequence1', 'Sequence2')
df.head()

# In[39]:

# Bag distance similarity of each 'Sequence1' / 'Sequence2' pair.
bd = sm.BagDistance()
df['Bag'] = _score_rows(df, bd.get_sim_score, 'Sequence1', 'Sequence2')
df.head()

# In[40]:

# Editex similarity of each 'Sequence1' / 'Sequence2' pair.
ed = sm.Editex()
df['Editex'] = _score_rows(df, ed.get_sim_score, 'Sequence1', 'Sequence2')
df.head()

# In[41]:

jaro = sm.Jaro()
df['Jaro'] = df.apply(