def crosslang_record_linkage_baseline(s1, s2):
    """Cross-language record-linkage baseline similarity of two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and delegates to
    sim_func.record_linkage_baseline (project helper — exact semantics
    defined there).
    """
    # pd.np was deprecated in pandas 1.0 and removed in 2.0; use a plain
    # float NaN instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    # Separate pd.isnull check kept so the explicit None test short-circuits
    # first (pd.isnull also covers NaN/NaT scalars).
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    return sim_func.record_linkage_baseline(s1, s2)
def crosslang_uwn_sense_similarity_lin(s1, s2):
    """UWN Lin sense similarity between two values (cross-language).

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and delegates to
    sim_func.sense_similarity_lin (project helper).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    return sim_func.sense_similarity_lin(s1, s2)
def crosslang_max_sim(s1, s2):
    """Maximum cross-language similarity between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and delegates to sim_func.get_max_sim
    (project helper).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    return sim_func.get_max_sim(s1, s2)
def crosslang_uwn_common_sense_weights(s1, s2):
    """UWN common-sense-weights similarity between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and delegates to
    sim_func.common_sense_weights (project helper).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    return sim_func.common_sense_weights(s1, s2)
def crosslang_greedy_aligned_words(s1, s2):
    """Greedy word-alignment similarity between two values (cross-language).

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and delegates to
    sim_func.greedy_aligned_words (project helper).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    return sim_func.greedy_aligned_words(s1, s2)
def crosslang_weighted_aligned_words_senses_jaccard(s1, s2):
    """Weighted aligned-words senses-Jaccard similarity between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and delegates to
    sim_func.weighted_aligned_words_senses_jaccard (project helper).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    return sim_func.weighted_aligned_words_senses_jaccard(s1, s2)
def smith_waterman(s1, s2):
    """Smith-Waterman raw alignment score between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and scores them with
    py_stringmatching's SmithWaterman measure (raw score).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    measure = sm.SmithWaterman()
    return measure.get_raw_score(s1, s2)
def needleman_wunsch(s1, s2):
    """Needleman-Wunsch raw alignment score between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and scores them with
    py_stringmatching's NeedlemanWunsch measure (raw score).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    measure = sm.NeedlemanWunsch()
    return measure.get_raw_score(s1, s2)
def jaro_winkler(s1, s2):
    """Jaro-Winkler raw similarity score between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and scores them with
    py_stringmatching's JaroWinkler measure (raw score).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    measure = sm.JaroWinkler()
    return measure.get_raw_score(s1, s2)
def lev_sim(s1, s2):
    """Normalized Levenshtein similarity between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and scores them with py_stringmatching's
    Levenshtein measure. Note: uses get_sim_score (normalized similarity),
    unlike the raw-score wrappers in this module.
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    measure = sm.Levenshtein()
    return measure.get_sim_score(s1, s2)
def hamming_sim(s1, s2):
    """Normalized Hamming similarity between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and scores them with py_stringmatching's
    HammingDistance measure via get_sim_score (normalized similarity).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    measure = sm.HammingDistance()
    return measure.get_sim_score(s1, s2)
def affine(s1, s2):
    """Affine-gap raw alignment score between two values.

    Returns NaN when either input is None or null; otherwise converts both
    inputs to unicode strings and scores them with
    py_stringmatching's Affine measure (raw score).
    """
    # pd.np was removed in pandas 2.0 — use float('nan') instead of pd.np.NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')
    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)
    measure = sm.Affine()
    return measure.get_raw_score(s1, s2)
def tok_qgram(s, qval=None):
    """Tokenize *s* into q-grams.

    Parameters
    ----------
    s : value to tokenize; returned unchanged when null (None/NaN).
    qval : optional q-gram length. Defaults to the module-level global
        ``q`` for backward compatibility with existing callers.

    Returns the list of q-gram tokens produced by
    py_stringmatching's QgramTokenizer.
    """
    # pd.isnull covers both None and NaN scalar inputs.
    if pd.isnull(s):
        return s
    s = helper.convert_to_str_unicode(s)
    # Was hard-wired to the module global q; now parameterized, with the
    # global as the default so behavior for existing callers is unchanged.
    tokenizer = sm.QgramTokenizer(qval=q if qval is None else qval)
    return tokenizer.tokenize(s)
def tok_delim(s, delim=None):
    """Tokenize *s* on a delimiter.

    Parameters
    ----------
    s : value to tokenize; returned unchanged when null (None/NaN).
    delim : optional delimiter string. Defaults to the module-level global
        ``d`` for backward compatibility with existing callers.

    Returns the list of tokens produced by py_stringmatching's
    DelimiterTokenizer.

    NOTE(review): an earlier revision considered stripping non-ASCII
    characters here before tokenizing; conversion is currently handled by
    helper.convert_to_str_unicode.
    """
    # pd.isnull covers both None and NaN scalar inputs.
    if pd.isnull(s):
        return s
    s = helper.convert_to_str_unicode(s)
    # Was hard-wired to the module global d; now parameterized, with the
    # global as the default so behavior for existing callers is unchanged.
    tokenizer = sm.DelimiterTokenizer(delim_set=[d if delim is None else delim])
    return tokenizer.tokenize(s)