コード例 #1
0
def crosslang_record_linkage_baseline(s1, s2):
    """Cross-language record-linkage baseline similarity of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and the score is delegated to
    ``sim_func.record_linkage_baseline``.
    """
    # pd.isnull(None) is True, so this single check also covers the
    # explicit ``is None`` test the original code duplicated.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — the pd.np alias was removed
        # in pandas 2.0 (deprecated since 1.0).
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    return sim_func.record_linkage_baseline(s1, s2)
コード例 #2
0
def crosslang_uwn_sense_similarity_lin(s1, s2):
    """UWN-based Lin sense similarity of two values across languages.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and the score is delegated to
    ``sim_func.sense_similarity_lin``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    return sim_func.sense_similarity_lin(s1, s2)
コード例 #3
0
def crosslang_max_sim(s1, s2):
    """Maximum cross-language similarity of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and the score is delegated to
    ``sim_func.get_max_sim``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    return sim_func.get_max_sim(s1, s2)
コード例 #4
0
def crosslang_uwn_common_sense_weights(s1, s2):
    """UWN common-sense-weights similarity of two values across languages.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and the score is delegated to
    ``sim_func.common_sense_weights``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    return sim_func.common_sense_weights(s1, s2)
コード例 #5
0
def crosslang_greedy_aligned_words(s1, s2):
    """Greedy word-alignment similarity of two values across languages.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and the score is delegated to
    ``sim_func.greedy_aligned_words``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    return sim_func.greedy_aligned_words(s1, s2)
コード例 #6
0
def crosslang_weighted_aligned_words_senses_jaccard(s1, s2):
    """Weighted aligned-words senses-Jaccard similarity of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and the score is delegated to
    ``sim_func.weighted_aligned_words_senses_jaccard``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    return sim_func.weighted_aligned_words_senses_jaccard(s1, s2)
コード例 #7
0
def smith_waterman(s1, s2):
    """Smith-Waterman raw alignment score of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and scored with
    ``sm.SmithWaterman().get_raw_score``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.SmithWaterman()
    return measure.get_raw_score(s1, s2)
コード例 #8
0
def needleman_wunsch(s1, s2):
    """Needleman-Wunsch raw alignment score of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and scored with
    ``sm.NeedlemanWunsch().get_raw_score``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.NeedlemanWunsch()
    return measure.get_raw_score(s1, s2)
コード例 #9
0
def jaro_winkler(s1, s2):
    """Jaro-Winkler raw similarity score of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and scored with
    ``sm.JaroWinkler().get_raw_score``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.JaroWinkler()
    return measure.get_raw_score(s1, s2)
コード例 #10
0
def lev_sim(s1, s2):
    """Normalized Levenshtein similarity score of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and scored with
    ``sm.Levenshtein().get_sim_score``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.Levenshtein()
    return measure.get_sim_score(s1, s2)
コード例 #11
0
def hamming_sim(s1, s2):
    """Normalized Hamming similarity score of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and scored with
    ``sm.HammingDistance().get_sim_score``.  Note the underlying measure
    requires equal-length strings and will raise otherwise.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.HammingDistance()
    return measure.get_sim_score(s1, s2)
コード例 #12
0
def affine(s1, s2):
    """Affine-gap raw alignment score of two values.

    Returns NaN when either input is missing; otherwise both inputs are
    normalized to unicode strings and scored with
    ``sm.Affine().get_raw_score``.
    """
    # pd.isnull(None) is True, so one check covers both None and pandas NA.
    if pd.isnull(s1) or pd.isnull(s2):
        # float("nan") replaces pd.np.NaN — pd.np was removed in pandas 2.0.
        return float("nan")

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.Affine()
    return measure.get_raw_score(s1, s2)
コード例 #13
0
    def tok_qgram(s):
        """Tokenize *s* into q-grams of length ``q`` (enclosing-scope var).

        Missing values (per ``pd.isnull``) are returned unchanged; other
        inputs are normalized to a unicode string first.
        """
        # Pass missing values straight through instead of tokenizing.
        if pd.isnull(s):
            return s

        text = helper.convert_to_str_unicode(s)

        tokenizer = sm.QgramTokenizer(qval=q)
        return tokenizer.tokenize(text)
コード例 #14
0
    def tok_delim(s):
        """Tokenize *s* on the delimiter ``d`` (enclosing-scope var).

        Missing values (per ``pd.isnull``) are returned unchanged; other
        inputs are normalized to a unicode string first.
        """
        # Pass missing values straight through instead of tokenizing.
        if pd.isnull(s):
            return s

        # NOTE(review): a previous revision stripped non-ASCII characters
        # here; unicode normalization is handled by the helper instead.
        text = helper.convert_to_str_unicode(s)

        tokenizer = sm.DelimiterTokenizer(delim_set=[d])
        return tokenizer.tokenize(text)