def hamming_sim(s1, s2):
    """
    Compute the Hamming similarity between the two input strings.

    Args:
        s1,s2 (string): The input strings for which the similarity measure
            should be computed. Values that are neither strings nor bytes
            are converted with ``str()`` before scoring.

    Returns:
        The Hamming similarity if neither input is missing (i.e. None or
        NaN), else NaN.
    """
    # Missing values short-circuit to NaN. ``float('nan')`` is used rather
    # than ``pd.np.NaN`` because the ``pd.np`` alias no longer exists in
    # current pandas releases, which made this path raise AttributeError.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the similarity measure object
    measure = sm.HammingDistance()

    # Anything that is not already text or bytes is stringified first.
    text_types = six.string_types + (bytes,)
    if not isinstance(s1, text_types):
        s1 = str(s1)
    if not isinstance(s2, text_types):
        s2 = str(s2)

    # Call the function to compute the similarity score.
    return measure.get_sim_score(s1, s2)
Example #2
0
def hamming_sim(s1, s2):
    """
    Compute the Hamming similarity between the two input strings.

    Args:
        s1,s2 (string): The input strings for which the similarity measure
            should be computed. Both are passed through
            ``gh.convert_to_str_unicode`` before scoring.

    Returns:
        The Hamming similarity if neither input is missing (i.e. None or
        NaN), else NaN.

    Examples:
        >>> import py_entitymatching as em
        >>> em.hamming_sim('alex', 'alxe')
        0.5
        >>> em.hamming_sim(None, 'alex')
        nan

    """
    # Missing values short-circuit to NaN. ``float('nan')`` is used rather
    # than ``pd.np.NaN`` because the ``pd.np`` alias no longer exists in
    # current pandas releases, which made this path raise AttributeError.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the similarity measure object
    measure = sm.HammingDistance()

    s1 = gh.convert_to_str_unicode(s1)
    s2 = gh.convert_to_str_unicode(s2)

    # Call the function to compute the similarity score.
    return measure.get_sim_score(s1, s2)
Example #3
0
 def hamming_score(self, str_pair, sim_score=True):
     """
     Score a pair of strings with the Hamming measure.

     :param str_pair: the pair of strings; it is unpacked from the result of
         ``self._check_input(str_pair)``
     :param sim_score: when truthy the similarity score is returned,
         otherwise the raw score
     :return: 0 when the two strings differ in length; otherwise the
         similarity score or raw score from ``sm.HammingDistance``
     """
     first, second = self._check_input(str_pair)

     # Strings of different length are not scored; 0 is returned for both
     # the similarity and the raw variant.
     if len(first) != len(second):
         return 0

     measure = sm.HammingDistance()
     if sim_score:
         return measure.get_sim_score(first, second)
     return measure.get_raw_score(first, second)
def hamming_sim(s1, s2):
    """
    Compute the Hamming similarity between the two input values.

    Args:
        s1,s2: The inputs to compare. Both are passed through
            ``helper.convert_to_str_unicode`` before scoring.

    Returns:
        NaN if either input is None or is reported missing by
        ``pd.isnull``; otherwise the value of
        ``sm.HammingDistance().get_sim_score(s1, s2)``.
    """
    # Missing values short-circuit to NaN. ``float('nan')`` is used rather
    # than ``pd.np.NaN`` because the ``pd.np`` alias no longer exists in
    # current pandas releases, which made this path raise AttributeError.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.HammingDistance()
    return measure.get_sim_score(s1, s2)
Example #5
0
def hamming_sim(s1, s2):
    """
    Compute the Hamming similarity between the two input values.

    Args:
        s1,s2: The inputs to compare. Values that are neither strings nor
            bytes are converted with ``str()`` before scoring.

    Returns:
        NaN if either input is None or is reported missing by
        ``pd.isnull``; otherwise the value of
        ``sm.HammingDistance().get_sim_score(s1, s2)``.
    """
    # Missing values short-circuit to NaN. ``float('nan')`` is used rather
    # than ``pd.np.NaN`` because the ``pd.np`` alias no longer exists in
    # current pandas releases, which made this path raise AttributeError.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the similarity measure object
    measure = sm.HammingDistance()

    # Anything that is not already text or bytes is stringified first.
    text_types = six.string_types + (bytes,)
    if not isinstance(s1, text_types):
        s1 = str(s1)
    if not isinstance(s2, text_types):
        s2 = str(s2)

    # Call the function to compute the similarity score.
    return measure.get_sim_score(s1, s2)
Example #6
0
def hamming_dist(s1, s2):
    """
    Compute the Hamming distance between the two input strings.

    Args:
        s1,s2 (string): The input strings for which the distance should be
            computed. Values that are neither strings nor bytes are
            converted with ``str()`` before scoring.

    Returns:
        The Hamming distance if neither input is missing (i.e. None or
        NaN), else NaN.

    Examples:
        >>> import py_entitymatching as em
        >>> em.hamming_dist('alex', 'john')
        4
        >>> em.hamming_dist(None, 'john')
        nan

    """
    # Missing values short-circuit to NaN. ``float('nan')`` is used rather
    # than ``pd.np.NaN`` because the ``pd.np`` alias no longer exists in
    # current pandas releases, which made this path raise AttributeError.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the similarity measure object
    measure = sm.HammingDistance()

    # Anything that is not already text or bytes is stringified first.
    text_types = six.string_types + (bytes,)
    if not isinstance(s1, text_types):
        s1 = str(s1)
    if not isinstance(s2, text_types):
        s2 = str(s2)

    # Call the function to compute the distance
    return measure.get_raw_score(s1, s2)