Esempio n. 1
0
def affinity(X):
    """Compute pairwise distances between (author, affiliation) tuples.

    Note that this function is a heuristic. It should ideally be replaced
    by a more robust distance function, e.g. using a model learned over
    pairs of tuples.

    Parameters
    ----------
    :param X: array-like, indexable as ``X[i, 0]`` and ``X[i, 1]``
        Row ``i`` holds the author name in column 0 and the affiliation
        in column 1.

    Returns
    -------
    :returns: numpy array, shape (len(X), len(X))
        Symmetric matrix with a zero diagonal. An entry is 0.0 when the
        two rows are considered the same author, 1.0 when their initials
        are incompatible and 0.5 when no decision can be made.
    """
    n_samples = len(X)

    # ``np.float`` was a plain alias of the builtin ``float``; it was
    # deprecated in NumPy 1.20 and removed in 1.24.
    distances = np.zeros((n_samples, n_samples), dtype=float)

    # Normalize each row once instead of once per pair of rows.
    names = [normalize_name(X[i, 0]) for i in range(n_samples)]
    initials = [name_initials(name) for name in names]

    # Only the strict upper triangle is filled; it is mirrored below.
    for i, j in zip(*np.triu_indices(n_samples, k=1)):
        aff_i = X[i, 1]
        aff_j = X[j, 1]
        same_affiliation = aff_i == aff_j

        # Initials are compatible when the union is no larger than the
        # bigger of the two sets, i.e. one set contains the other.
        compatible = (len(initials[i] | initials[j]) ==
                      max(len(initials[i]), len(initials[j])))

        # Names and affiliations match
        if names[i] == names[j] and same_affiliation:
            distances[i, j] = 0.0

        # Compatible initials and (non-empty) affiliations match
        elif compatible and same_affiliation and aff_i != "":
            distances[i, j] = 0.0

        # Initials are not compatible
        elif not compatible:
            distances[i, j] = 1.0

        # We don't know
        else:
            distances[i, j] = 0.5

    # The diagonal and lower triangle are still zero, so adding the
    # transpose yields the symmetric matrix.
    distances += distances.T
    return distances
Esempio n. 2
0
def affinity(X):
    """Compute pairwise distances between (author, affiliation) tuples.

    Note that this function is a heuristic. It should ideally be replaced
    by a more robust distance function, e.g. using a model learned over
    pairs of tuples.

    Parameters
    ----------
    :param X: array-like, indexable as ``X[i, 0]`` and ``X[i, 1]``
        Column 0 of each row is the author name, column 1 the
        affiliation.

    Returns
    -------
    :returns: numpy array, shape (len(X), len(X))
        Symmetric distance matrix with zeros on the diagonal. Values are
        0.0 (treated as the same author), 1.0 (initials are
        incompatible) or 0.5 (undecided).
    """
    size = len(X)

    # The builtin ``float`` replaces ``np.float``, an alias that NumPy
    # deprecated in 1.20 and removed in 1.24.
    distances = np.zeros((size, size), dtype=float)

    # Per-row preprocessing is done once up front rather than being
    # repeated for every pair the row takes part in.
    names = [normalize_name(X[row, 0]) for row in range(size)]
    initials = [name_initials(name) for name in names]

    # Fill the strict upper triangle only; it is mirrored afterwards.
    for i, j in zip(*np.triu_indices(size, k=1)):
        aff_i, aff_j = X[i, 1], X[j, 1]
        affiliations_match = aff_i == aff_j

        # One set of initials contains the other exactly when their
        # union has the size of the larger set.
        union_size = len(initials[i] | initials[j])
        compatible = union_size == max(len(initials[i]), len(initials[j]))

        # Names and affiliations match
        if names[i] == names[j] and affiliations_match:
            distances[i, j] = 0.0

        # Compatible initials and (non-empty) affiliations match
        elif compatible and affiliations_match and aff_i != "":
            distances[i, j] = 0.0

        # Initials are not compatible
        elif not compatible:
            distances[i, j] = 1.0

        # We don't know
        else:
            distances[i, j] = 0.5

    # Mirror the upper triangle into the (still zero) lower triangle.
    distances += distances.T
    return distances
Esempio n. 3
0
def get_author_initials(s):
    """Get author initials from the signature.

    Parameters
    ----------
    :param s: dict
        Signature. Must contain the key ``"author_name"``; a falsy value
        (e.g. ``None``) is treated as the empty string.

    Returns
    -------
    :returns: string
        Initials, not separated, in sorted order
    """
    name = s["author_name"] or ""
    # name_initials presumably returns a set (verify against its
    # definition). Joining a set of strings directly gives an order that
    # can change between interpreter runs because of hash randomization,
    # so sort first to keep the result reproducible.
    return "".join(sorted(name_initials(name)))
Esempio n. 4
0
def get_author_initials(s):
    """Get author initials from the signature.

    Parameters
    ----------
    :param s: dict
        Signature. The ``"author_name"`` entry is required; when its
        value is falsy (e.g. ``None``) the empty string is used instead.

    Returns
    -------
    :returns: string
        Initials, not separated, in sorted order
    """
    author_name = s["author_name"]
    if not author_name:
        author_name = ""

    # Sorting makes the output deterministic: name_initials presumably
    # returns a set (verify against its definition), and the iteration
    # order of a set of strings is not stable across interpreter runs.
    initials = sorted(name_initials(author_name))
    return "".join(initials)
Esempio n. 5
0
def get_author_initials(s):
    """Get author initials from the signature.

    Parameters
    ----------
    :param s: dict
        Signature. Must provide ``"author_name"``; a falsy value is
        replaced by the empty string.

    Returns
    -------
    :returns: string
        Initials, not separated, in sorted order
    """
    v = s["author_name"] or ""
    # Sort before joining so the result does not depend on the iteration
    # order of the collection returned by name_initials (presumably a
    # set, whose order for strings varies with hash randomization).
    return "".join(sorted(name_initials(v)))