def affinity(X):
    """Compute pairwise distances between (author, affiliation) tuples.

    Note that this function is a heuristic. It should ideally be replaced
    by a more robust distance function, e.g. using a model learned over
    pairs of tuples.

    Parameters
    ----------
    :param X: array-like, indexed as ``X[i, 0]`` and ``X[i, 1]``
        Row ``i`` holds an author name in column 0 and an affiliation in
        column 1.

    Returns
    -------
    :returns: numpy array of floats, shape (len(X), len(X))
        Symmetric matrix with a zero diagonal. An off-diagonal entry is
        0.0 when both rows have the same normalized name and affiliation,
        or compatible initials and the same non-empty affiliation;
        1.0 when the initials are not compatible; 0.5 otherwise.
    """
    n_samples = len(X)

    # ``np.float`` was only an alias of the builtin ``float``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
    distances = np.zeros((n_samples, n_samples), dtype=float)

    # Normalization depends on a single row only, so do it once per row
    # rather than once per pair.
    names = [normalize_name(X[i, 0]) for i in range(n_samples)]
    initials = [name_initials(name) for name in names]

    # Only the strict upper triangle is filled; it is mirrored afterwards.
    for i, j in zip(*np.triu_indices(n_samples, k=1)):
        aff_i = X[i, 1]
        aff_j = X[j, 1]

        # Names and affiliations match
        if names[i] == names[j] and aff_i == aff_j:
            distances[i, j] = 0.0
            continue

        # Initials are "compatible" when their union is no larger than the
        # bigger of the two collections (for sets: one contains the other).
        # NOTE(review): assumes name_initials returns a set-like object
        # supporting ``|`` and ``len`` -- confirm against its definition.
        union_size = len(initials[i] | initials[j])
        compatible = union_size == max(len(initials[i]), len(initials[j]))

        if not compatible:
            # Initials are not compatible
            distances[i, j] = 1.0
        elif aff_i == aff_j and aff_i != "":
            # Compatible initials and (non-empty) affiliations match
            distances[i, j] = 0.0
        else:
            # We don't know
            distances[i, j] = 0.5

    # Mirror the upper triangle. Done out of place so the result never
    # depends on how NumPy handles an in-place add with an overlapping view.
    return distances + distances.T
def affinity(X):
    """Compute pairwise distances between (author, affiliation) tuples.

    Note that this function is a heuristic. It should ideally be replaced
    by a more robust distance function, e.g. using a model learned over
    pairs of tuples.

    Parameters
    ----------
    :param X: array-like, indexed as ``X[i, 0]`` and ``X[i, 1]``
        Row ``i`` holds an author name in column 0 and an affiliation in
        column 1.

    Returns
    -------
    :returns: numpy array of floats, shape (len(X), len(X))
        Symmetric matrix with a zero diagonal. An off-diagonal entry is
        0.0 when both rows have the same normalized name and affiliation,
        or compatible initials and the same non-empty affiliation;
        1.0 when the initials are not compatible; 0.5 otherwise.
    """
    n_samples = len(X)

    # ``np.float`` was only an alias of the builtin ``float``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
    distances = np.zeros((n_samples, n_samples), dtype=float)

    # Normalization depends on a single row only, so do it once per row
    # rather than once per pair.
    names = [normalize_name(X[i, 0]) for i in range(n_samples)]
    initials = [name_initials(name) for name in names]

    # Only the strict upper triangle is filled; it is mirrored afterwards.
    for i, j in zip(*np.triu_indices(n_samples, k=1)):
        aff_i = X[i, 1]
        aff_j = X[j, 1]

        # Names and affiliations match
        if names[i] == names[j] and aff_i == aff_j:
            distances[i, j] = 0.0
            continue

        # Initials are "compatible" when their union is no larger than the
        # bigger of the two collections (for sets: one contains the other).
        # NOTE(review): assumes name_initials returns a set-like object
        # supporting ``|`` and ``len`` -- confirm against its definition.
        union_size = len(initials[i] | initials[j])
        compatible = union_size == max(len(initials[i]), len(initials[j]))

        if not compatible:
            # Initials are not compatible
            distances[i, j] = 1.0
        elif aff_i == aff_j and aff_i != "":
            # Compatible initials and (non-empty) affiliations match
            distances[i, j] = 0.0
        else:
            # We don't know
            distances[i, j] = 0.5

    # Mirror the upper triangle. Done out of place so the result never
    # depends on how NumPy handles an in-place add with an overlapping view.
    return distances + distances.T
def get_author_initials(s):
    """Return the author's initials from a signature as a single string.

    Parameters
    ----------
    :param s: dict
        Signature; its "author_name" entry is read. A falsy value
        (e.g. None or an empty string) is treated as an empty name.

    Returns
    -------
    :returns: string
        The items produced by ``name_initials``, concatenated without a
        separator.
    """
    author_name = s["author_name"] or ""
    return "".join(name_initials(author_name))
def get_author_initials(s):
    """Get author initials from the signature.

    Parameters
    ----------
    :param s: dict
        Signature; the value stored under "author_name" is used.

    Returns
    -------
    :returns: string
        Initials, not separated
    """
    raw_name = s["author_name"]
    if not raw_name:
        # Missing/empty names are handed to name_initials as "".
        raw_name = ""

    initials = name_initials(raw_name)
    return "".join(initials)