Python most_similar_author Examples

Programming Language: Python

Namespace/Package Name: papers.name

Method/Function: most_similar_author

Examples at hotexamples.com: 5

Python most_similar_author - 5 examples found. These are the top rated real world Python examples of papers.name.most_similar_author extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def fix_duplicate_orcids(p):
    """
    Strip an ORCID that appears on several authors of `p` from all of
    them except the best-matching one.

    For every ORCID carried by two or more entries of `p.authors`, the
    Name attached to the researcher with that ORCID is looked up and
    `most_similar_author` picks which entry of `p.author_name_pairs()`
    it matches. Every other author sharing that ORCID gets its 'orcid'
    and 'researcher_id' set to None in `p.authors_list`.

    The paper is saved and re-indexed only when at least one best match
    was recorded.

    NOTE(review): when the Name lookup fails for a duplicated ORCID, all
    authors carrying it are still cleared in memory; if that happens for
    every duplicated ORCID, nothing is saved. Confirm this is intended.
    """
    from collections import defaultdict
    from papers.name import most_similar_author

    # How many authors carry each (non-empty) ORCID.
    occurrences = defaultdict(int)
    for author in p.authors:
        if author.orcid:
            occurrences[author.orcid] += 1

    name_pairs = p.author_name_pairs()

    # Maps the value returned by most_similar_author -> the ORCID to keep.
    keepers = {}
    for orcid, seen in list(occurrences.items()):
        if seen < 2:
            continue
        try:
            name = Name.objects.get(researcher__orcid=orcid)
            position = most_similar_author(name.pair, name_pairs)
            keepers[position] = orcid
        except Name.DoesNotExist:
            logger.exception('DUPLICATE ORCID WITH NO RESEARCHER_ID')

    logger.info(keepers)

    for position, author in enumerate(p.authors):
        if not author.orcid or occurrences[author.orcid] < 2:
            # ORCID absent or not duplicated: leave this author alone.
            continue
        if keepers.get(position) == author.orcid:
            # This author is the selected holder of the ORCID.
            continue
        p.authors_list[position]['orcid'] = None
        p.authors_list[position]['researcher_id'] = None

    if keepers:
        p.save()
        p.update_index()

Example #2

Show file

File: protocol.py Project: Phyks/dissemin

    def get_form_initial_data(self):
        """
        Extend the parent's initial form data with the fields set here:
        'first_name', 'last_name', 'abstract' (when the paper has one),
        'topic' (unless the prediction is 'OTHER') and
        'depositing_author'.
        """
        data = super(HALProtocol, self).get_form_initial_data()

        user = self.user
        data['first_name'] = user.first_name
        data['last_name'] = user.last_name

        # Abstract: use the paper's own one, stripped by kill_html;
        # otherwise ask the paper to consolidate its metadata without
        # waiting (presumably an asynchronous fetch -- confirm).
        if not self.paper.abstract:
            self.paper.consolidate_metadata(wait=False)
        else:
            data['abstract'] = kill_html(self.paper.abstract)

        # Topic: predicted from the abstract when available, else from
        # the title. An 'OTHER' prediction is not kept in the form data.
        topic_source = data['abstract'] if 'abstract' in data else self.paper.title
        predicted = self.predict_topic(topic_source)
        data['topic'] = predicted
        if predicted == 'OTHER':
            del data['topic']

        # Depositing author: result of most_similar_author for the
        # user's name, or None when either name part is missing.
        given, family = self.user.first_name, self.user.last_name
        if given and family:
            data['depositing_author'] = most_similar_author(
                (given, family), self.paper.author_name_pairs())
        else:
            data['depositing_author'] = None

        return data

Example #3

Show file

File: protocol.py Project: robertdigital/dissemin

    def get_form_initial_data(self, **kwargs):
        """
        Build the initial form data on top of what the parent class
        returns (all keyword arguments are forwarded to it).

        Keys written here: 'first_name', 'last_name', 'abstract' (only
        if the paper has one), 'topic' (dropped when predicted as
        'OTHER') and 'depositing_author'.
        """
        data = super(HALProtocol, self).get_form_initial_data(**kwargs)

        data['first_name'] = self.user.first_name
        data['last_name'] = self.user.last_name

        paper = self.paper

        # Abstract, cleaned by kill_html; without one, request metadata
        # consolidation and do not wait for it.
        abstract = paper.abstract
        if abstract:
            data['abstract'] = kill_html(paper.abstract)
        else:
            paper.consolidate_metadata(wait=False)

        # Topic prediction input: the abstract in the form data if any,
        # the paper title otherwise.
        if 'abstract' in data:
            text_for_topic = data['abstract']
        else:
            text_for_topic = paper.title
        data['topic'] = self.predict_topic(text_for_topic)
        if data['topic'] == 'OTHER':
            # 'OTHER' is not exposed as an initial topic value.
            del data['topic']

        # Depositing author: only looked up when the user has both a
        # first and a last name; None otherwise.
        user_name = (self.user.first_name, self.user.last_name)
        depositing = None
        if user_name[0] and user_name[1]:
            depositing = most_similar_author(
                user_name, self.paper.author_name_pairs())
        data['depositing_author'] = depositing

        return data

Example #4

Show file

File: maintenance.py Project: Phyks/dissemin

def fix_duplicate_orcids(p):
    """
    Resolve ORCIDs that are attached to more than one author of `p`.

    Steps:
      1. Count how many authors in `p.authors` carry each ORCID.
      2. For each ORCID seen at least twice, fetch the Name linked to
         the researcher with that ORCID and let `most_similar_author`
         choose the matching entry of `p.author_name_pairs()`.
      3. Reset 'orcid' and 'researcher_id' to None in `p.authors_list`
         for every author with a duplicated ORCID that was not chosen.
      4. Save and re-index `p` if step 2 recorded at least one choice.

    NOTE(review): a duplicated ORCID with no matching Name is logged,
    but its authors are still reset in step 3; if no choice at all was
    recorded, those resets are not saved. Confirm this is intended.
    """
    from collections import defaultdict
    from papers.name import most_similar_author

    tally = defaultdict(int)
    for entry in p.authors:
        if entry.orcid:
            tally[entry.orcid] += 1

    pairs = p.author_name_pairs()

    # ORCIDs shared by two or more authors, in first-seen order.
    duplicated = [orcid for orcid, total in list(tally.items()) if total >= 2]

    chosen = {}
    for orcid in duplicated:
        try:
            matched_name = Name.objects.get(researcher__orcid=orcid)
            winner = most_similar_author(matched_name.pair, pairs)
            chosen[winner] = orcid
        except Name.DoesNotExist:
            logger.exception('DUPLICATE ORCID WITH NO RESEARCHER_ID')

    logger.info(chosen)

    for i, entry in enumerate(p.authors):
        is_duplicate = entry.orcid and tally[entry.orcid] >= 2
        if is_duplicate and chosen.get(i) != entry.orcid:
            # Not the selected holder of this ORCID: drop the link.
            p.authors_list[i]['orcid'] = None
            p.authors_list[i]['researcher_id'] = None

    if chosen:
        p.save()
        p.update_index()

Example #5

Show file

def affiliate_author_with_orcid(ref_name, orcid, authors, initial_orcids=None):
    """
    Given a reference name and an ORCiD for a researcher, find out which
    author in the list is the most likely to be that author. This function
    is run on author lists of papers listed in the ORCiD record so we expect
    that one of the authors should be the same person as the ORCiD holder.
    This just finds the most similar name and returns the appropriate orcids
    list (None everywhere except for the most similar name where it is the ORCiD).

    :param ref_name: the reference name, passed as-is to `most_similar_author`
    :param orcid: the ORCiD to assign to the most similar author
    :param authors: the author names, passed as-is to `most_similar_author`
    :param initial_orcids: optional starting list of ORCiDs; it is used
        only when it is non-empty and has the same length as `authors`,
        otherwise the result starts as all None. The caller's sequence
        is never modified: the result is always a new list.
    :returns: a list of length `len(authors)` holding `orcid` at the
        position chosen by `most_similar_author` (when it is not None)
    """
    max_sim_idx = most_similar_author(ref_name, authors)
    if initial_orcids and len(initial_orcids) == len(authors):
        # Work on a copy: writing into `initial_orcids` itself would
        # silently alter the caller's data.
        orcids = list(initial_orcids)
    else:
        orcids = [None] * len(authors)
    if max_sim_idx is not None:
        orcids[max_sim_idx] = orcid
    return orcids