Example #1
0
def fix_duplicate_orcids(p):
    """
    Make sure each ORCID carried by the authors of ``p`` appears only once.

    For every ORCID attached to two or more authors, the ``Name`` of the
    researcher holding that ORCID is looked up and compared to the author
    name pairs of the paper with ``most_similar_author``; the position it
    returns is the one allowed to keep the ORCID. Every other entry of
    ``p.authors_list`` carrying a duplicated ORCID gets its ``'orcid'`` and
    ``'researcher_id'`` reset to ``None``.

    The paper is saved and re-indexed only when at least one duplicated
    ORCID could be matched to a ``Name``.

    NOTE(review): when no ``Name`` exists for a duplicated ORCID, the
    corresponding entries are still cleared in memory but nothing is saved
    unless another ORCID was matched -- confirm this is intended.

    :param p: the paper to clean up; it must expose ``authors``,
        ``authors_list``, ``author_name_pairs()``, ``save()`` and
        ``update_index()``.
    """
    from collections import defaultdict
    from papers.name import most_similar_author

    # How many authors carry each (non-empty) ORCID.
    occurrences = defaultdict(int)
    for author in p.authors:
        if author.orcid:
            occurrences[author.orcid] += 1

    name_pairs = p.author_name_pairs()

    # Author position -> ORCID that this position is allowed to keep.
    kept_orcid_at = {}
    for orcid, seen in occurrences.items():
        if seen < 2:
            continue
        try:
            holder_name = Name.objects.get(researcher__orcid=orcid)
        except Name.DoesNotExist:
            logger.exception('DUPLICATE ORCID WITH NO RESEARCHER_ID')
            continue
        closest = most_similar_author(holder_name.pair, name_pairs)
        kept_orcid_at[closest] = orcid

    logger.info(kept_orcid_at)

    for position, author in enumerate(p.authors):
        orcid = author.orcid
        if not orcid or occurrences[orcid] < 2:
            # No ORCID, or an ORCID that is not duplicated: leave untouched.
            continue
        if kept_orcid_at.get(position) == orcid:
            # This author is the best match for the ORCID: keep it.
            continue
        p.authors_list[position]['orcid'] = None
        p.authors_list[position]['researcher_id'] = None

    if kept_orcid_at:
        p.save()
        p.update_index()
Example #2
0
    def get_form_initial_data(self):
        """
        Extend the parent's initial form data with HAL-specific values.

        Added keys:

        * ``first_name`` / ``last_name``: copied from ``self.user``;
        * ``abstract``: the paper abstract passed through ``kill_html``,
          only when the paper has one (otherwise a non-blocking
          ``consolidate_metadata`` is triggered on the paper);
        * ``topic``: the result of ``predict_topic`` on the abstract if
          one is present in the data, else on the paper title; the key is
          removed when the prediction is ``'OTHER'``;
        * ``depositing_author``: the value returned by
          ``most_similar_author`` for the user's name against the paper's
          author name pairs, or ``None`` if the user lacks a first or a
          last name.

        :returns: the dictionary of initial form values.
        """
        data = super(HALProtocol, self).get_form_initial_data()

        user = self.user
        paper = self.paper

        data['first_name'] = user.first_name
        data['last_name'] = user.last_name

        # Abstract
        abstract = paper.abstract
        if not abstract:
            paper.consolidate_metadata(wait=False)
        else:
            data['abstract'] = kill_html(abstract)

        # Topic: predicted from the abstract when available, else the title
        topic_source = data['abstract'] if 'abstract' in data else paper.title
        predicted = self.predict_topic(topic_source)
        if predicted == 'OTHER':
            data.pop('topic', None)
        else:
            data['topic'] = predicted

        # Depositing author
        given, family = user.first_name, user.last_name
        data['depositing_author'] = (
            most_similar_author((given, family), paper.author_name_pairs())
            if given and family else None
        )

        return data
Example #3
0
    def get_form_initial_data(self, **kwargs):
        """
        Extend the parent's initial form data with HAL-specific values.

        Any keyword arguments are forwarded unchanged to the parent
        implementation. On top of what the parent returns, this fills in:

        * ``first_name`` / ``last_name`` from ``self.user``;
        * ``abstract`` (paper abstract passed through ``kill_html``) when
          the paper has one; otherwise a non-blocking
          ``consolidate_metadata`` is requested on the paper;
        * ``topic``, predicted with ``predict_topic`` from the abstract if
          present in the data, else from the paper title; dropped when the
          prediction is ``'OTHER'``;
        * ``depositing_author``, the result of ``most_similar_author`` for
          the user's name among the paper's author name pairs, or ``None``
          when the user has no first or last name.

        :returns: the dictionary of initial form values.
        """
        data = super(HALProtocol, self).get_form_initial_data(**kwargs)

        depositor = self.user
        data['first_name'] = depositor.first_name
        data['last_name'] = depositor.last_name

        # Abstract
        raw_abstract = self.paper.abstract
        if raw_abstract:
            data['abstract'] = kill_html(raw_abstract)
        else:
            self.paper.consolidate_metadata(wait=False)

        # Topic
        if 'abstract' in data:
            text_for_topic = data['abstract']
        else:
            text_for_topic = self.paper.title
        topic = self.predict_topic(text_for_topic)
        if topic != 'OTHER':
            data['topic'] = topic
        else:
            # Same outcome as setting the key and deleting it right after.
            data.pop('topic', None)

        # Depositing author
        name_pair = (depositor.first_name, depositor.last_name)
        if all(name_pair):
            data['depositing_author'] = most_similar_author(
                name_pair, self.paper.author_name_pairs())
        else:
            data['depositing_author'] = None

        return data
Example #4
0
def fix_duplicate_orcids(p):
    """
    Make sure each ORCID carried by the authors of ``p`` appears only once.

    For every ORCID attached to two or more authors, the ``Name`` of the
    researcher holding that ORCID is looked up and compared to the author
    name pairs of the paper with ``most_similar_author``; the position it
    returns is the one allowed to keep the ORCID. Every other entry of
    ``p.authors_list`` carrying a duplicated ORCID gets its ``'orcid'`` and
    ``'researcher_id'`` reset to ``None``.

    The paper is saved and re-indexed only when at least one duplicated
    ORCID could be matched to a ``Name``.

    NOTE(review): when no ``Name`` exists for a duplicated ORCID, the
    corresponding entries are still cleared in memory but nothing is saved
    unless another ORCID was matched -- confirm this is intended.

    :param p: the paper to clean up; it must expose ``authors``,
        ``authors_list``, ``author_name_pairs()``, ``save()`` and
        ``update_index()``.
    """
    from collections import defaultdict
    from papers.name import most_similar_author

    # How many authors carry each (non-empty) ORCID.
    occurrences = defaultdict(int)
    for author in p.authors:
        if author.orcid:
            occurrences[author.orcid] += 1

    name_pairs = p.author_name_pairs()

    # Author position -> ORCID that this position is allowed to keep.
    kept_orcid_at = {}
    for orcid, seen in occurrences.items():
        if seen < 2:
            continue
        try:
            holder_name = Name.objects.get(researcher__orcid=orcid)
        except Name.DoesNotExist:
            logger.exception('DUPLICATE ORCID WITH NO RESEARCHER_ID')
            continue
        closest = most_similar_author(holder_name.pair, name_pairs)
        kept_orcid_at[closest] = orcid

    logger.info(kept_orcid_at)

    for position, author in enumerate(p.authors):
        orcid = author.orcid
        if not orcid or occurrences[orcid] < 2:
            # No ORCID, or an ORCID that is not duplicated: leave untouched.
            continue
        if kept_orcid_at.get(position) == orcid:
            # This author is the best match for the ORCID: keep it.
            continue
        p.authors_list[position]['orcid'] = None
        p.authors_list[position]['researcher_id'] = None

    if kept_orcid_at:
        p.save()
        p.update_index()
Example #5
0
def affiliate_author_with_orcid(ref_name, orcid, authors, initial_orcids=None):
    """
    Given a reference name and an ORCiD for a researcher, find out which
    author in the list is the most likely to be that author. This function
    is run on author lists of papers listed in the ORCiD record so we expect
    that one of the authors should be the same person as the ORCiD holder.
    This just finds the most similar name and returns the appropriate orcids
    list (None everywhere except for the most similar name where it is the ORCiD).

    :param ref_name: the reference name of the ORCiD holder, passed as is
        to ``most_similar_author``.
    :param orcid: the ORCiD to assign to the best matching author.
    :param authors: the author names of the paper, passed as is to
        ``most_similar_author``.
    :param initial_orcids: optional sequence of already known ORCiDs. It is
        used as the starting point only when it is non-empty and has the
        same length as ``authors``; otherwise it is ignored. It is never
        modified: the result is always a new list.
    :returns: a list with one entry per author, where the entry of the most
        similar author (if any) is set to ``orcid``.
    """
    max_sim_idx = most_similar_author(ref_name, authors)
    if initial_orcids and len(initial_orcids) == len(authors):
        # Copy so that the assignment below does not alter the caller's
        # sequence (this also accepts non-list sequences such as tuples).
        orcids = list(initial_orcids)
    else:
        orcids = [None] * len(authors)
    if max_sim_idx is not None:
        orcids[max_sim_idx] = orcid
    return orcids