Example #1
0
 def test_affiliation_is_greater_partial_order(self):
     """
     Check, on a few sample pairs, that affiliation_is_greater never holds
     in both directions at once and never holds between a value and itself.
     """
     sample_pairs = [
         (None, None),
         (None, 'Cambridge'),
         ('0000-0002-8612-8827', 'ENS'),
         ('University of Oxford, Oxfordshire', '0000-0001-5892-7431'),
     ]
     for left, right in sample_pairs:
         # The two directions must not both be true.
         greater_both_ways = (affiliation_is_greater(left, right) and
                              affiliation_is_greater(right, left))
         self.assertFalse(greater_both_ways)
         # A value is never greater than itself.
         for value in (left, right):
             self.assertFalse(affiliation_is_greater(value, value))
Example #2
0
    def update_author_names(self, new_author_names, new_affiliations=None):
        """
        Improves the current list of authors by considering a new list of author names.
        Missing authors are added, and names are unified.
        If affiliations are provided, they will replace the old ones if they are
        more informative (as decided by affiliation_is_greater).

        :param new_author_names: list of Name instances (the order matters).
            NOTE(review): the unified names are compared against
            (first, last) tuples below, so these may actually be name
            pairs - confirm against the callers.
        :param new_affiliations: (optional) list of affiliation strings for the new author names.
            It must have the same length as new_author_names; when omitted,
            every new author gets None as affiliation.
        """

        if new_affiliations is None:
            new_affiliations = [None] * len(new_author_names)
        assert len(new_author_names) == len(new_affiliations)
        # Drop any cached author list so that the snapshot below is fresh
        if hasattr(self, 'authors'):
            del self.authors
        old_authors = list(self.authors)

        # Invalidate cached properties
        if hasattr(self, 'interesting_authors'):
            del self.interesting_authors

        # Built as a real list (not a lazy map object) so that
        # unify_name_lists gets a sequence on every Python version.
        old_names = [(a.name.first, a.name.last) for a in old_authors]
        unified_names = unify_name_lists(old_names, new_author_names)
        seen_old_names = set()
        # Each entry pairs a unified name with (idx, new_idx): idx indexes
        # old_authors, new_idx indexes new_affiliations; either can be None.
        for i, (new_name, (idx, new_idx)) in enumerate(unified_names):
            if idx is not None:  # Updating the name of an existing author
                seen_old_names.add(idx)
                author = old_authors[idx]
                if new_name is None:
                    # Delete that author, it was pruned because it already
                    # appears elsewhere
                    if author.id is not None:
                        author.delete()
                    continue
                # Names of the attributes changed on this author; only used
                # to decide whether a save is needed.
                fields = []
                if idx != i:
                    author.position = i
                    fields.append('position')
                if new_name != (author.name.first, author.name.last):
                    name = Name.lookup_name(new_name)
                    name.save()
                    author.name = name
                    fields.append('name')
                if new_idx is not None and affiliation_is_greater(
                        new_affiliations[new_idx], author.affiliation):
                    author.affiliation = new_affiliations[new_idx]
                    fields.append('affiliation')
                    author.update_name_variants_if_needed()
                if fields:
                    author.name.save_if_not_saved()
                    author.save()
            elif new_name is not None:  # Creating a new author
                name = Name.lookup_name(new_name)
                name.save()
                author = Author(paper=self,
                                name=name,
                                position=i,
                                affiliation=new_affiliations[new_idx])
                author.save()

        # Just in case unify_name_lists pruned authors without telling us…
        for idx, author in enumerate(old_authors):
            if idx in seen_old_names:
                continue
            if author.pk is None:
                # Same guard as for pruned authors above: without a primary
                # key there is nothing to delete, and "%d" would fail on None.
                continue
            print("** Deleting author %d **" % author.pk)
            author.delete()

        # Invalidate our local cache
        if hasattr(self, 'authors'):
            del self.authors
Example #3
0
 def test_affiliation_is_greater(self):
     """
     Check the exact boolean returned by affiliation_is_greater on a fixed
     list of (first, second) argument pairs, in order.
     """
     expectations = [
         ((None, None), False),
         ((None, 'UPenn'), False),
         (('UPenn', None), True),
         (('0000-0001-8633-6098', 'Ecole normale superieure, Paris'), True),
         (('Ecole normale superieure', 'Upenn'), True),
     ]
     for (first, second), expected in expectations:
         self.assertEqual(affiliation_is_greater(first, second), expected)
Example #4
0
    def save_doi_metadata(self,
                          metadata,
                          extra_affiliations=None,
                          allow_unknown_authors=False):
        """
        Given the metadata as Citeproc+JSON or from CrossRef, create the associated paper and publication

        :param metadata: dict with at least an 'author' key and non-empty
            'title' and 'DOI' entries
        :param extra_affiliations: an optional affiliations list, which will be unified
            with the affiliations extracted from the metadata. This is useful for the ORCID interface.
            It is only taken into account when it has one entry per element
            of metadata['author'].
        :param allow_unknown_authors: create the paper even if no author matches our researchers
        :returns: the paper, created if needed
        :raises ValueError: if the metadata is not a dict, if the authors,
            title, DOI or publication date are missing, or if no acceptable
            author remains
        """
        # Normalize metadata
        if not isinstance(metadata, dict):
            if metadata is not None:
                # Single-argument print calls behave the same on Python 2 and 3
                print("WARNING: Invalid metadata: type is " + str(
                    type(metadata)))
                print("The doi proxy is doing something nasty!")
            raise ValueError('Invalid metadata format, expecting a dict')
        if 'author' not in metadata:
            raise ValueError('No author provided')

        if not metadata.get('title'):
            raise ValueError('No title')

        if not metadata.get('DOI'):
            raise ValueError("No DOI, skipping")
        # NOTE(review): the converted DOI is not used further in this method;
        # the call is kept so that to_doi still runs on the raw value.
        doi = to_doi(metadata['DOI'])

        pubdate = get_publication_date(metadata)

        if pubdate is None:
            raise ValueError('No pubdate')

        title = metadata['title']
        # CrossRef metadata stores titles in lists
        if isinstance(title, list):
            title = title[0]
        subtitle = metadata.get('subtitle')
        if subtitle:
            if isinstance(subtitle, list):
                subtitle = subtitle[0]
            title += ': ' + subtitle

        author_elems = metadata['author']
        name_pairs = [convert_to_name_pair(elem) for elem in author_elems]
        # One entry per element of metadata['author']; None marks the ones
        # for which the lookup gave no name.
        looked_up = [name_lookup_cache.lookup(pair) for pair in name_pairs]
        authors = [name for name in looked_up if name is not None]
        if not authors or (not allow_unknown_authors
                           and all(not elem.is_known for elem in authors)):
            raise ValueError('No known author')

        def get_affiliation(author_elem):
            # First, look for an ORCID id
            orcid = validate_orcid(author_elem.get('ORCID'))
            if orcid:
                return orcid
            # Otherwise return the plain affiliation, if any
            for dct in author_elem.get('affiliation', []):
                if 'name' in dct:
                    return dct['name']
            return None

        affiliations = [get_affiliation(elem) for elem in author_elems]
        if extra_affiliations and len(affiliations) == len(extra_affiliations):
            for i, extra in enumerate(extra_affiliations):
                if affiliation_is_greater(extra, affiliations[i]):
                    affiliations[i] = extra

        # Drop the affiliations of the authors that were filtered out above,
        # so that both lists stay aligned position by position.
        affiliations = [
            affiliation
            for name, affiliation in zip(looked_up, affiliations)
            if name is not None
        ]

        paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                 affiliations)

        result = create_publication(paper, metadata)

        if result is None:  # Creating the publication failed!
            paper.update_visibility()
            # Make sure the paper only appears if it is still associated
            # with another source.
            # TODO add unit test for this
        else:
            paper = result[0]

        return paper
Example #5
0
    def save_doi_metadata(self, metadata, extra_affiliations=None, allow_unknown_authors=False):
        """
        Given the metadata as Citeproc+JSON or from CrossRef, create the associated paper and publication

        :param metadata: dict with at least an 'author' key and non-empty
            'title' and 'DOI' entries
        :param extra_affiliations: an optional affiliations list, which will be unified
            with the affiliations extracted from the metadata. This is useful for the ORCID interface.
            It is only taken into account when it has one entry per element
            of metadata['author'].
        :param allow_unknown_authors: create the paper even if no author matches our researchers
        :returns: the paper, created if needed
        :raises ValueError: if the metadata is not a dict, if the authors,
            title, DOI or publication date are missing, or if no acceptable
            author remains
        """
        # Normalize metadata
        if not isinstance(metadata, dict):
            if metadata is not None:
                # Single-argument print calls behave the same on Python 2 and 3
                print("WARNING: Invalid metadata: type is "+str(type(metadata)))
                print("The doi proxy is doing something nasty!")
            raise ValueError('Invalid metadata format, expecting a dict')
        if 'author' not in metadata:
            raise ValueError('No author provided')

        if not metadata.get('title'):
            raise ValueError('No title')

        if not metadata.get('DOI'):
            raise ValueError("No DOI, skipping")
        # NOTE(review): the converted DOI is not used further in this method;
        # the call is kept so that to_doi still runs on the raw value.
        doi = to_doi(metadata['DOI'])

        pubdate = get_publication_date(metadata)

        if pubdate is None:
            raise ValueError('No pubdate')

        title = metadata['title']
        # CrossRef metadata stores titles in lists
        if isinstance(title, list):
            title = title[0]
        subtitle = metadata.get('subtitle')
        if subtitle:
            if isinstance(subtitle, list):
                subtitle = subtitle[0]
            title += ': '+subtitle

        author_elems = metadata['author']
        name_pairs = [convert_to_name_pair(elem) for elem in author_elems]
        # One entry per element of metadata['author']; None marks the ones
        # for which the lookup gave no name.
        looked_up = [name_lookup_cache.lookup(pair) for pair in name_pairs]
        authors = [name for name in looked_up if name is not None]
        if not authors or (not allow_unknown_authors and all(not elem.is_known for elem in authors)):
            raise ValueError('No known author')

        def get_affiliation(author_elem):
            # First, look for an ORCID id
            orcid = validate_orcid(author_elem.get('ORCID'))
            if orcid:
                return orcid
            # Otherwise return the plain affiliation, if any
            for dct in author_elem.get('affiliation', []):
                if 'name' in dct:
                    return dct['name']
            return None

        affiliations = [get_affiliation(elem) for elem in author_elems]
        if extra_affiliations and len(affiliations) == len(extra_affiliations):
            for i, extra in enumerate(extra_affiliations):
                if affiliation_is_greater(extra, affiliations[i]):
                    affiliations[i] = extra

        # Drop the affiliations of the authors that were filtered out above,
        # so that both lists stay aligned position by position.
        affiliations = [
            affiliation
            for name, affiliation in zip(looked_up, affiliations)
            if name is not None
        ]

        paper = BarePaper.create(title, authors, pubdate,
                'VISIBLE', affiliations)

        result = create_publication(paper, metadata)

        if result is None: # Creating the publication failed!
            paper.update_visibility()
            # Make sure the paper only appears if it is still associated
            # with another source.
            # TODO add unit test for this
        else:
            paper = result[0]

        return paper
Example #6
0
    def update_author_names(self, new_author_names, new_affiliations=None):
        """
        Improves the current list of authors by considering a new list of author names.
        Missing authors are added, and names are unified.
        If affiliations are provided, they will replace the old ones if they are
        more informative (as decided by affiliation_is_greater).

        :param new_author_names: list of Name instances (the order matters).
            NOTE(review): the unified names are compared against
            (first, last) tuples below, so these may actually be name
            pairs - confirm against the callers.
        :param new_affiliations: (optional) list of affiliation strings for the new author names.
            It must have the same length as new_author_names; when omitted,
            every new author gets None as affiliation.
        """

        if new_affiliations is None:
            new_affiliations = [None]*len(new_author_names)
        assert len(new_author_names) == len(new_affiliations)
        # Drop any cached author list so that the snapshot below is fresh
        if hasattr(self, 'authors'):
            del self.authors
        old_authors = list(self.authors)

        # Invalidate cached properties
        if hasattr(self, 'interesting_authors'):
            del self.interesting_authors

        # Built as a real list (not a lazy map object) so that
        # unify_name_lists gets a sequence on every Python version.
        old_names = [(a.name.first, a.name.last) for a in old_authors]
        unified_names = unify_name_lists(old_names, new_author_names)
        seen_old_names = set()
        # Each entry pairs a unified name with (idx, new_idx): idx indexes
        # old_authors, new_idx indexes new_affiliations; either can be None.
        for i, (new_name, (idx, new_idx)) in enumerate(unified_names):
            if idx is not None: # Updating the name of an existing author
                seen_old_names.add(idx)
                author = old_authors[idx]
                if new_name is None:
                    # Delete that author, it was pruned because it already
                    # appears elsewhere
                    if author.id is not None:
                        author.delete()
                    continue
                # Names of the attributes changed on this author; only used
                # to decide whether a save is needed.
                fields = []
                if idx != i:
                    author.position = i
                    fields.append('position')
                if new_name != (author.name.first, author.name.last):
                    name = Name.lookup_name(new_name)
                    name.save()
                    author.name = name
                    fields.append('name')
                if new_idx is not None and affiliation_is_greater(new_affiliations[new_idx], author.affiliation):
                    author.affiliation = new_affiliations[new_idx]
                    fields.append('affiliation')
                    author.update_name_variants_if_needed()
                if fields:
                    author.name.save_if_not_saved()
                    author.save()
            elif new_name is not None: # Creating a new author
                name = Name.lookup_name(new_name)
                name.save()
                author = Author(paper=self, name=name, position=i, affiliation=new_affiliations[new_idx])
                author.save()

        # Just in case unify_name_lists pruned authors without telling us…
        for idx, author in enumerate(old_authors):
            if idx in seen_old_names:
                continue
            if author.pk is None:
                # Same guard as for pruned authors above: without a primary
                # key there is nothing to delete, and "%d" would fail on None.
                continue
            print("** Deleting author %d **" % author.pk)
            author.delete()

        # Invalidate our local cache
        if hasattr(self, 'authors'):
            del self.authors