Example #1
0
 def setUpClass(self):
     """
     Prepare the data shared by the tests of this class.

     Runs the parent ``setUpClass``, clears ``self.ccf``, then calls
     ``fetch_and_save`` incrementally for ``self.r2`` on a CrossRef source
     and on an OAI source, both built on ``self.ccf``.
     """
     super(StatisticsTest, self).setUpClass()
     self.ccf.clear()
     # Both sources are instantiated before the first fetch is started.
     paper_sources = [
         CrossRefPaperSource(self.ccf),
         OaiPaperSource(self.ccf),
     ]
     for paper_source in paper_sources:
         paper_source.fetch_and_save(self.r2, incremental=True)
Example #2
0
 def setUpClass(self):
     """
     Prepare the data shared by the tests of this class.

     Runs the parent ``setUpClass``, clears ``self.ccf``, then calls
     ``fetch_and_save`` incrementally for ``self.r2`` on a CrossRef source
     and on an OAI source, both built on ``self.ccf``.
     """
     super(StatisticsTest, self).setUpClass()
     self.ccf.clear()
     # Both sources are instantiated before the first fetch is started.
     paper_sources = [
         CrossRefPaperSource(self.ccf),
         OaiPaperSource(self.ccf),
     ]
     for paper_source in paper_sources:
         paper_source.fetch_and_save(self.r2, incremental=True)
Example #3
0
 def setUpClass(self):
     """
     Prepare the data shared by the tests of this class.

     Runs the parent ``setUpClass``, then calls ``fetch_and_save``
     incrementally for ``self.r3`` on a CrossRef source built on
     ``self.ccf``.
     """
     super(DepositPagesTest, self).setUpClass()
     CrossRefPaperSource(self.ccf).fetch_and_save(self.r3, incremental=True)
Example #4
0
    def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param orcid_identifier: The ORCiD to look up. It is passed through
                ``validate_orcid`` before being used.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if ``validate_orcid`` rejects the identifier.
                As this method is a generator, the exception is only raised
                once iteration starts.
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=orcid_id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Without a profile there is nothing to iterate over:
            # report the error and end the generator without yielding.
            print(e)
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = [] # list of DOIs to fetch
        ignored_papers = [] # list of ignored papers due to incomplete metadata

        # Fetch publications (1st attempt with ORCiD data)
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
        for pub in pubs:
            data_paper = ORCIDDataPaper.from_orcid_metadata(
                ref_name,
                orcid_id,
                pub,
                stop_if_dois_exists=use_doi
            )

            if data_paper.dois and use_doi: # We want to batch it rather than manually do it.
                dois.extend(data_paper.dois)
                continue

            # If the paper is skipped due to invalid metadata.
            # We first try to reconcile it with local researcher author name.
            # Then, we consider it missed.
            if data_paper.skipped:
                print ('%s is skipped due to incorrect metadata (%s)' % (data_paper, data_paper.skip_reason))

                print ('Trying to reconcile it with local researcher.')
                data_paper = self.reconcile_paper(
                    ref_name,
                    orcid_id,
                    pub,
                    overrides={
                        'authors': [(self.researcher.name.first, self.researcher.name.last)]
                    }
                )
                if data_paper.skipped:
                    ignored_papers.append(data_paper.as_dict())
                    continue

            yield self.create_paper(data_paper)

        # 2nd attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers from CrossRef
            for success, paper_or_metadata in self.fetch_crossref_incrementally(crps, orcid_id):
                if success:
                    yield paper_or_metadata
                else:
                    ignored_papers.append(paper_or_metadata)
                    # Report the failed item itself: no variable named
                    # `metadata` exists in this scope.
                    print ('This metadata (%s) yields no paper.' % (paper_or_metadata))

            # Let's grab papers with DOIs found in our ORCiD profile.
            # FIXME(RaitoBezarius): if we fail here, we should get back the pub and yield it.
            for success, paper_or_metadata in self.fetch_metadata_from_dois(crps, ref_name, orcid_id, dois):
                if success:
                    yield paper_or_metadata
                else:
                    ignored_papers.append(paper_or_metadata)
                    print ('This metadata (%s) yields no paper.' % (paper_or_metadata))

        # Called even when the list is empty; the summary line below is only
        # printed when something was actually ignored.
        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            print ('Warning: Total ignored papers: %d' % (len(ignored_papers)))
Example #5
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param id: The ORCiD to look up. It is passed through ``validate_orcid``
                before being used.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if ``validate_orcid`` rejects the identifier.
                As this method is a generator, the exception is only raised
                once iteration starts.
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Without a profile there is nothing to iterate over:
            # report the error and end the generator without yielding.
            print(e)
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = [] # list of DOIs to fetch

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
        for pub in pubs:
            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j('work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                # Actually skip the record, as the message announces.
                print("Warning: Skipping ORCID publication: no title")
                continue

            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (ignored for now as they are very often not present)
            def get_contrib(js):
                return {
                     'orcid':jpath('contributor-orcid', js),
                     'name': jpath('credit-name/value', js),
                    }
            contributors = [get_contrib(js)
                            for js in j('work-contributors/contributor', [])]

            author_names = [contrib['name'] for contrib in contributors
                            if contrib['name'] is not None]
            authors = [parse_comma_name(name) for name in author_names]
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = [contrib['orcid'] for contrib in contributors]
            # Pubdate
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 1)
            day = parse_int(j('publication-date/day/value'), 1)
            pubdate = None
            try:
                # Progressively refine the date: if a later, more precise
                # construction fails, the last valid value is kept.
                pubdate = date(year=year, month=1, day=1)
                pubdate = date(year=year, month=month, day=1)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                # Only give up when even the year alone was not usable.
                if pubdate is None:
                    print("Invalid publication date in ORCID publication, skipping")
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print(citation_format)
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    # A missing 'author' key is treated like an empty list
                    # instead of raising KeyError out of the generator.
                    bibtex_authors = entry.get('author') or []
                    if not bibtex_authors:
                        print("Warning: Skipping ORCID publication: no authors.")
                        print(j('work-citation/citation'))
                    if not authors:
                        authors = bibtex_authors
                except ValueError:
                    pass

            affiliations = affiliate_author_with_orcid(ref_name, id, authors, initial_affiliations=affiliations)

            authors = [name_lookup_cache.lookup(author) for author in authors]

            if not authors:
                print("No authors found, skipping")
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE', affiliations)

            record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier=identifier,
                    splash_url='http://orcid.org/'+id,
                    pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally('', {'orcid':id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = [convert_to_name_pair(author)
                               for author in metadata['author']]
                    affiliations = affiliate_author_with_orcid(ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(
                            source=orcid_oai_source,
                            identifier='orcid:'+id+':'+metadata['DOI'],
                            splash_url='http://orcid.org/'+id,
                            pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError) as e:
                    # Best effort: a faulty record must not stop the import,
                    # but it should not vanish silently either.
                    print("Saving DOI record from ORCID failed: %r" % (e,))
Example #6
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param id: The ORCiD to look up. It is passed through ``validate_orcid``
                before being used.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if ``validate_orcid`` rejects the identifier.
                As this method is a generator, the exception is only raised
                once iteration starts.
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Without a profile there is nothing to iterate over:
            # report the error and end the generator without yielding.
            print(e)
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = []  # list of DOIs to fetch

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                     profile, [])
        for pub in pubs:

            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j(
                    'work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(
                        jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                # Actually skip the record, as the message announces.
                print("Warning: Skipping ORCID publication: no title")
                continue

            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (ignored for now as they are very often not present)
            def get_contrib(js):
                return {
                    'orcid': jpath('contributor-orcid', js),
                    'name': jpath('credit-name/value', js),
                }

            contributors = [
                get_contrib(js)
                for js in j('work-contributors/contributor', [])
            ]

            author_names = [
                contrib['name'] for contrib in contributors
                if contrib['name'] is not None
            ]
            authors = [parse_comma_name(name) for name in author_names]
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = [contrib['orcid'] for contrib in contributors]
            # Pubdate
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 1)
            day = parse_int(j('publication-date/day/value'), 1)
            pubdate = None
            try:
                # Progressively refine the date: if a later, more precise
                # construction fails, the last valid value is kept.
                pubdate = date(year=year, month=1, day=1)
                pubdate = date(year=year, month=month, day=1)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                # Only give up when even the year alone was not usable.
                if pubdate is None:
                    print("Invalid publication date in ORCID publication, skipping")
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print(citation_format)
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    # A missing 'author' key is treated like an empty list
                    # instead of raising KeyError out of the generator.
                    bibtex_authors = entry.get('author') or []
                    if not bibtex_authors:
                        print("Warning: Skipping ORCID publication: no authors.")
                        print(j('work-citation/citation'))
                    if not authors:
                        authors = bibtex_authors
                except ValueError:
                    pass

            affiliations = affiliate_author_with_orcid(
                ref_name, id, authors, initial_affiliations=affiliations)

            authors = [name_lookup_cache.lookup(author) for author in authors]

            if not authors:
                print("No authors found, skipping")
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                     affiliations)

            record = BareOaiRecord(source=orcid_oai_source,
                                   identifier=identifier,
                                   splash_url='http://orcid.org/' + id,
                                   pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally(
                    '', {'orcid': id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print("Saving CrossRef record from ORCID failed: %s" %
                          unicode(e))

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = [
                        convert_to_name_pair(author)
                        for author in metadata['author']
                    ]
                    affiliations = affiliate_author_with_orcid(
                        ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(source=orcid_oai_source,
                                           identifier='orcid:' + id + ':' +
                                           metadata['DOI'],
                                           splash_url='http://orcid.org/' + id,
                                           pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError) as e:
                    # Best effort: a faulty record must not stop the import,
                    # but it should not vanish silently either.
                    print("Saving DOI record from ORCID failed: %r" % (e,))
Example #7
0
 def setUpClass(self):
     """
     Prepare the data shared by the tests of this class.

     Runs the parent ``setUpClass``, then calls ``fetch_and_save``
     incrementally for ``self.r3`` on a CrossRef source built on
     ``self.ccf``.
     """
     super(DepositPagesTest, self).setUpClass()
     CrossRefPaperSource(self.ccf).fetch_and_save(self.r3, incremental=True)