Ejemplo n.º 1
0
 def test_parse_bibtex_latex_accents(self):
     """
     LaTeX accent escapes in author names must come back as accented
     Unicode characters.

     See https://github.com/dissemin/dissemin/issues/362
     """
     bibtex = r"""@misc{Nobody06,
 author = "Adri{\`{a}} Mart{\'{\i}}n Mor and Alessandro Beccu",
 title = "My Article",
 year = "2006" }
     """
     # No debug print here: writing the accented name to stdout can fail
     # with an encoding error before the assertion is reached, and a
     # missing author would surface as a KeyError/IndexError rather than
     # as a readable assertion diff.
     expected = {
         'ENTRYTYPE': 'misc',
         'ID': 'Nobody06',
         'author': [('Adrià Martín', 'Mor'), ('Alessandro', 'Beccu')],
         'title': 'My Article',
         'year': '2006',
     }
     self.assertEqual(parse_bibtex(bibtex), expected)
Ejemplo n.º 2
0
 def authors_from_bibtex(self):
     """
     Return the authors found in ``self.bibtex``.

     :returns: the 'author' value of the entry produced by ``parse_bibtex``,
             or an empty list when ``self.bibtex`` is None, when the entry
             has no (or an empty) 'author' value, or when a ValueError is
             raised while handling it.
     """
     if self.bibtex is None:
         return []
     try:
         record = parse_bibtex(self.bibtex)
         if 'author' in record and len(record['author']) != 0:
             return record['author']
         return []
     except ValueError:
         return []
Ejemplo n.º 3
0
 def test_parse_bibtex_latex_accents(self):
     """
     LaTeX accent escapes in author names must come back as accented
     Unicode characters.

     See https://github.com/dissemin/dissemin/issues/362
     """
     bibtex = r"""@misc{Nobody06,
 author = "Adri{\`{a}} Mart{\'{\i}}n Mor and Alessandro Beccu",
 title = "My Article",
 year = "2006" }
     """
     # The former debug print was dropped: it parsed the input a second
     # time and could abort the test with an encoding or lookup error
     # before the real assertion had a chance to report anything.
     self.assertEqual(
         parse_bibtex(bibtex),
         {
             'ENTRYTYPE': 'misc',
             'ID': 'Nobody06',
             'author': [
                 ('Adrià Martín', 'Mor'),
                 ('Alessandro', 'Beccu'),
             ],
             'title': 'My Article',
             'year': '2006',
         }
     )
Ejemplo n.º 4
0
 def test_parse_bibtex(self):
     """
     A plain entry with "Last, First" authors is parsed into
     (first, last) pairs; the trailing "others" is not expected in the
     result.
     """
     bibtex = """@misc{ Nobody06,
 author = "Orti, E. and Bredas, J.L. and Clarisse, C. and others",
 title = "My Article",
 year = "2006" }
     """
     parsed = parse_bibtex(bibtex)
     expected_authors = [
         ('E.', 'Orti'),
         ('J. L.', 'Bredas'),
         ('C.', 'Clarisse'),
     ]
     expected = {
         'ENTRYTYPE': 'misc',
         'ID': 'Nobody06',
         'author': expected_authors,
         'title': 'My Article',
         'year': '2006',
     }
     self.assertEqual(parsed, expected)
Ejemplo n.º 5
0
    def authors_from_bibtex(self, bibtex):
        """
        Return the authors found in a BibTeX citation.

        :param bibtex: the BibTeX source to parse, or None
        :returns: the 'author' value of the entry produced by
                ``parse_bibtex``, or an empty list when ``bibtex`` is None,
                when a ValueError is raised while handling it, or when the
                entry has no authors (a warning and the citation are
                printed in that last case).
        """
        if bibtex is None:
            return []

        try:
            record = parse_bibtex(bibtex)

            if 'author' in record and len(record['author']) != 0:
                return record['author']

            print ("Warning: ORCiD publication with no authors.")
            print (bibtex)
            return []
        except ValueError:
            return []
Ejemplo n.º 6
0
 def test_parse_bibtex_unicode_accents(self):
     """
     Accented characters written directly in the author field must be
     kept as they are.

     See https://github.com/dissemin/dissemin/issues/362
     """
     bibtex = """@misc{ Nobody06,
 author = "Adrià Martin Mor",
 title = "My Article",
 year = "2006" }
     """
     parsed = parse_bibtex(bibtex)
     expected = {
         'ENTRYTYPE': 'misc',
         'ID': 'Nobody06',
         'author': [('Adrià Martin', 'Mor')],
         'title': 'My Article',
         'year': '2006',
     }
     self.assertEqual(parsed, expected)
Ejemplo n.º 7
0
 def test_parse_bibtex_unicode_accents(self):
     """
     An author name that already contains an accented Unicode character
     is expected unchanged in the parsed entry.

     See https://github.com/dissemin/dissemin/issues/362
     """
     bibtex = """@misc{ Nobody06,
 author = "Adrià Martin Mor",
 title = "My Article",
 year = "2006" }
     """
     expected_entry = {
         'ENTRYTYPE': 'misc',
         'ID': 'Nobody06',
         'author': [('Adrià Martin', 'Mor')],
         'title': 'My Article',
         'year': '2006',
     }
     self.assertEqual(parse_bibtex(bibtex), expected_entry)
Ejemplo n.º 8
0
 def test_parse_bibtex(self):
     """
     Authors written as "Last, First" are expected as (first, last)
     pairs, without an entry for the trailing "others".
     """
     bibtex = """@misc{ Nobody06,
 author = "Orti, E. and Bredas, J.L. and Clarisse, C. and others",
 title = "My Article",
 year = "2006" }
     """
     expected_entry = {
         'ENTRYTYPE': 'misc',
         'ID': 'Nobody06',
         'author': [('E.', 'Orti'), ('J. L.', 'Bredas'), ('C.', 'Clarisse')],
         'title': 'My Article',
         'year': '2006',
     }
     self.assertEqual(parse_bibtex(bibtex), expected_entry)
Ejemplo n.º 9
0
 def test_no_newlines(self):
     """
     An entry written on a single line, with no newlines between its
     fields, must still yield both authors.
     """
     # One field per source line; the pieces concatenate to a single
     # newline-free string.
     bibtex = (
         "@article{DBLP:journals/corr/abs-1804-07832, "
         "author= {Antonin Delpeuch and Jamie Vicary}, "
         "title= {Normal forms for planar connected string diagrams}, "
         "journal= {CoRR}, "
         "volume= {abs/1804.07832}, "
         "year= {2018}, "
         "url= {http://arxiv.org/abs/1804.07832}, "
         "archivePrefix= {arXiv}, "
         "eprint= {1804.07832}, "
         "timestamp= {Wed, 02 May 2018 15:55:01 +0200}, "
         "biburl= {https://dblp.org/rec/bib/journals/corr/abs-1804-07832}, "
         "bibsource= {dblp computer science bibliography, https://dblp.org}}"
     )
     authors = parse_bibtex(bibtex)['author']
     expected = [('Antonin', 'Delpeuch'), ('Jamie', 'Vicary')]
     self.assertEqual(authors, expected)
Ejemplo n.º 10
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        This is a generator function: nothing below runs (including the
        validation of ``id``) until the result is first iterated.

        :param id: The ORCiD identifier; it is passed through ``validate_orcid``
                and the result is used from then on.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if ``validate_orcid`` rejects ``id``.
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Best effort: report the problem and end the generator empty.
            print(e)
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = []  # list of DOIs to fetch

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
        for pub in pubs:
            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j('work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                print("Warning: Skipping ORCID publication: no title")
                # Actually skip, as the message says, instead of going on
                # to build a paper without a title.
                continue

            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (very often not present, in which case the
            # authors of the BibTeX citation are used further down)
            contributors = [
                {
                    'orcid': jpath('contributor-orcid', contrib),
                    'name': jpath('credit-name/value', contrib),
                }
                for contrib in j('work-contributors/contributor', [])
            ]

            author_names = [c['name'] for c in contributors
                            if c['name'] is not None]
            authors = [parse_comma_name(name) for name in author_names]
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = [c['orcid'] for c in contributors]

            # Pubdate
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 1)
            day = parse_int(j('publication-date/day/value'), 1)
            pubdate = None
            try:
                # Refine step by step: when a later, more precise date is
                # invalid, the last valid (coarser) one is kept.
                pubdate = date(year=year, month=1, day=1)
                pubdate = date(year=year, month=month, day=1)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                if pubdate is None:
                    # Not even the year gave a valid date.
                    print("Invalid publication date in ORCID publication, skipping")
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print(citation_format)
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    # .get() rather than indexing: an entry without an
                    # 'author' field must not raise KeyError, which would
                    # abort the whole generator.
                    bibtex_authors = entry.get('author', [])
                    if bibtex_authors == []:
                        print("Warning: Skipping ORCID publication: no authors.")
                        print(bibtex)
                    if not authors:
                        authors = bibtex_authors
                except ValueError:
                    # Unusable citation: go on with the contributors only.
                    pass

            affiliations = affiliate_author_with_orcid(ref_name, id, authors, initial_affiliations=affiliations)

            authors = [name_lookup_cache.lookup(author) for author in authors]

            if not authors:
                print("No authors found, skipping")
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE', affiliations)

            record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier=identifier,
                    splash_url='http://orcid.org/'+id,
                    pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally('', {'orcid':id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = [convert_to_name_pair(author)
                               for author in metadata['author']]
                    affiliations = affiliate_author_with_orcid(ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(
                            source=orcid_oai_source,
                            identifier='orcid:'+id+':'+metadata['DOI'],
                            splash_url='http://orcid.org/'+id,
                            pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError):
                    # Best effort: a record with missing or malformed
                    # metadata is dropped, the others are still processed.
                    pass
Ejemplo n.º 11
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        This is a generator function: nothing below runs (including the
        validation of ``id``) until the result is first iterated.

        :param id: The ORCiD identifier; it is passed through ``validate_orcid``
                and the result is used from then on.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if ``validate_orcid`` rejects ``id``.
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Best effort: report the problem and end the generator empty.
            print(e)
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = []  # list of DOIs to fetch

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                     profile, [])
        for pub in pubs:

            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j(
                    'work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(
                        jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                print("Warning: Skipping ORCID publication: no title")
                # Actually skip, as the message says, instead of going on
                # to build a paper without a title.
                continue

            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (very often not present, in which case the
            # authors of the BibTeX citation are used further down)
            contributors = [
                {
                    'orcid': jpath('contributor-orcid', contrib),
                    'name': jpath('credit-name/value', contrib),
                }
                for contrib in j('work-contributors/contributor', [])
            ]

            author_names = [
                c['name'] for c in contributors if c['name'] is not None
            ]
            authors = [parse_comma_name(name) for name in author_names]
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = [c['orcid'] for c in contributors]

            # Pubdate
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 1)
            day = parse_int(j('publication-date/day/value'), 1)
            pubdate = None
            try:
                # Refine step by step: when a later, more precise date is
                # invalid, the last valid (coarser) one is kept.
                pubdate = date(year=year, month=1, day=1)
                pubdate = date(year=year, month=month, day=1)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                if pubdate is None:
                    # Not even the year gave a valid date.
                    print("Invalid publication date in ORCID publication, skipping")
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print(citation_format)
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    # .get() rather than indexing: an entry without an
                    # 'author' field must not raise KeyError, which would
                    # abort the whole generator.
                    bibtex_authors = entry.get('author', [])
                    if bibtex_authors == []:
                        print("Warning: Skipping ORCID publication: no authors.")
                        print(bibtex)
                    if not authors:
                        authors = bibtex_authors
                except ValueError:
                    # Unusable citation: go on with the contributors only.
                    pass

            affiliations = affiliate_author_with_orcid(
                ref_name, id, authors, initial_affiliations=affiliations)

            authors = [name_lookup_cache.lookup(author) for author in authors]

            if not authors:
                print("No authors found, skipping")
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                     affiliations)

            record = BareOaiRecord(source=orcid_oai_source,
                                   identifier=identifier,
                                   splash_url='http://orcid.org/' + id,
                                   pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally(
                    '', {'orcid': id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print("Saving CrossRef record from ORCID failed: %s" %
                          unicode(e))

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = [
                        convert_to_name_pair(author)
                        for author in metadata['author']
                    ]
                    affiliations = affiliate_author_with_orcid(
                        ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(source=orcid_oai_source,
                                           identifier='orcid:' + id + ':' +
                                           metadata['DOI'],
                                           splash_url='http://orcid.org/' + id,
                                           pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError):
                    # Best effort: a record with missing or malformed
                    # metadata is dropped, the others are still processed.
                    pass
Ejemplo n.º 12
0
 def test_no_newlines(self):
     """
     A BibTeX entry given on one line, without any newline between its
     fields, is expected to produce both of its authors.
     """
     bibtex = "@article{DBLP:journals/corr/abs-1804-07832, author= {Antonin Delpeuch and Jamie Vicary}, title= {Normal forms for planar connected string diagrams}, journal= {CoRR}, volume= {abs/1804.07832}, year= {2018}, url= {http://arxiv.org/abs/1804.07832}, archivePrefix= {arXiv}, eprint= {1804.07832}, timestamp= {Wed, 02 May 2018 15:55:01 +0200}, biburl= {https://dblp.org/rec/bib/journals/corr/abs-1804-07832}, bibsource= {dblp computer science bibliography, https://dblp.org}}"
     expected_authors = [
         ('Antonin', 'Delpeuch'),
         ('Jamie', 'Vicary'),
     ]
     parsed = parse_bibtex(bibtex)
     self.assertEqual(parsed['author'], expected_authors)