Beispiel #1
0
    def create_paper(self, work):
        """
        Build a paper from ``work``, attach an OAI record and save it.

        :param work: object providing ``skipped``, ``authors_and_orcids``,
            ``title``, ``pubdate``, ``api_uri``, ``splash_url`` and ``pubtype``
        :returns: the saved paper, or None if a ValueError was raised while
            converting, associating researchers, saving or indexing
        """
        assert (not work.skipped)

        names, orcid_list = work.authors_and_orcids
        bare = BarePaper.create(
            work.title,
            names,
            work.pubdate,
            visible=True,
            affiliations=None,
            orcids=orcid_list,
        )
        oai_record = BareOaiRecord(
            source=self.oai_source,
            identifier=work.api_uri,
            splash_url=work.splash_url,
            pubtype=work.pubtype,
        )
        bare.add_oairecord(oai_record)

        try:
            saved = Paper.from_bare(bare)
            saved = self.associate_researchers(saved)
            saved.save()
            saved.update_index()
        except ValueError:
            # Any ValueError along the way means no paper is returned
            return None
        return saved
Beispiel #2
0
    def translate(self, header, metadata):
        """
        Creates a BarePaper

        :param header: OAI header; only ``header.identifier()`` is used here,
            the header is otherwise passed on to the helper methods
        :param metadata: mapping whose 'title' entry is a list of titles
        :returns: the paper, or None when the title, authors or publication
            date is missing, when no source can be determined, or when a
            ValueError is raised while creating the paper or its record
        """
        # We need three things to create a paper:
        # publication date, authors and title
        pubdate = self.find_earliest_oai_date(metadata)
        authors = self.get_oai_authors(metadata)
        if not metadata.get('title') or not authors or not pubdate:
            return

        # Find the OAI source
        source = self.get_source(header, metadata)
        if not source:
            print("Invalid source from the proxy, skipping")
            return

        # Create paper and record.
        # `paper` is pre-initialised so the handler below can tell whether
        # BarePaper.create itself failed (nothing to update) or only the
        # record could not be added.
        paper = None
        try:
            paper = BarePaper.create(metadata['title'][0], authors, pubdate)
            self.add_oai_record(header, metadata, source, paper)
            return paper
        except ValueError as e:
            print("Warning, OAI record "+header.identifier()+" skipped:\n"+unicode(e))
            if paper is not None:
                paper.update_availability()
Beispiel #3
0
    def translate(self, header, metadata):
        """
        Creates a BarePaper

        :param header: OAI header; only ``header.identifier()`` is used here,
            the header is otherwise passed on to ``add_oai_record``
        :param metadata: mapping whose 'title' entry is a list of titles
        :returns: the paper, or None when the title, authors or publication
            date is missing, or when a ValueError is raised while creating
            the paper or its record
        """
        # We need three things to create a paper:
        # publication date, authors and title
        pubdate = self.find_earliest_oai_date(metadata)
        authors = self.get_oai_authors(metadata)
        if not metadata.get('title') or not authors or not pubdate:
            logger.debug("No title, authors or pubdate")
            return

        # Create paper and record.
        # `paper` is pre-initialised so the handler below can tell whether
        # BarePaper.create itself failed (nothing to update) or only the
        # record could not be added.
        paper = None
        try:
            paper = BarePaper.create(metadata['title'][0], authors, pubdate)
            self.add_oai_record(header, metadata, paper)
            return paper
        except ValueError as e:
            # Every positional argument needs a placeholder, otherwise the
            # logging module fails to format the record.
            logger.warning("OAI record %s skipped:\n%s",
                           header.identifier(),
                           e,
                           exc_info=True)
            if paper is not None:
                paper.update_availability()
Beispiel #4
0
    def create_paper(self, data_paper):
        """
        Build a visible paper from ``data_paper`` and attach one OAI record.

        :param data_paper: object providing ``skipped``, ``title``,
            ``authors``, ``pubdate``, ``orcids``, ``identifier``,
            ``splash_url`` and ``doctype``
        :returns: the object returned by ``BarePaper.create``, with the
            record added
        """
        assert (not data_paper.skipped)

        bare = BarePaper.create(
            data_paper.title,
            data_paper.authors,
            data_paper.pubdate,
            visible=True,
            affiliations=None,
            orcids=data_paper.orcids,
        )
        bare.add_oairecord(BareOaiRecord(
            source=orcid_oai_source(),
            identifier=data_paper.identifier,
            splash_url=data_paper.splash_url,
            pubtype=data_paper.doctype,
        ))
        return bare
Beispiel #5
0
    def create_paper(self, data_paper):
        """
        Build a paper from ``data_paper`` and attach one OAI record.

        The paper is created with the positional arguments title, authors,
        publication date, the string 'VISIBLE' and the affiliations.

        :param data_paper: object providing ``skipped``, ``title``,
            ``authors``, ``pubdate``, ``affiliations``, ``identifier``,
            ``splash_url`` and ``doctype``
        :returns: the object returned by ``BarePaper.create``, with the
            record added
        """
        assert (not data_paper.skipped)

        new_paper = BarePaper.create(data_paper.title,
                                     data_paper.authors,
                                     data_paper.pubdate,
                                     'VISIBLE',
                                     data_paper.affiliations)
        oai_record = BareOaiRecord(source=orcid_oai_source,
                                   identifier=data_paper.identifier,
                                   splash_url=data_paper.splash_url,
                                   pubtype=data_paper.doctype)
        new_paper.add_oairecord(oai_record)
        return new_paper
Beispiel #6
0
    def test_add_author(self):
        """
        add_author appends by default, inserts at ``position`` when given,
        and raises ValueError for position 8 on a three-author paper
        """
        peter = BareName.create('Peter', 'Johnstone')
        xing = BareName.create('Xing', 'Li')
        john = BareName.create('John', 'Dubuc')
        paper = BarePaper.create(
            'The title',
            [peter],
            datetime.date(year=2012, month=1, day=9),
        )

        # Without a position the author goes to the end
        paper.add_author(BareAuthor(name=john))
        self.assertEqual(len(paper.authors), 2)

        # With a position the author is inserted in between
        paper.add_author(BareAuthor(name=xing), position=1)
        self.assertListEqual(paper.author_names(), [peter, xing, john])

        outsider = BareAuthor(name=BareName.create('Cantor', 'Bernstein'))
        with self.assertRaises(ValueError):
            paper.add_author(outsider, position=8)
Beispiel #7
0
    def create_paper(self, work):
        """
        Build a visible paper from ``work`` and attach one OAI record.

        :param work: object providing ``skipped``, ``authors_and_orcids``,
            ``title``, ``pubdate``, ``api_uri``, ``splash_url`` and ``pubtype``
        :returns: the object returned by ``BarePaper.create``, with the
            record added
        """
        assert (not work.skipped)

        author_list, orcid_list = work.authors_and_orcids
        result = BarePaper.create(work.title,
                                  author_list,
                                  work.pubdate,
                                  visible=True,
                                  affiliations=None,
                                  orcids=orcid_list)
        result.add_oairecord(
            BareOaiRecord(source=orcid_oai_source(),
                          identifier=work.api_uri,
                          splash_url=work.splash_url,
                          pubtype=work.pubtype))
        return result
Beispiel #8
0
    def create_paper(self, work):
        """
        Build a visible paper from ``work`` and attach one OAI record whose
        source is ``self.oai_source``.

        :param work: object providing ``skipped``, ``authors_and_orcids``,
            ``title``, ``pubdate``, ``api_uri``, ``splash_url`` and ``pubtype``
        :returns: the object returned by ``BarePaper.create``, with the
            record added
        """
        assert (not work.skipped)

        people, orcid_ids = work.authors_and_orcids
        bare_paper = BarePaper.create(work.title, people, work.pubdate,
                                      visible=True,
                                      affiliations=None,
                                      orcids=orcid_ids)
        oai_record = BareOaiRecord(source=self.oai_source,
                                   identifier=work.api_uri,
                                   splash_url=work.splash_url,
                                   pubtype=work.pubtype)
        bare_paper.add_oairecord(oai_record)
        return bare_paper
Beispiel #9
0
    def translate(self, header, metadata):
        """
        Creates a BarePaper

        :param header: OAI header; only ``header.identifier()`` is used here,
            the header is otherwise passed on to ``add_oai_record``
        :param metadata: mapping whose 'title' entry is a list of titles
        :returns: the paper, or None when the title, authors or publication
            date is missing, or when a ValueError is raised while creating
            the paper or its record
        """
        # We need three things to create a paper:
        # - publication date
        pubdate = self.find_earliest_oai_date(metadata)
        # - authors
        authors = self.get_oai_authors(metadata)

        # - title
        if not metadata.get('title') or not authors or not pubdate:
            logger.debug("No title, authors or pubdate")
            return

        # Create paper and record.
        # `paper` starts as None so that the error handler does not touch an
        # unbound name when BarePaper.create is the call that failed.
        paper = None
        try:
            paper = BarePaper.create(metadata['title'][0], authors, pubdate)
            self.add_oai_record(header, metadata, paper)
            return paper
        except ValueError as e:
            # One placeholder per argument: a bare extra argument makes the
            # logging module fail while formatting the message.
            logger.warning("OAI record %s skipped:\n%s", header.identifier(), e, exc_info=True)
            if paper is not None:
                paper.update_availability()
Beispiel #10
0
    def to_paper(cls, data):
        """
        Call this function to convert citeproc metadata into a paper object.
        Our strategy is as follows:
        We first collect all the necessary data; if something is missing, we raise CiteprocError.
        Once everything has been collected, we pass it to the corresponding bare models.
        :param data: citeproc metadata. Note that CrossRef puts its citeproc into a message block
        :returns: Paper object
        :raises: CiteprocError
        """
        if not isinstance(data, dict):
            raise CiteprocError('Invalid metadaformat, expecting dict')

        # Collect everything first, so that nothing is built from partial data
        paper_kwargs = cls._get_paper_data(data)
        record_kwargs = cls._get_oairecord_data(data)

        bare = BarePaper.create(**paper_kwargs)
        bare.add_oairecord(BareOaiRecord(paper=bare, **record_kwargs))
        bare.update_availability()

        converted = Paper.from_bare(bare)
        converted.update_index()
        return converted
Beispiel #11
0
    def test_add_author(self):
        """
        add_author puts the new author last unless a position is given,
        and raises ValueError for position 8 on a three-author paper
        """
        expected = [
            BareName.create(first, last)
            for first, last in (('Peter', 'Johnstone'),
                                ('Xing', 'Li'),
                                ('John', 'Dubuc'))
        ]
        first_name, middle_name, last_name = expected
        pubdate = datetime.date(year=2012, month=1, day=9)
        p = BarePaper.create('The title', [first_name], pubdate)

        p.add_author(BareAuthor(name=last_name))
        self.assertEqual(len(p.authors), 2)

        p.add_author(BareAuthor(name=middle_name), position=1)
        self.assertListEqual(p.author_names(), expected)

        extra = BareAuthor(name=BareName.create('Cantor', 'Bernstein'))
        with self.assertRaises(ValueError):
            p.add_author(extra, position=8)
Beispiel #12
0
def api_paper_query(request):
    """
    Answer a paper query sent as a JSON object in the request body.

    If the object has a non-empty 'doi', the paper is obtained through
    ``Paper.create_by_doi``. Otherwise a bare paper is built from:

    - 'title': non-empty unicode string of at most 512 characters
    - 'date': unicode string accepted by ``tolerant_datestamp_to_datetime``
    - 'authors': non-empty list of dicts holding either 'first' and 'last',
      or 'plain' (parsed with ``parse_comma_name``)

    :param request: object whose ``body`` holds the UTF-8 encoded JSON
    :returns: ``{'status': 'ok', 'paper': <paper as JSON>}``
    :raises BadRequest: on any invalid or missing input
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON that is not an object (a list, a string, null...) has no
    # .get(): report it as a client error instead of crashing below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.create_by_doi(doi, bare=True)
        except MetadataSourceException:
            # Reported to the client through the check just below
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, unicode) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, unicode):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(unicode(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        author = None
        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], unicode)
                    or not isinstance(a['last'], unicode)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], unicode) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
Beispiel #13
0
    def save_doi_metadata(self, metadata, extra_orcids=None):
        """
        Given the metadata as Citeproc+JSON or from CrossRef, create the associated paper and publication

        :param metadata: dict holding at least 'author', 'title' and 'DOI'
        :param extra_orcids: an optional orcids list, which will be unified
            with the orcids extracted from the metadata. This is useful for the ORCID interface.
        :returns: the paper, created if needed
        :raises ValueError: if the metadata is not a dict, if it lacks
            authors, a title, a DOI accepted by ``to_doi`` or a publication
            date, or if an author cannot be converted to a name pair
        """
        # Normalize metadata
        if not isinstance(metadata, dict):
            raise ValueError('Invalid metadata format, expecting a dict')
        if not metadata.get('author'):
            raise ValueError('No author provided')

        if not metadata.get('title'):
            raise ValueError('No title')

        if not to_doi(metadata.get('DOI')):
            raise ValueError("No DOI, skipping")

        pubdate = get_publication_date(metadata)
        if pubdate is None:
            raise ValueError('No pubdate')

        title = metadata['title']
        # CrossRef metadata stores titles in lists
        if isinstance(title, list):
            title = title[0]
        subtitle = metadata.get('subtitle')
        if subtitle:
            if isinstance(subtitle, list):
                subtitle = subtitle[0]
            title += ': '+subtitle

        # Real lists are required here: the pairs are searched for None and
        # then iterated again, which a one-shot map() iterator cannot do.
        name_pairs = [convert_to_name_pair(elem)
                      for elem in metadata['author']]
        if None in name_pairs:
            raise ValueError('Invalid author')
        authors = [BareName.create_bare(first, last) for first, last in
                   name_pairs]

        def get_affiliation(author_elem):
            # Name of the first affiliation that has one, else None
            for dct in author_elem.get('affiliation', []):
                if 'name' in dct:
                    return dct['name']

        def get_orcid(author_elem):
            # Validated ORCID of the author, or None
            orcid = validate_orcid(author_elem.get('ORCID'))
            if orcid:
                return orcid

        new_orcids = [get_orcid(elem) for elem in metadata['author']]
        if extra_orcids:
            # An ORCID found in the metadata wins over the extra one.
            # NOTE(review): zip() stops at the shorter list, so extra_orcids
            # is presumably expected to have one entry per author - confirm.
            orcids = [new or old for (old, new) in zip(
                extra_orcids, new_orcids)]
        else:
            orcids = new_orcids
        affiliations = [get_affiliation(elem) for elem in metadata['author']]

        paper = BarePaper.create(title, authors, pubdate,
                                 visible=True, affiliations=affiliations, orcids=orcids)

        result = create_publication(paper, metadata)

        if result is None:  # Creating the publication failed!
            # Make sure the paper only appears if it is still associated
            # with another source.
            paper.update_visible()
        else:
            paper = result[0]

        return paper
Beispiel #14
0
    def save_doi_metadata(self, metadata, extra_orcids=None):
        """
        Given the metadata as Citeproc+JSON or from CrossRef, create the associated paper and publication

        :param metadata: dict holding at least 'author', 'title' and 'DOI'
        :param extra_orcids: an optional orcids list, which will be unified
            with the orcids extracted from the metadata. This is useful for the ORCID interface.
        :returns: the paper, created if needed
        :raises ValueError: if the metadata is not a dict, if it lacks
            authors, a title, a DOI accepted by ``to_doi`` or a publication
            date, or if an author cannot be converted to a name pair
        """
        # Validate the input before building anything
        if not isinstance(metadata, dict):
            raise ValueError('Invalid metadata format, expecting a dict')
        if not metadata.get('author'):
            raise ValueError('No author provided')
        if not metadata.get('title'):
            raise ValueError('No title')
        if not to_doi(metadata.get('DOI')):
            raise ValueError("No DOI, skipping")

        pubdate = get_publication_date(metadata)
        if pubdate is None:
            raise ValueError('No pubdate')

        # CrossRef metadata stores titles and subtitles in lists
        title = metadata['title']
        if isinstance(title, list):
            title = title[0]
        subtitle = metadata.get('subtitle')
        if subtitle:
            if isinstance(subtitle, list):
                subtitle = subtitle[0]
            title += ': '+subtitle

        author_elems = metadata['author']

        name_pairs = [convert_to_name_pair(elem) for elem in author_elems]
        if None in name_pairs:
            raise ValueError('Invalid author')
        authors = [BareName.create_bare(first, last)
                   for first, last in name_pairs]

        def first_affiliation(author_elem):
            # Name of the first affiliation that has one, else None
            for candidate in author_elem.get('affiliation', []):
                if 'name' in candidate:
                    return candidate['name']
            return None

        def valid_orcid(author_elem):
            # Validated ORCID of the author; any falsy result becomes None
            return validate_orcid(author_elem.get('ORCID')) or None

        found_orcids = [valid_orcid(elem) for elem in author_elems]
        if extra_orcids:
            # remove the extra_orcids if they already exist on different authors
            claimed = set(extra_orcids) - {None}
            found_orcids = [None if orcid in claimed else orcid
                            for orcid in found_orcids]
            # now do the union
            orcids = [found or extra
                      for extra, found in zip(extra_orcids, found_orcids)]
        else:
            orcids = found_orcids
        affiliations = [first_affiliation(elem) for elem in author_elems]

        paper = BarePaper.create(title, authors, pubdate,
                                 visible=True, affiliations=affiliations, orcids=orcids)

        result = create_publication(paper, metadata)
        if result is not None:
            return result[0]

        # Creating the publication failed!
        # Make sure the paper only appears if it is still associated
        # with another source.
        paper.update_visible()
        return paper
Beispiel #15
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        This is a generator: nothing happens (including the identifier
        check) before iteration starts.

        :param id: the ORCiD identifier, cleaned up with ``validate_orcid``
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if the identifier is invalid
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            print(e)
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = []  # list of DOIs to fetch

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                     profile, [])
        for pub in pubs:

            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j(
                    'work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(
                        jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                print("Warning: Skipping ORCID publication: no title")
                # Actually skip, as announced: a paper cannot be created
                # without a title.
                continue

            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (ignored for now as they are very often not present)
            def get_contrib(js):
                return {
                    'orcid': jpath('contributor-orcid', js),
                    'name': jpath('credit-name/value', js),
                }

            contributors = [get_contrib(js)
                            for js in j('work-contributors/contributor', [])]

            author_names = [c['name'] for c in contributors
                            if c['name'] is not None]
            authors = [parse_comma_name(name) for name in author_names]
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = [c['orcid'] for c in contributors]

            # Pubdate: try increasingly precise dates, so that an invalid
            # day or month still leaves the last valid, coarser date.
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 1)
            day = parse_int(j('publication-date/day/value'), 1)
            pubdate = None
            try:
                pubdate = date(year=year, month=1, day=1)
                pubdate = date(year=year, month=month, day=1)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                if pubdate is None:
                    print("Invalid publication date in ORCID publication, skipping")
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print(citation_format)
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    # .get(): an entry without any 'author' key must not
                    # abort the whole generator with a KeyError.
                    bibtex_authors = entry.get('author', [])
                    if bibtex_authors == []:
                        print("Warning: Skipping ORCID publication: no authors.")
                        print(bibtex)
                    if not authors:
                        authors = bibtex_authors
                except ValueError:
                    pass

            affiliations = affiliate_author_with_orcid(
                ref_name, id, authors, initial_affiliations=affiliations)

            authors = [name_lookup_cache.lookup(author) for author in authors]

            if not authors:
                print("No authors found, skipping")
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                     affiliations)

            record = BareOaiRecord(source=orcid_oai_source,
                                   identifier=identifier,
                                   splash_url='http://orcid.org/' + id,
                                   pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally(
                    '', {'orcid': id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print("Saving CrossRef record from ORCID failed: %s" % unicode(
                        e))

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = [convert_to_name_pair(elem)
                               for elem in metadata['author']]
                    affiliations = affiliate_author_with_orcid(
                        ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(source=orcid_oai_source,
                                           identifier='orcid:' + id + ':' +
                                           metadata['DOI'],
                                           splash_url='http://orcid.org/' + id,
                                           pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError):
                    # NOTE(review): looks like deliberate best-effort, one
                    # bad record must not stop the others - confirm whether
                    # it should at least be reported.
                    pass
Beispiel #16
0
 def setUp(self):
     """Store a two-author paper dated 2015-03-02 in ``self.ist``."""
     authors = [
         BareName.create('Alfred', 'Kastler'),
         BareName.create('John', 'Dubuc'),
     ]
     pubdate = datetime.date(year=2015, month=3, day=2)
     self.ist = BarePaper.create('Groundbreaking Results', authors, pubdate)
Beispiel #17
0
def api_paper_query(request):
    """
    Answer a paper query sent as a JSON object in the request body.

    If the object has a non-empty 'doi', the paper is looked up with
    ``Paper.get_by_doi`` and created with ``Paper.create_by_doi`` when that
    returns nothing. Otherwise a bare paper is built from:

    - 'title': non-empty string of at most 512 characters
    - 'date': string accepted by ``tolerant_datestamp_to_datetime``
    - 'authors': non-empty list of dicts holding either 'first' and 'last',
      or 'plain' (parsed with ``parse_comma_name``)

    :param request: object whose ``body`` holds the UTF-8 encoded JSON
    :returns: ``{'status': 'ok', 'paper': <paper as JSON>}``
    :raises BadRequest: on any invalid or missing input
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON that is not an object (a list, a string, null...) has no
    # .get(): report it as a client error instead of crashing below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Reported to the client through the check just below
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        author = None
        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
Beispiel #18
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        This is a generator: nothing happens (including the identifier
        check) before iteration starts.

        :param id: the ORCiD identifier, cleaned up with ``validate_orcid``
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if the identifier is invalid
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            print(e)
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = [] # list of DOIs to fetch

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
        for pub in pubs:
            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j('work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                print("Warning: Skipping ORCID publication: no title")
                # Actually skip, as announced: a paper cannot be created
                # without a title.
                continue

            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (ignored for now as they are very often not present)
            def get_contrib(js):
                return {
                     'orcid':jpath('contributor-orcid', js),
                     'name': jpath('credit-name/value', js),
                    }
            contributors = [get_contrib(js) for js in j('work-contributors/contributor',[])]

            author_names = [c['name'] for c in contributors if c['name'] is not None]
            authors = [parse_comma_name(name) for name in author_names]
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = [c['orcid'] for c in contributors]

            # Pubdate: try increasingly precise dates, so that an invalid
            # day or month still leaves the last valid, coarser date.
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 1)
            day = parse_int(j('publication-date/day/value'), 1)
            pubdate = None
            try:
                pubdate = date(year=year, month=1, day=1)
                pubdate = date(year=year, month=month, day=1)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                if pubdate is None:
                    print("Invalid publication date in ORCID publication, skipping")
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print(citation_format)
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    # .get(): an entry without any 'author' key must not
                    # abort the whole generator with a KeyError.
                    bibtex_authors = entry.get('author', [])
                    if bibtex_authors == []:
                        print("Warning: Skipping ORCID publication: no authors.")
                        print(bibtex)
                    if not authors:
                        authors = bibtex_authors
                except ValueError:
                    pass

            affiliations = affiliate_author_with_orcid(ref_name, id, authors, initial_affiliations=affiliations)

            authors = [name_lookup_cache.lookup(author) for author in authors]

            if not authors:
                print("No authors found, skipping")
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE', affiliations)

            record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier=identifier,
                    splash_url='http://orcid.org/'+id,
                    pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally('', {'orcid':id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = [convert_to_name_pair(elem) for elem in metadata['author']]
                    affiliations = affiliate_author_with_orcid(ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(
                            source=orcid_oai_source,
                            identifier='orcid:'+id+':'+metadata['DOI'],
                            splash_url='http://orcid.org/'+id,
                            pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError):
                    # NOTE(review): looks like deliberate best-effort, one
                    # bad record must not stop the others - confirm whether
                    # it should at least be reported.
                    pass
Beispiel #19
0
    def process_records(self, listRecords):
        """
        Turn harvested OAI records into papers.

        This is a generator: a paper is yielded once its OAI record has
        been attached with ``add_oai_record``. A record is skipped (most of
        the time with a message on stdout) when none of its authors is
        known, when it has no title, when its proxy source cannot be
        resolved, or when no publication date can be found.

        :param listRecords: iterable of records, where ``record[0]`` is the
            header (``setSpec()`` and ``identifier()`` are called on it) and
            ``record[1]._map`` is the metadata dict
        :returns: a generator of papers
        """
        for record in listRecords:
            header = record[0]
            metadata = record[1]._map
            authors = get_oai_authors(metadata)

            # Filter the record
            if all(not elem.is_known for elem in authors):
                print("No relevant author, continue")
                continue
            if 'title' not in metadata or metadata['title'] == []:
                continue

            # Find the source: it is encoded in the first set whose name
            # starts with the proxy prefix
            source_identifier = None
            for s in header.setSpec():
                if s.startswith(PROXY_SOURCE_PREFIX):
                    source_identifier = s[len(PROXY_SOURCE_PREFIX):]
                    break
            source = None
            if source_identifier:
                try:
                    source = OaiSource.objects.get(
                        identifier=source_identifier)
                except OaiSource.DoesNotExist:
                    pass
            if not source:
                print("Invalid source '" + str(source_identifier) +
                      "' from the proxy, skipping")
                continue

            # Find the DOI, if any. A record lacking one of these fields
            # must not abort the whole harvest, hence the defaults.
            doi = None
            candidates = (metadata.get('identifier', []) +
                          metadata.get('relation', []))
            for identifier in candidates:
                doi = to_doi(identifier)
                if doi:
                    break

            # A publication date is necessary
            pubdate = find_earliest_oai_date(record)
            if not pubdate:
                print("No publication date, skipping")
                continue

            print('Saving record %s' % header.identifier())
            paper = BarePaper.create(metadata['title'][0], authors, pubdate)

            # Check the paper before handing it to CrossRef, not after
            if paper is None:
                print("Paper creation failed, skipping")
                continue

            if doi:
                try:
                    # Kept in its own name so that the OAI metadata is not
                    # overwritten by the CrossRef payload
                    doi_metadata = crossref.fetch_metadata_by_DOI(doi)
                    crossref.create_publication(paper, doi_metadata)
                except MetadataSourceException as e:
                    print(
                        "Warning, metadata source exception while fetching DOI "
                        + doi + ":\n" + unicode(e))

            # Save the record
            # TODO: we should check record validity *BEFORE* creating the paper
            try:
                add_oai_record(record, source, paper)
                yield paper
            except ValueError as e:
                print("Warning, OAI record " + header.identifier() +
                      " skipped:\n" + unicode(e))
                paper.update_availability()
Beispiel #20
0
    def save_doi_metadata(self, metadata, extra_affiliations=None, allow_unknown_authors=False):
        """
        Given the metadata as Citeproc+JSON or from CrossRef, create the associated paper and publication

        :param metadata: a dict providing 'author', a non-empty 'title' and a
            non-empty 'DOI'; 'subtitle' is optional
        :param extra_affiliations: an optional affiliations list, which will be unified
            with the affiliations extracted from the metadata. This is useful for the ORCID interface.
            It is only taken into account when it has one entry per element
            of ``metadata['author']``.
        :param allow_unknown_authors: create the paper even if no author matches our researchers
        :returns: the paper, created if needed
        :raises ValueError: if the metadata is not a dict, if it lacks
            authors, title, DOI or publication date, or if no usable author
            remains
        """
        # Normalize metadata
        if not isinstance(metadata, dict):
            if metadata is not None:
                print("WARNING: Invalid metadata: type is " + str(type(metadata)))
                print("The doi proxy is doing something nasty!")
            raise ValueError('Invalid metadata format, expecting a dict')
        if 'author' not in metadata:
            raise ValueError('No author provided')

        if not metadata.get('title'):
            raise ValueError('No title')

        if not metadata.get('DOI'):
            raise ValueError("No DOI, skipping")

        pubdate = get_publication_date(metadata)

        if pubdate is None:
            raise ValueError('No pubdate')

        title = metadata['title']
        # CrossRef metadata stores titles in lists
        if isinstance(title, list):
            title = title[0]
        subtitle = metadata.get('subtitle')
        if subtitle:
            if isinstance(subtitle, list):
                subtitle = subtitle[0]
            title += ': ' + subtitle

        author_elems = metadata['author']
        # One entry per element of metadata['author']; None marks an author
        # whose name lookup gave nothing
        looked_up = [name_lookup_cache.lookup(convert_to_name_pair(elem))
                     for elem in author_elems]
        authors = [name for name in looked_up if name is not None]
        if not authors or (not allow_unknown_authors
                           and all(not elem.is_known for elem in authors)):
            raise ValueError('No known author')

        def get_affiliation(author_elem):
            # First, look for an ORCID id
            orcid = validate_orcid(author_elem.get('ORCID'))
            if orcid:
                return orcid
            # Otherwise return the plain affiliation, if any
            for dct in author_elem.get('affiliation', []):
                if 'name' in dct:
                    return dct['name']

        affiliations = [get_affiliation(elem) for elem in author_elems]
        if extra_affiliations and len(affiliations) == len(extra_affiliations):
            for i, extra in enumerate(extra_affiliations):
                if affiliation_is_greater(extra, affiliations[i]):
                    affiliations[i] = extra

        # Drop the affiliations of the authors removed above, so that the
        # i-th affiliation still belongs to the i-th author
        affiliations = [aff for name, aff in zip(looked_up, affiliations)
                        if name is not None]

        paper = BarePaper.create(title, authors, pubdate,
                                 'VISIBLE', affiliations)

        result = create_publication(paper, metadata)

        if result is None:  # Creating the publication failed!
            paper.update_visibility()
            # Make sure the paper only appears if it is still associated
            # with another source.
            # TODO add unit test for this
        else:
            paper = result[0]

        return paper
Beispiel #21
0
def api_paper_query(request):
    """
    Look up a paper from a JSON description sent in the request body.

    The payload must be a JSON object. If it has a non-empty 'doi', the
    paper is fetched (or created) from that DOI. Otherwise 'title', 'date'
    and 'authors' are required; each author is an object with either
    'first' and 'last', or a single 'plain' name. These are used to compute
    a fingerprint, which is looked up among the existing papers.

    :param request: the request, whose ``body`` holds the UTF-8 JSON payload
    :returns: ``{'status': 'ok', 'paper': ...}`` when a paper is found, or
        the pair ``({'status': 'not found'}, 404)`` otherwise
    :raises BadRequest: if the payload or one of its fields is invalid, or
        if no paper could be obtained for the given DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')

    # Valid JSON is not necessarily an object: lists, strings, numbers and
    # null have no .get() and used to crash the lookups below
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        author = None
        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str) or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    if not authors:
        raise BadRequest('No authors provided')

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
        return {'status': 'ok', 'paper': model_paper.json()}
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
Beispiel #22
0
    def save_doi_metadata(self,
                          metadata,
                          extra_affiliations=None,
                          allow_unknown_authors=False):
        """
        Given the metadata as Citeproc+JSON or from CrossRef, create the associated paper and publication

        :param metadata: a dict with an 'author' list, a non-empty 'title'
            and a non-empty 'DOI'; a 'subtitle' is appended to the title
            when present
        :param extra_affiliations: an optional affiliations list, which will be unified
            with the affiliations extracted from the metadata. This is useful for the ORCID interface.
            It is ignored unless its length equals that of
            ``metadata['author']``.
        :param allow_unknown_authors: create the paper even if no author matches our researchers
        :returns: the paper, created if needed
        :raises ValueError: on invalid metadata (not a dict, no author,
            title, DOI or publication date) or when no usable author is left
        """
        # Normalize metadata
        if not isinstance(metadata, dict):
            if metadata is not None:
                print("WARNING: Invalid metadata: type is " + str(
                    type(metadata)))
                print("The doi proxy is doing something nasty!")
            raise ValueError('Invalid metadata format, expecting a dict')
        if 'author' not in metadata:
            raise ValueError('No author provided')

        if not metadata.get('title'):
            raise ValueError('No title')

        if not metadata.get('DOI'):
            raise ValueError("No DOI, skipping")

        pubdate = get_publication_date(metadata)

        if pubdate is None:
            raise ValueError('No pubdate')

        title = metadata['title']
        # CrossRef metadata stores titles in lists
        if isinstance(title, list):
            title = title[0]
        subtitle = metadata.get('subtitle')
        if subtitle:
            if isinstance(subtitle, list):
                subtitle = subtitle[0]
            title += ': ' + subtitle

        def get_affiliation(author_elem):
            # First, look for an ORCID id
            orcid = validate_orcid(author_elem.get('ORCID'))
            if orcid:
                return orcid
            # Otherwise return the plain affiliation, if any
            for dct in author_elem.get('affiliation', []):
                if 'name' in dct:
                    return dct['name']

        raw_authors = metadata['author']
        # Same length and order as raw_authors; None where the name lookup
        # returned nothing
        names = [
            name_lookup_cache.lookup(convert_to_name_pair(raw))
            for raw in raw_authors
        ]
        authors = [name for name in names if name is not None]
        no_known_author = all(not elem.is_known for elem in authors)
        if (not allow_unknown_authors and no_known_author) or not authors:
            raise ValueError('No known author')

        # Affiliations are computed and merged on the unfiltered author
        # list, because extra_affiliations is compared against its length
        affiliations = [get_affiliation(raw) for raw in raw_authors]
        if extra_affiliations and len(affiliations) == len(extra_affiliations):
            for i, extra in enumerate(extra_affiliations):
                if affiliation_is_greater(extra, affiliations[i]):
                    affiliations[i] = extra

        # Only then remove the entries of the authors that were dropped, so
        # authors and affiliations keep matching position by position
        affiliations = [
            affiliation for name, affiliation in zip(names, affiliations)
            if name is not None
        ]

        paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                 affiliations)

        result = create_publication(paper, metadata)

        if result is None:  # Creating the publication failed!
            paper.update_visibility()
            # Make sure the paper only appears if it is still associated
            # with another source.
            # TODO add unit test for this
        else:
            paper = result[0]

        return paper
Beispiel #23
0
 def setUp(self):
     """Build the two-author sample paper stored as ``self.ist``."""
     names = [
         BareName.create('Alfred', 'Kastler'),
         BareName.create('John', 'Dubuc'),
     ]
     pubdate = datetime.date(year=2015, month=3, day=2)
     self.ist = BarePaper.create('Groundbreaking Results', names, pubdate)
Beispiel #24
0
def api_paper_query(request):
    """
    Answer a paper lookup described by the JSON body of the request.

    The body has to decode to a JSON object. With a non-empty 'doi' the
    paper is obtained by DOI (looked up first, created if that returns
    nothing). Without it, 'title' (non-empty string of at most 512
    characters), 'date' (string) and 'authors' (non-empty list) are
    required. An author is an object holding either 'first' and 'last', or
    'plain'. The resulting bare paper is only used for its fingerprint.

    :param request: the request; its ``body`` is decoded as UTF-8 JSON
    :returns: ``{'status': 'ok', 'paper': ...}`` if a paper matches, else
        the pair ``({'status': 'not found'}, 404)``
    :raises BadRequest: for an invalid payload or field, or when no paper
        could be obtained for the DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')

    # json.loads also accepts documents such as [] or "text" or null;
    # without this guard fields.get() raised AttributeError for those
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            first, last = a['first'], a['last']
            # Only the last name is required to be non-empty
            if not isinstance(first, str) or not isinstance(last, str) or not last:
                raise BadRequest('Invalid (first,last) name provided')
            author = (first, last)
        elif 'plain' in a:
            plain = a['plain']
            if not isinstance(plain, str) or not plain:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(plain)
        else:
            author = None

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
        return {'status': 'ok', 'paper': model_paper.json()}
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
Beispiel #25
0
    def process_records(self, listRecords):
        """
        Generate papers from a list of harvested OAI records.

        For each record, the metadata is read from ``record[1]._map`` and
        the header from ``record[0]``. The record is dropped when all of its
        authors are unknown, when it has no title, when no existing source
        matches its proxy set, or when it has no publication date. If a DOI
        is found among its identifiers and relations, the CrossRef metadata
        for it is fetched and attached to the paper. The paper is yielded
        after ``add_oai_record`` succeeded for it.

        :param listRecords: iterable of (header, metadata) records
        :returns: a generator of papers
        """
        for record in listRecords:
            metadata = record[1]._map
            authors = get_oai_authors(metadata)

            # Filter the record
            if all(not elem.is_known for elem in authors):
                print("No relevant author, continue")
                continue
            if 'title' not in metadata or metadata['title'] == []:
                continue

            # Find the source
            sets = record[0].setSpec()
            source_identifier = None
            for s in sets:
                if s.startswith(PROXY_SOURCE_PREFIX):
                    source_identifier = s[len(PROXY_SOURCE_PREFIX):]
                    break
            source = None
            if source_identifier:
                try:
                    source = OaiSource.objects.get(identifier=source_identifier)
                except OaiSource.DoesNotExist:
                    pass
            if not source:
                print("Invalid source '"+str(source_identifier)+"' from the proxy, skipping")
                continue

            # Find the DOI, if any: the first identifier or relation that
            # to_doi accepts wins. Missing fields count as empty so that a
            # single sparse record cannot stop the generator with KeyError.
            doi = None
            for identifier in metadata.get('identifier', [])+metadata.get('relation', []):
                doi = to_doi(identifier)
                if doi:
                    break

            # A publication date is necessary
            pubdate = find_earliest_oai_date(record)
            if not pubdate:
                print("No publication date, skipping")
                continue

            print('Saving record %s' % record[0].identifier())
            paper = BarePaper.create(metadata['title'][0], authors, pubdate)

            # This guard has to come before the paper is used below
            if paper is None:
                print("Paper creation failed, skipping")
                continue

            if doi:
                try:
                    # Do not rebind `metadata`: it holds the OAI metadata
                    crossref_metadata = crossref.fetch_metadata_by_DOI(doi)
                    crossref.create_publication(paper, crossref_metadata)
                except MetadataSourceException as e:
                    print("Warning, metadata source exception while fetching DOI "+doi+":\n"+unicode(e))

            # Save the record
            # TODO: we should check record validity *BEFORE* creating the paper
            try:
                add_oai_record(record, source, paper)
                yield paper
            except ValueError as e:
                print("Warning, OAI record "+record[0].identifier()+" skipped:\n"+unicode(e))
                paper.update_availability()