Beispiel #1
2
 def test_checksum(self):
     self.assertEqual(validate_orcid('0000-0002-8612-8827'),
                      '0000-0002-8612-8827')
     self.assertEqual(validate_orcid('0000-0002-8612-8828'), None)
     self.assertEqual(validate_orcid('0000-0001-5892-743X'),
                      '0000-0001-5892-743X')
     self.assertEqual(validate_orcid('0000-0001-5892-7431'), None)
Beispiel #2
0
    def get(self, request, *args, **kwargs):
        if 'researcher' in kwargs:
            researcher = get_object_or_404(Researcher, pk=kwargs['researcher'])
        elif 'orcid' in kwargs:
            try:
                researcher = Researcher.objects.get(orcid=kwargs['orcid'])
            except Researcher.DoesNotExist:
                try:
                    orcid = validate_orcid(kwargs['orcid'])
                    researcher = Researcher.get_or_create_by_orcid(orcid)
                    if not researcher:
                        raise Http404(_("Invalid ORCID profile. Please make sure it includes a public name."))
                    researcher.init_from_orcid()
                except MetadataSourceException:
                    raise Http404(_('Invalid ORCID profile.'))

        if not researcher.visible:
            name = researcher.name.full
            return HttpResponsePermanentRedirect(reverse('search')+'?'+urlencode({'authors':name}))

        if kwargs.get('slug') != researcher.slug:
            view_args = {'researcher': researcher.id, 'slug': researcher.slug}
            url = reverse('researcher', kwargs=view_args)
            self.url = self.url_with_query_string(url=url)
            return HttpResponsePermanentRedirect(self.url)

        self.queryset = self.queryset.filter(researchers=researcher.id)
        self.researcher = researcher
        return super(ResearcherView, self).get(request, *args, **kwargs)
Beispiel #3
0
def fetch_on_orcid_login(sender, **kwargs):
    account = kwargs['sociallogin'].account

    # Only prefetch if the social login refers to a valid ORCID account
    orcid = validate_orcid(account.uid)
    if not orcid:
        raise ImmediateHttpResponse(
            render(kwargs['request'], 'dissemin/error.html', {'message':_('Invalid ORCID identifier.')})
        )

    profile = None # disabled account.extra_data because of API version mismatches
    user = None
    if '_user_cache' in account.__dict__:
        user = account.user
    r = Researcher.get_or_create_by_orcid(orcid, profile, user)

    if not r: # invalid ORCID profile (e.g. no name provided)
        raise ImmediateHttpResponse(
            render(kwargs['request'], 'dissemin/error.html', {'message':
            _('Dissemin requires access to your ORCID name, which is marked as private in your ORCID profile.')})
        )

    if r.user_id is None and user is not None:
        r.user = user
        r.save(update_fields=['user'])
    if r.empty_orcid_profile is None:
        r.init_from_orcid()
    else:
        r.fetch_everything_if_outdated()
Beispiel #4
0
    def get(self, request, *args, **kwargs):
        if 'researcher' in kwargs:
            researcher = get_object_or_404(Researcher, pk=kwargs['researcher'])
        elif 'orcid' in kwargs:
            try:
                researcher = Researcher.objects.get(orcid=kwargs['orcid'])
            except Researcher.DoesNotExist:
                try:
                    orcid = validate_orcid(kwargs['orcid'])
                    researcher = Researcher.get_or_create_by_orcid(orcid)
                    if not researcher:
                        raise Http404(
                            _("Invalid ORCID profile. Please make sure it includes a public name."
                              ))
                    researcher.init_from_orcid()
                except MetadataSourceException:
                    raise Http404(_('Invalid ORCID profile.'))

        if not researcher.visible:
            name = researcher.name.full
            return HttpResponsePermanentRedirect(
                reverse('search') + '?' + urlencode({'authors': name}))

        if kwargs.get('slug') != researcher.slug:
            view_args = {'researcher': researcher.id, 'slug': researcher.slug}
            url = reverse('researcher', kwargs=view_args)
            self.url = self.url_with_query_string(url=url)
            return HttpResponsePermanentRedirect(self.url)

        self.queryset = self.queryset.filter(researchers=researcher.id)
        self.researcher = researcher
        return super(ResearcherView, self).get(request, *args, **kwargs)
Beispiel #5
0
def fetch_on_orcid_login(sender, sociallogin, **kwargs):
    """
    Here we prepare some things, i.e. create a Researcher and require that the name on the orcid profile is public
    """
    account = sociallogin.account

    # Only prefetch if the social login refers to a valid ORCID account
    orcid = validate_orcid(account.uid)
    if not orcid:
        raise ImmediateHttpResponse(
            render(kwargs['request'], 'dissemin/error.html',
                   {'message': _('Invalid ORCID identifier.')}))

    profile = None  # disabled account.extra_data because of API version mismatches
    user = None
    if '_user_cache' in account.__dict__:
        user = account.user
    r = Researcher.get_or_create_by_orcid(orcid, profile, user)

    if not r:  # invalid ORCID profile (e.g. no name provided)
        raise ImmediateHttpResponse(
            render(
                kwargs['request'], 'dissemin/error.html', {
                    'message':
                    _('Dissemin requires access to your ORCID name, '
                      'which is marked as private in your ORCID profile.')
                }))
Beispiel #6
0
 def _get_orcid(author_elem):
     """
     Return a validated orcid or None
     :param author_elem: author as in citeproc
     :returns: orcid or None
     """
     return validate_orcid(author_elem.get('ORCID'))
Beispiel #7
0
 def to_python(self, value):
     if not value:
         return
     cleaned_value = validate_orcid(value)
     if cleaned_value is None:
         raise forms.ValidationError(
             _('Invalid ORCID identifier.'), code='invalid')
     return cleaned_value
Beispiel #8
0
 def to_python(self, val):
     if not val:
         return
     cleaned_val = validate_orcid(val)
     if cleaned_val is None:
         raise forms.ValidationError(_('Invalid ORCID identifier.'),
                                     code='invalid')
     return cleaned_val
Beispiel #9
0
 def get_affiliation(author_elem):
     # First, look for an ORCID id
     orcid = validate_orcid(author_elem.get('ORCID'))
     if orcid:
         return orcid
     # Otherwise return the plain affiliation, if any
     for dct in author_elem.get('affiliation', []):
         if 'name' in dct:
             return dct['name']
Beispiel #10
0
 def get_affiliation(author_elem):
     # First, look for an ORCID id
     orcid = validate_orcid(author_elem.get('ORCID'))
     if orcid:
         return orcid
     # Otherwise return the plain affiliation, if any
     for dct in author_elem.get('affiliation', []):
         if 'name' in dct:
             return dct['name']
Beispiel #11
0
    def create(cls, title, author_names, pubdate, visible=True,
               affiliations=None, orcids=None):
        """
        Creates a (bare) paper. To save it to the database, we
        need to run the clustering algorithm to resolve Researchers for the authors,
        using `from_bare` from the (non-bare) :class:`Paper` subclass..

        :param title: The title of the paper (as a string). If it is too long for the database,
                      ValueError is raised.
        :param author_names: The ordered list of author names, as Name objects.
        :param pubdate: The publication date, as a python date object
        :param visible: The visibility of the paper if it is created. If another paper
                    exists, the visibility will be set to the maximum of the two possible
                    visibilities.
        :param affiliations: A list of (possibly None) affiliations for the authors. It has to
                    have the same length as the list of author names.
        :param orcids: same as affiliations, but for ORCID ids.
        """
        if not title or not author_names or not pubdate:
            raise ValueError(
                "A title, pubdate and authors have to be provided to create a paper.")

        if affiliations is not None and len(author_names) != len(affiliations):
            raise ValueError(
                "The number of affiliations and authors have to be equal.")
        if orcids is not None and len(author_names) != len(orcids):
            raise ValueError(
                "The number of ORCIDs (or Nones) and authors have to be equal.")
        if not isinstance(visible, bool):
            raise ValueError("Invalid paper visibility: %s" % str(visible))

        title = sanitize_html(title)
        title = maybe_recapitalize_title(title)

        p = cls()
        p.title = title
        p.pubdate = pubdate  # pubdate will be checked in fingerprint computation
        p.visible = visible
        for idx, n in enumerate(author_names):
            a = BareAuthor()
            a.name = n
            if affiliations is not None:
                a.affiliation = affiliations[idx]
            if orcids is not None:
                orcid = validate_orcid(orcids[idx])
                if orcid:
                    a.orcid = orcid
            p.add_author(a, position=idx)

        p.fingerprint = p.new_fingerprint()

        return p
Beispiel #12
0
    def create(cls, title, author_names, pubdate, visible=True,
               affiliations=None, orcids=None):
        """
        Creates a (bare) paper. To save it to the database, we
        need to run the clustering algorithm to resolve Researchers for the authors,
        using `from_bare` from the (non-bare) :class:`Paper` subclass..

        :param title: The title of the paper (as a string). If it is too long for the database,
                      ValueError is raised.
        :param author_names: The ordered list of author names, as Name objects.
        :param pubdate: The publication date, as a python date object
        :param visible: The visibility of the paper if it is created. If another paper
                    exists, the visibility will be set to the maximum of the two possible
                    visibilities.
        :param affiliations: A list of (possibly None) affiliations for the authors. It has to
                    have the same length as the list of author names.
        :param orcids: same as affiliations, but for ORCID ids.
        """
        if not title or not author_names or not pubdate:
            raise ValueError(
                "A title, pubdate and authors have to be provided to create a paper.")

        if affiliations is not None and len(author_names) != len(affiliations):
            raise ValueError(
                "The number of affiliations and authors have to be equal.")
        if orcids is not None and len(author_names) != len(orcids):
            raise ValueError(
                "The number of ORCIDs (or Nones) and authors have to be equal.")
        if not isinstance(visible, bool):
            raise ValueError("Invalid paper visibility: %s" % str(visible))

        title = sanitize_html(title)
        title = maybe_recapitalize_title(title)

        p = cls()
        p.title = title
        p.pubdate = pubdate  # pubdate will be checked in fingerprint computation
        p.visible = visible
        for idx, n in enumerate(author_names):
            a = BareAuthor()
            a.name = n
            if affiliations is not None:
                a.affiliation = affiliations[idx]
            if orcids is not None:
                orcid = validate_orcid(orcids[idx])
                if orcid:
                    a.orcid = orcid
            p.add_author(a, position=idx)

        p.fingerprint = p.new_fingerprint()

        return p
def populate_authors(apps, schema_editor):
    Paper = apps.get_model('papers', 'Paper')

    for p in Paper.objects.all():
        authors = sorted(p.author_set.all().prefetch_related('name'),
                         key=lambda r: r.position)
        authors_list = [a.serialize() for a in authors]
        for idx, a in enumerate(authors_list):
            orcid = validate_orcid(a['affiliation'])
            if orcid:
                authors_list[idx]['orcid'] = orcid
                authors_list[idx]['affiliation'] = None
        p.authors_list = authors_list
        p.save(update_fields=['authors_list'])
Beispiel #14
0
def populate_authors(apps, schema_editor):
    Paper = apps.get_model('papers', 'Paper')

    for p in Paper.objects.all():
        authors = sorted(p.author_set.all().prefetch_related('name'),
                        key=lambda r: r.position)
        authors_list = [a.serialize() for a in authors]
        for idx, a in enumerate(authors_list):
            orcid = validate_orcid(a['affiliation'])
            if orcid:
                authors_list[idx]['orcid'] = orcid
                authors_list[idx]['affiliation'] = None
        p.authors_list = authors_list
        p.save(update_fields=['authors_list'])
Beispiel #15
0
    def create_by_name(cls, first, last, **kwargs):
        """
        Creates a :class:`Researcher` with the given name.
        If an ORCID is provided, and a researcher with this ORCID already exists,
        this researcher will be returned. In any other case, a new researcher will be created.
        """
        name, created = Name.get_or_create(first, last)

        if kwargs.get('orcid') is not None:
            orcid = validate_orcid(kwargs['orcid'])
            if kwargs['orcid'] is None:
                raise ValueError('Invalid ORCiD: "%s"' % orcid)
            researcher, created = Researcher.objects.get_or_create(name=name, orcid=orcid, defaults=kwargs)
        else:
            args = kwargs.copy()
            args['name'] = name
            researcher = Researcher.objects.create(**args)
            created = True

        if created:
            researcher.update_variants()
            researcher.update_stats()
        return researcher
Beispiel #16
0
    def create_by_name(cls, first, last, **kwargs):
        """
        Creates a :class:`Researcher` with the given name.
        If an ORCID is provided, and a researcher with this ORCID already exists,
        this researcher will be returned. In any other case, a new researcher will be created.
        """
        name, created = Name.get_or_create(first, last)

        if kwargs.get('orcid') is not None:
            orcid = validate_orcid(kwargs['orcid'])
            if kwargs['orcid'] is None:
                raise ValueError('Invalid ORCiD: "%s"' % orcid)
            researcher, created = Researcher.objects.get_or_create(
                name=name, orcid=orcid, defaults=kwargs)
        else:
            args = kwargs.copy()
            args['name'] = name
            researcher = Researcher.objects.create(**args)
            created = True

        if created:
            researcher.update_variants()
            researcher.update_stats()
        return researcher
Beispiel #17
0
def fetch_on_orcid_login(sender, **kwargs):
    account = kwargs['sociallogin'].account

    # Only prefetch if the social login refers to a valid ORCID account
    orcid = validate_orcid(account.uid)
    if not orcid:
        return

    profile = account.extra_data
    user = None
    if '_user_cache' in account.__dict__:
        user = account.user
    r = Researcher.get_or_create_by_orcid(orcid, profile, user)

    if not r:  # invalid ORCID profile (e.g. no name provided)
        return

    if r.user_id is None and user is not None:
        r.user = user
        r.save(update_fields=['user'])
    if r.empty_orcid_profile is None:
        r.init_from_orcid()
    else:
        r.fetch_everything_if_outdated()
Beispiel #18
0
    def search(self):
        self.queryset = self.searchqueryset.models(Paper)

        q = remove_diacritics(self.cleaned_data['q'])
        if q:
            self.queryset = self.queryset.auto_query(q)

        visible = self.cleaned_data['visible']
        if visible == '':
            self.filter(visible=True)
        elif visible == 'invisible':
            self.filter(visible=False)

        self.form_filter('availability', 'availability')
        self.form_filter('oa_status__in', 'oa_status')
        self.form_filter('pubdate__gte', 'pub_after')
        self.form_filter('pubdate__lte', 'pub_before')
        self.form_filter('doctype__in', 'doctypes')

        # Filter by authors.
        # authors field: a comma separated list of full/last names.
        # Items with no whitespace of prefixed with 'last:' are considered as
        # last names; others are full names.
        for name in self.cleaned_data['authors'].split(','):
            name = name.strip()

            # If part of this author name matches ORCID identifiers, consider
            # these as orcid ids and do the filtering
            orcid_ids = [x for x in name.split(' ') if validate_orcid(x)]
            for orcid_id in orcid_ids:
                try:
                    researcher = Researcher.objects.get(orcid=orcid_id)
                    self.filter(researchers=researcher.id)
                except Researcher.DoesNotExist:
                    pass
                continue
            # Rebuild a full name excluding the ORCID id terms
            name = ' '.join([x for x in name.split(' ') if x not in orcid_ids])

            name = remove_diacritics(name.strip())

            if name.startswith('last:'):
                is_lastname = True
                name = name[5:].strip()
            else:
                is_lastname = ' ' not in name

            if not name:
                continue

            if is_lastname:
                self.filter(authors_last=name)
            else:
                reversed_name = ' '.join(reversed(name.split(' ')))
                sq = SQ()
                sq.add(SQ(authors_full=Sloppy(name, slop=1)), SQ.OR)
                sq.add(SQ(authors_full=Sloppy(reversed_name, slop=1)), SQ.OR)
                self.queryset = self.queryset.filter(sq)

        self.queryset = aggregate_combined_status(self.queryset)

        status = self.cleaned_data['status']
        if status:
            self.queryset = self.queryset.post_filter(
                combined_status__in=status)

        # Default ordering by decreasing publication date
        order = self.cleaned_data['sort_by'] or '-pubdate'
        self.queryset = self.queryset.order_by(order).load_all()

        return self.queryset
Beispiel #19
0
    def fetch_orcid_records(self,
                            orcid_identifier,
                            profile=None,
                            use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        """
        cr_api = CrossRefAPI()

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(orcid_id=orcid_id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            print e
            return

        # As we have fetched the profile, let's update the Researcher
        self.researcher = Researcher.get_or_create_by_orcid(orcid_identifier,
                                                            profile.json,
                                                            update=True)
        if not self.researcher:
            return

        # Reference name
        ref_name = profile.name
        ignored_papers = [
        ]  # list of ignored papers due to incomplete metadata

        # Get summary publications and separate them in two classes:
        # - the ones with DOIs, that we will fetch with CrossRef
        dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
        # - the ones without: we will fetch ORCID's metadata about them
        #   and try to create a paper with what they provide
        put_codes = []
        for summary in profile.work_summaries:
            if summary.doi and use_doi:
                dois_and_putcodes.append((summary.doi, summary.put_code))
            else:
                put_codes.append(summary.put_code)

        # 1st attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers with DOIs found in our ORCiD profile.
            dois = [doi for doi, put_code in dois_and_putcodes]
            for idx, (success, paper_or_metadata) in enumerate(
                    self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id,
                                                  dois)):
                if success:
                    yield paper_or_metadata
                else:
                    put_codes.append(dois_and_putcodes[idx][1])

        # 2nd attempt with ORCID's own crappy metadata
        works = profile.fetch_works(put_codes)
        for work in works:
            if not work:
                continue

            # If the paper is skipped due to invalid metadata.
            # We first try to reconcile it with local researcher author name.
            # Then, we consider it missed.
            if work.skipped:
                print(work.json)
                print(work.skip_reason)
                print('work skipped due to incorrect metadata (%s)' %
                      (work.skip_reason))

                ignored_papers.append(work.as_dict())
                continue

            yield self.create_paper(work)

        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
Beispiel #20
0
 def test_validate_orcid(self):
     self.assertEqual(validate_orcid(' 0000-0001-8633-6098\n'), '0000-0001-8633-6098')
Beispiel #21
0
 def test_url(self):
     self.assertEqual(validate_orcid(
         'http://orcid.org/0000-0002-8612-8827'), '0000-0002-8612-8827')
Beispiel #22
0
 def test_whitespace(self):
     self.assertEqual(validate_orcid(
         '\t0000-0002-8612-8827  '), '0000-0002-8612-8827')
Beispiel #23
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            print e
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = []  # list of DOIs to fetch
        papers = []  # list of papers created
        records_found = 0  # how many records did we successfully import from the profile?

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                     profile, [])
        for pub in pubs:

            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j(
                    'work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(
                        jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                print "Warning: Skipping ORCID publication: no title"

            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (ignored for now as they are very often not present)
            def get_contrib(js):
                return {
                    'orcid': jpath('contributor-orcid', js),
                    'name': jpath('credit-name/value', js),
                }

            contributors = map(get_contrib,
                               j('work-contributors/contributor', []))

            author_names = filter(lambda x: x is not None,
                                  map(lambda x: x['name'], contributors))
            authors = map(parse_comma_name, author_names)
            pubdate = None
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = map(lambda x: x['orcid'], contributors)
            # Pubdate
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 01)
            day = parse_int(j('publication-date/day/value'), 01)
            pubdate = None
            try:
                pubdate = date(year=year, month=01, day=01)
                pubdate = date(year=year, month=month, day=01)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                if pubdate is None:
                    print "Invalid publication date in ORCID publication, skipping"
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print citation_format
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    if entry.get('author', []) == []:
                        print "Warning: Skipping ORCID publication: no authors."
                        print j('work-citation/citation')
                    if not authors:
                        authors = entry['author']
                except ValueError:
                    pass

            affiliations = affiliate_author_with_orcid(
                ref_name, id, authors, initial_affiliations=affiliations)

            authors = map(name_lookup_cache.lookup, authors)

            if not authors:
                print "No authors found, skipping"
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                     affiliations)

            record = BareOaiRecord(source=orcid_oai_source,
                                   identifier=identifier,
                                   splash_url='http://orcid.org/' + id,
                                   pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally(
                    '', {'orcid': id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print "Saving CrossRef record from ORCID failed: %s" % unicode(
                        e)

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = map(convert_to_name_pair, metadata['author'])
                    affiliations = affiliate_author_with_orcid(
                        ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(source=orcid_oai_source,
                                           identifier='orcid:' + id + ':' +
                                           metadata['DOI'],
                                           splash_url='http://orcid.org/' + id,
                                           pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError):
                    pass
Beispiel #24
0
 def test_simple(self):
     self.assertEqual(validate_orcid(None), None)
     self.assertEqual(validate_orcid(189), None)
     self.assertEqual(validate_orcid('rst'), None)
     self.assertEqual(validate_orcid('0123012301230123'), None)
Beispiel #25
0
    def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=orcid_id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            print e
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = [] # list of DOIs to fetch
        ignored_papers = [] # list of ignored papers due to incomplete metadata

        # Fetch publications (1st attempt with ORCiD data)
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
        for pub in pubs:
            data_paper = ORCIDDataPaper.from_orcid_metadata(
                ref_name,
                orcid_id,
                pub,
                stop_if_dois_exists=use_doi
            )

            if data_paper.dois and use_doi: # We want to batch it rather than manually do it.
                dois.extend(data_paper.dois)
                continue

            # If the paper is skipped due to invalid metadata.
            # We first try to reconcile it with local researcher author name.
            # Then, we consider it missed.
            if data_paper.skipped:
                print ('%s is skipped due to incorrect metadata (%s)' % (data_paper, data_paper.skip_reason))

                print ('Trying to reconcile it with local researcher.')
                data_paper = self.reconcile_paper(
                    ref_name,
                    orcid_id,
                    pub,
                    overrides={
                        'authors': [(self.researcher.name.first, self.researcher.name.last)]
                    }
                )
                if data_paper.skipped:
                    ignored_papers.append(data_paper.as_dict())
                    continue

            yield self.create_paper(data_paper)

        # 2nd attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers from CrossRef
            for success, paper_or_metadata in self.fetch_crossref_incrementally(crps, orcid_id):
                if success:
                    yield paper_or_metadata
                else:
                    ignored_papers.append(paper_or_metadata)
                    print ('This metadata (%s) yields no paper.' % (metadata))

            # Let's grab papers with DOIs found in our ORCiD profile.
            # FIXME(RaitoBezarius): if we fail here, we should get back the pub and yield it.
            for success, paper_or_metadata in self.fetch_metadata_from_dois(crps, ref_name, orcid_id, dois):
                if success:
                    yield paper_or_metadata
                else:
                    ignored_papers.append(paper_or_metadata)
                    print ('This metadata (%s) yields no paper.' % (paper_or_metadata))
       
        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            print ('Warning: Total ignored papers: %d' % (len(ignored_papers)))
Beispiel #26
0
 def get_orcid(author_elem):
     orcid = validate_orcid(author_elem.get('ORCID'))
     if orcid:
         return orcid
Beispiel #27
0
 def get_orcid(author_elem):
     orcid = validate_orcid(author_elem.get('ORCID'))
     if orcid:
         return orcid
Beispiel #28
0
    def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        """
        cr_api = CrossRefAPI()

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(orcid_id=orcid_id)
        except MetadataSourceException:
            logger.exception("ORCID Profile Error")
            return

        # As we have fetched the profile, let's update the Researcher
        self.researcher = Researcher.get_or_create_by_orcid(orcid_identifier,
                profile.json, update=True)
        if not self.researcher:
            return

        # Reference name
        ref_name = profile.name
        ignored_papers = []  # list of ignored papers due to incomplete metadata

        # Get summary publications and separate them in two classes:
        # - the ones with DOIs, that we will fetch with CrossRef
        dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
        # - the ones without: we will fetch ORCID's metadata about them
        #   and try to create a paper with what they provide
        put_codes = []
        for summary in profile.work_summaries:
            if summary.doi and use_doi:
                dois_and_putcodes.append((summary.doi, summary.put_code))
            else:
                put_codes.append(summary.put_code)

        # 1st attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers with DOIs found in our ORCiD profile.
            dois = [doi for doi, put_code in dois_and_putcodes]
            for idx, (success, paper_or_metadata) in enumerate(self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id, dois)):
                if success:
                    yield paper_or_metadata # We know that this is a paper
                else:
                    put_codes.append(dois_and_putcodes[idx][1])

        # 2nd attempt with ORCID's own crappy metadata
        works = profile.fetch_works(put_codes)
        for work in works:
            if not work:
                continue

            # If the paper is skipped due to invalid metadata.
            # We first try to reconcile it with local researcher author name.
            # Then, we consider it missed.
            if work.skipped:
                logger.warning("Work skipped due to incorrect metadata. \n %s \n %s" % (work.reason, work.skip_reason))

                ignored_papers.append(work.as_dict())
                continue

            yield self.create_paper(work)

        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            logger.warning("Total ignored papers: %d" % (len(ignored_papers)))
Beispiel #29
0
    def fetch_orcid_records(self, id, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        """
        crps = CrossRefPaperSource(self.ccf)

        # Cleanup iD:
        id = validate_orcid(id)
        if id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(id=id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            print e
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = [] # list of DOIs to fetch
        papers = [] # list of papers created
        records_found = 0 # how many records did we successfully import from the profile?

        # Fetch publications
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
        for pub in pubs:
            def j(path, default=None):
                return jpath(path, pub, default)

            # DOI
            doi = None
            for extid in j('work-external-identifiers/work-external-identifier', []):
                if extid.get('work-external-identifier-type') == 'DOI':
                    doi = to_doi(jpath('work-external-identifier-id/value', extid))
                    if doi:
                        # If a DOI is available, create the paper using metadata from CrossRef.
                        # We don't do it yet, we only store the DOI, so that we can fetch them
                        # by batch later.
                        dois.append(doi)

            if doi and use_doi:
                continue

            # Extract information from ORCiD

            # Title
            title = j('work-title/title/value')
            if title is None:
                print "Warning: Skipping ORCID publication: no title"
            
            # Type
            doctype = orcid_to_doctype(j('work-type', 'other'))

            # Contributors (ignored for now as they are very often not present)
            def get_contrib(js):
                return {
                     'orcid':jpath('contributor-orcid', js),
                     'name': jpath('credit-name/value', js),
                    }
            contributors = map(get_contrib, j('work-contributors/contributor',[]))

            author_names = filter(lambda x: x is not None, map(
                                  lambda x: x['name'], contributors))
            authors = map(parse_comma_name, author_names)
            pubdate = None
            # ORCiD internal id
            identifier = j('put-code')
            affiliations = map(lambda x: x['orcid'], contributors)
            # Pubdate
            year = parse_int(j('publication-date/year/value'), 1970)
            month = parse_int(j('publication-date/month/value'), 01)
            day = parse_int(j('publication-date/day/value'), 01)
            pubdate = None
            try:
                pubdate = date(year=year, month=01, day=01)
                pubdate = date(year=year, month=month, day=01)
                pubdate = date(year=year, month=month, day=day)
            except ValueError:
                if pubdate is None:
                    print "Invalid publication date in ORCID publication, skipping"
                    continue

            # Citation type: metadata format
            citation_format = j('work-citation/work-citation-type')
            print citation_format
            bibtex = j('work-citation/citation')

            if bibtex is not None:
                try:
                    entry = parse_bibtex(bibtex)

                    if entry.get('author', []) == []:
                        print "Warning: Skipping ORCID publication: no authors."
                        print j('work-citation/citation')
                    if not authors:
                        authors = entry['author']
                except ValueError:
                    pass

            affiliations = affiliate_author_with_orcid(ref_name, id, authors, initial_affiliations=affiliations)

            authors = map(name_lookup_cache.lookup, authors)

            if not authors:
                print "No authors found, skipping"
                continue

            # Create paper:
            paper = BarePaper.create(title, authors, pubdate, 'VISIBLE', affiliations)

            record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier=identifier,
                    splash_url='http://orcid.org/'+id,
                    pubtype=doctype)

            paper.add_oairecord(record)
            yield paper

        if use_doi:
            for metadata in crps.search_for_dois_incrementally('', {'orcid':id}):
                try:
                    paper = crps.save_doi_metadata(metadata)
                    if paper:
                        yield paper
                except ValueError as e:
                    print "Saving CrossRef record from ORCID failed: %s" % unicode(e)

            # Now we add the DOIs found in the ORCID profile.
            doi_metadata = fetch_dois(dois)
            for metadata in doi_metadata:
                try:
                    authors = map(convert_to_name_pair, metadata['author'])
                    affiliations = affiliate_author_with_orcid(ref_name, id, authors)
                    paper = crps.save_doi_metadata(metadata, affiliations)
                    if not paper:
                        continue
                    record = BareOaiRecord(
                            source=orcid_oai_source,
                            identifier='orcid:'+id+':'+metadata['DOI'],
                            splash_url='http://orcid.org/'+id,
                            pubtype=paper.doctype)
                    paper.add_oairecord(record)
                    yield paper
                except (KeyError, ValueError, TypeError):
                    pass
Beispiel #30
0
    def search(self):
        self.queryset = self.searchqueryset.models(Paper)

        q = remove_diacritics(self.cleaned_data['q'])
        if q:
            self.queryset = self.queryset.auto_query(q)

        visible = self.cleaned_data['visible']
        if visible == '':
            self.filter(visible=True)
        elif visible == 'invisible':
            self.filter(visible=False)

        self.form_filter('availability', 'availability')
        self.form_filter('oa_status__in', 'oa_status')
        self.form_filter('pubdate__gte', 'pub_after')
        self.form_filter('pubdate__lte', 'pub_before')
        self.form_filter('doctype__in', 'doctypes')

        # Filter by authors.
        # authors field: a comma separated list of full/last names.
        # Items with no whitespace of prefixed with 'last:' are considered as
        # last names; others are full names.
        for name in self.cleaned_data['authors'].split(','):
            name = name.strip()

            # If part of this author name matches ORCID identifiers, consider
            # these as orcid ids and do the filtering
            orcid_ids = [x for x in name.split(' ') if validate_orcid(x)]
            for orcid_id in orcid_ids:
                self.filter(orcids=orcid_id)

            # Rebuild a full name excluding the ORCID id terms
            name = ' '.join([x for x in name.split(' ') if x not in orcid_ids])

            name = remove_diacritics(name.strip())

            if name.startswith('last:'):
                is_lastname = True
                name = name[5:].strip()
            else:
                is_lastname = ' ' not in name

            if not name:
                continue

            if is_lastname:
                self.filter(authors_last=name)
            else:
                reversed_name = ' '.join(reversed(name.split(' ')))
                sq = SQ()
                sq.add(SQ(authors_full=Sloppy(name, slop=1)), SQ.OR)
                sq.add(SQ(authors_full=Sloppy(reversed_name, slop=1)), SQ.OR)
                self.queryset = self.queryset.filter(sq)

        self.queryset = aggregate_combined_status(self.queryset)

        status = self.cleaned_data['status']
        if status:
            self.queryset = self.queryset.post_filter(
                combined_status__in=status)

        # Default ordering by decreasing publication date
        order = self.cleaned_data['sort_by'] or '-pubdate'
        self.queryset = self.queryset.order_by(order).load_all()

        return self.queryset
Beispiel #31
0
    def authenticate(self, request, remote_user, shib_meta):
        """
        The remote_user is considered as trusted.
        Sets up a user based on shibboleth data.
        We file in website.models.ShibbolethAccount for it. If it does not exist, we create a user.
        If we have an orcid passed in the shib_meta, we try to find a researcher, otherwise we create a researcher.
        """
        # If no remote_user is given, we abort
        if not remote_user:
            logger.info('remote_user invalid')
            return

        logger.debug('Received remote_user: {}'.format(remote_user))

        # This is the real process of authentication
        user = None
        shib_account = None
        try:
            shib_account = ShibbolethAccount.objects.get(
                shib_username=shib_meta.get('username'))
        except ShibbolethAccount.DoesNotExist:
            logger.debug("username {} not found".format(
                shib_meta.get('username')))

        orcid = validate_orcid(shib_meta.get('orcid'))

        if shib_account:
            logger.debug("Found ShibbolethAccount: {}".format(shib_account))
            # If we have a ShibbolethAccount object, we have a Researcher object
            researcher = Researcher.objects.get(user=shib_account.user)
            # If we have a ORCID, we can do some stuff
            if orcid:
                if researcher.orcid:
                    # If both objects have ORCIDs, we can assume that they are identical
                    user = shib_account.user
                # Researcher object has no ORCID. We try to find a Researcher with that ORCID and merge, otherwise we can just set the ORCID to the current researcher
                else:
                    try:
                        alt_researcher = Researcher.objects.get(orcid=orcid)
                    except Researcher.DoesNotExist:
                        logger.debug(
                            "Found no researcher with orcid {}, save that on related researcher"
                            .format(orcid))
                        researcher.orcid = orcid
                        researcher.save()
                    else:
                        # We have an alternative researcher. If there is user, merge them, otherwise proceed directly to merging researchers
                        if alt_researcher.user:
                            merge_users(shib_account.user, alt_researcher.user)
                        researcher.merge(alt_researcher, delete_user=True)
                    user = shib_account.user
            else:
                user = shib_account.user

        # We have no ShibbolethAccount object
        # If we have an ORCID, we can try to find a Researcher
        elif orcid:
            try:
                researcher = Researcher.objects.get(orcid=orcid)
            except Researcher.DoesNotExist:
                pass
            else:
                # We have found a Researcher object
                if researcher.user:
                    # The found researcher has a user object. We use it
                    ShibbolethAccount.objects.create(
                        user=researcher.user,
                        shib_username=shib_meta.get('username'))
                    user = researcher.user
                else:
                    # The found researcher has no user object. We create a user and connect it
                    user = User.objects.create_user(
                        remote_user,
                        first_name=shib_meta.get('first_name'),
                        last_name=shib_meta.get('last_name'),
                    )
                    ShibbolethAccount.objects.create(
                        user=user, shib_username=shib_meta.get('username'))
                    researcher.user = user
                    researcher.save()

        # We have no ORCID, so we create a ShibbolethAccount and Researcher
        if not user:
            user = self.create_new_user_and_researcher(remote_user, orcid,
                                                       shib_meta)

        return user