def test_checksum(self):
    """
    ORCIDs whose final check character is correct are returned unchanged,
    the same identifiers with a wrong check character give None.
    """
    expectations = (
        ('0000-0002-8612-8827', '0000-0002-8612-8827'),
        ('0000-0002-8612-8828', None),
        ('0000-0001-5892-743X', '0000-0001-5892-743X'),
        ('0000-0001-5892-7431', None),
    )
    for candidate, expected in expectations:
        self.assertEqual(validate_orcid(candidate), expected)
def get(self, request, *args, **kwargs):
    """
    Show the papers of a single researcher.

    The researcher is selected by primary key (``researcher`` keyword)
    or by ORCID (``orcid`` keyword). An ORCID that matches no stored
    researcher is validated and handed to
    ``Researcher.get_or_create_by_orcid``; a falsy result or a
    ``MetadataSourceException`` becomes a 404.

    Invisible researchers are permanently redirected to a search on
    their full name, and a request whose ``slug`` differs from the
    researcher's slug is permanently redirected to the ``researcher``
    URL built from the id and slug.
    """
    if 'researcher' in kwargs:
        researcher = get_object_or_404(Researcher, pk=kwargs['researcher'])
    elif 'orcid' in kwargs:
        try:
            researcher = Researcher.objects.get(orcid=kwargs['orcid'])
        except Researcher.DoesNotExist:
            # Unknown locally: try to build the researcher from ORCID
            try:
                researcher = Researcher.get_or_create_by_orcid(
                    validate_orcid(kwargs['orcid']))
                if not researcher:
                    raise Http404(_("Invalid ORCID profile. Please make sure it includes a public name."))
                researcher.init_from_orcid()
            except MetadataSourceException:
                raise Http404(_('Invalid ORCID profile.'))

    if not researcher.visible:
        full_name = researcher.name.full
        search_url = reverse('search') + '?' + urlencode({'authors': full_name})
        return HttpResponsePermanentRedirect(search_url)

    if kwargs.get('slug') != researcher.slug:
        canonical_url = reverse(
            'researcher',
            kwargs={'researcher': researcher.id, 'slug': researcher.slug})
        self.url = self.url_with_query_string(url=canonical_url)
        return HttpResponsePermanentRedirect(self.url)

    self.queryset = self.queryset.filter(researchers=researcher.id)
    self.researcher = researcher
    return super(ResearcherView, self).get(request, *args, **kwargs)
def fetch_on_orcid_login(sender, **kwargs):
    """
    Signal handler run on an ORCID social login.

    Validates the account uid as an ORCID, obtains the matching
    Researcher, attaches the logged-in user to it when the researcher
    has none, and then initialises or refreshes the researcher's data.

    Raises ImmediateHttpResponse (rendering ``dissemin/error.html``)
    when the uid is not a valid ORCID or when no researcher could be
    obtained for it.
    """
    def error_response(message):
        # kwargs['request'] is only needed on the error paths
        page = render(kwargs['request'], 'dissemin/error.html',
                      {'message': message})
        return ImmediateHttpResponse(page)

    account = kwargs['sociallogin'].account

    # Only prefetch if the social login refers to a valid ORCID account
    orcid = validate_orcid(account.uid)
    if not orcid:
        raise error_response(_('Invalid ORCID identifier.'))

    # account.extra_data is not used because of API version mismatches
    profile = None
    user = account.user if '_user_cache' in account.__dict__ else None

    researcher = Researcher.get_or_create_by_orcid(orcid, profile, user)
    if not researcher:
        # invalid ORCID profile (e.g. no name provided)
        raise error_response(
            _('Dissemin requires access to your ORCID name, which is marked as private in your ORCID profile.'))

    if researcher.user_id is None and user is not None:
        researcher.user = user
        researcher.save(update_fields=['user'])

    if researcher.empty_orcid_profile is None:
        researcher.init_from_orcid()
    else:
        researcher.fetch_everything_if_outdated()
def get(self, request, *args, **kwargs):
    """
    Resolve the requested researcher and list their papers.

    Lookup is by ``researcher`` (primary key, 404 if absent) or by
    ``orcid``. For an ORCID unknown to the database, the value is
    validated and passed to ``Researcher.get_or_create_by_orcid``;
    a falsy result or a ``MetadataSourceException`` yields a 404.

    Two permanent redirects may be returned instead of the listing:
    to the search page (filtered on the researcher's full name) when
    the researcher is not visible, and to the ``researcher`` URL when
    the ``slug`` keyword does not match the researcher's slug.
    """
    if 'researcher' in kwargs:
        researcher = get_object_or_404(Researcher, pk=kwargs['researcher'])
    elif 'orcid' in kwargs:
        requested_orcid = kwargs['orcid']
        try:
            researcher = Researcher.objects.get(orcid=requested_orcid)
        except Researcher.DoesNotExist:
            try:
                clean_orcid = validate_orcid(requested_orcid)
                researcher = Researcher.get_or_create_by_orcid(clean_orcid)
                if not researcher:
                    raise Http404(
                        _("Invalid ORCID profile. Please make sure it includes a public name."))
                researcher.init_from_orcid()
            except MetadataSourceException:
                raise Http404(_('Invalid ORCID profile.'))

    if not researcher.visible:
        author_name = researcher.name.full
        return HttpResponsePermanentRedirect(
            '?'.join([reverse('search'), urlencode({'authors': author_name})]))

    slug_matches = kwargs.get('slug') == researcher.slug
    if not slug_matches:
        url_kwargs = {'researcher': researcher.id, 'slug': researcher.slug}
        self.url = self.url_with_query_string(
            url=reverse('researcher', kwargs=url_kwargs))
        return HttpResponsePermanentRedirect(self.url)

    self.queryset = self.queryset.filter(researchers=researcher.id)
    self.researcher = researcher
    return super(ResearcherView, self).get(request, *args, **kwargs)
def fetch_on_orcid_login(sender, sociallogin, **kwargs):
    """
    Prepare a Researcher for the ORCID account that is logging in.

    The login is aborted with an ImmediateHttpResponse (rendering
    ``dissemin/error.html``) when the account uid is not a valid ORCID,
    or when no researcher can be obtained for it, which the error
    message attributes to a private name on the ORCID profile.
    """
    def abort_with(message):
        # kwargs['request'] is only looked up when an error page is needed
        raise ImmediateHttpResponse(
            render(kwargs['request'], 'dissemin/error.html',
                   {'message': message}))

    account = sociallogin.account

    # Only prefetch if the social login refers to a valid ORCID account
    orcid = validate_orcid(account.uid)
    if not orcid:
        abort_with(_('Invalid ORCID identifier.'))

    # account.extra_data is not used because of API version mismatches
    profile = None
    user = account.user if '_user_cache' in account.__dict__ else None

    r = Researcher.get_or_create_by_orcid(orcid, profile, user)
    if not r:
        # invalid ORCID profile (e.g. no name provided)
        abort_with(
            _('Dissemin requires access to your ORCID name, '
              'which is marked as private in your ORCID profile.'))
def _get_orcid(author_elem):
    """
    Extract the ORCID of an author.

    :param author_elem: author as in citeproc
    :returns: the result of ``validate_orcid`` applied to the author's
        ``ORCID`` entry (``None`` is passed when the entry is absent)
    """
    raw_orcid = author_elem.get('ORCID')
    return validate_orcid(raw_orcid)
def to_python(self, value):
    """
    Convert the submitted value to a validated ORCID.

    Falsy input gives None. Otherwise the value returned by
    ``validate_orcid`` is returned, and a ``forms.ValidationError``
    with code ``invalid`` is raised when that value is None.
    """
    if value:
        orcid = validate_orcid(value)
        if orcid is None:
            raise forms.ValidationError(
                _('Invalid ORCID identifier.'), code='invalid')
        return orcid
    return None
def to_python(self, val):
    """
    Return the validated form of ``val``, or None when ``val`` is falsy.

    Raises ``forms.ValidationError`` (code ``invalid``) when
    ``validate_orcid`` rejects a non-empty value.
    """
    if not val:
        return None
    normalized = validate_orcid(val)
    if normalized is not None:
        return normalized
    raise forms.ValidationError(_('Invalid ORCID identifier.'),
                                code='invalid')
def get_affiliation(author_elem):
    """
    Return the author's validated ORCID when there is one, otherwise the
    ``name`` of the first entry of ``affiliation`` that has one,
    otherwise None.
    """
    # First, look for an ORCID id
    orcid = validate_orcid(author_elem.get('ORCID'))
    if orcid:
        return orcid

    # Otherwise return the plain affiliation, if any
    named = (entry['name']
             for entry in author_elem.get('affiliation', [])
             if 'name' in entry)
    return next(named, None)
def create(cls, title, author_names, pubdate, visible=True,
           affiliations=None, orcids=None):
    """
    Build a (bare) paper from its basic metadata.

    The title is passed through ``sanitize_html`` and
    ``maybe_recapitalize_title``; one ``BareAuthor`` is added per name,
    and the fingerprint is computed before the paper is returned.

    :param title: The title of the paper.
    :param author_names: The ordered list of author names.
    :param pubdate: The publication date.
    :param visible: The visibility of the paper; must be a bool.
    :param affiliations: Optional list of (possibly None) affiliations,
        one per author.
    :param orcids: Optional list of ORCID ids (or Nones), one per
        author. Values rejected by ``validate_orcid`` are ignored.
    :raises ValueError: if title, authors or pubdate are missing, if the
        optional lists do not have one entry per author, or if
        ``visible`` is not a bool.
    """
    if not (title and author_names and pubdate):
        raise ValueError(
            "A title, pubdate and authors have to be provided to create a paper.")

    def length_mismatch(values):
        # Only provided lists are compared against the author list
        return values is not None and len(author_names) != len(values)

    if length_mismatch(affiliations):
        raise ValueError(
            "The number of affiliations and authors have to be equal.")
    if length_mismatch(orcids):
        raise ValueError(
            "The number of ORCIDs (or Nones) and authors have to be equal.")
    if not isinstance(visible, bool):
        raise ValueError("Invalid paper visibility: %s" % str(visible))

    clean_title = maybe_recapitalize_title(sanitize_html(title))

    paper = cls()
    paper.title = clean_title
    paper.pubdate = pubdate  # pubdate will be checked in fingerprint computation
    paper.visible = visible

    for position, author_name in enumerate(author_names):
        author = BareAuthor()
        author.name = author_name
        if affiliations is not None:
            author.affiliation = affiliations[position]
        if orcids is not None:
            orcid = validate_orcid(orcids[position])
            if orcid:
                author.orcid = orcid
        paper.add_author(author, position=position)

    paper.fingerprint = paper.new_fingerprint()
    return paper
def populate_authors(apps, schema_editor):
    """
    Fill ``authors_list`` of every paper from its author set.

    Authors are serialized in ``position`` order. When an author's
    ``affiliation`` is accepted by ``validate_orcid``, the validated
    value is stored under ``orcid`` and the affiliation is cleared.
    """
    Paper = apps.get_model('papers', 'Paper')
    for paper in Paper.objects.all():
        ordered_authors = sorted(
            paper.author_set.all().prefetch_related('name'),
            key=lambda author: author.position)
        serialized = [author.serialize() for author in ordered_authors]
        for entry in serialized:
            orcid = validate_orcid(entry['affiliation'])
            if orcid:
                entry['orcid'] = orcid
                entry['affiliation'] = None
        paper.authors_list = serialized
        paper.save(update_fields=['authors_list'])
def create_by_name(cls, first, last, **kwargs):
    """
    Creates a :class:`Researcher` with the given name.

    If an ORCID is provided (``orcid`` keyword, not None), it is
    validated first; an existing researcher with this name and ORCID
    is returned if there is one, otherwise a new one is created with
    the validated ORCID. Without an ORCID, a new researcher is always
    created. The remaining keyword arguments are used as field values
    of the new researcher.

    Newly created researchers get their name variants and statistics
    updated.

    :param first: first name, passed to ``Name.get_or_create``
    :param last: last name, passed to ``Name.get_or_create``
    :returns: the researcher found or created
    :raises ValueError: if the provided ORCID is rejected by
        ``validate_orcid``
    """
    name, created = Name.get_or_create(first, last)

    raw_orcid = kwargs.get('orcid')
    if raw_orcid is not None:
        orcid = validate_orcid(raw_orcid)
        # Test the validated value: the raw one is known not to be None
        # here, so testing it could never detect an invalid ORCID.
        if orcid is None:
            raise ValueError('Invalid ORCiD: "%s"' % (raw_orcid,))
        # On creation, get_or_create() lets `defaults` take precedence
        # over the lookup values, so the ORCID stored in the defaults
        # has to be the validated one as well.
        defaults = dict(kwargs, orcid=orcid)
        researcher, created = Researcher.objects.get_or_create(
            name=name, orcid=orcid, defaults=defaults)
    else:
        args = kwargs.copy()
        args['name'] = name
        researcher = Researcher.objects.create(**args)
        created = True

    if created:
        researcher.update_variants()
        researcher.update_stats()
    return researcher
def create_by_name(cls, first, last, **kwargs):
    """
    Creates a :class:`Researcher` with the given name.

    When the ``orcid`` keyword is given and not None, the ORCID is
    validated and a researcher matching both the name and the validated
    ORCID is returned if one exists; otherwise one is created. When no
    ORCID is given, a new researcher is created unconditionally. Other
    keyword arguments become field values of the created researcher.

    Name variants and statistics are updated for researchers created
    by this call.

    :param first: first name, passed to ``Name.get_or_create``
    :param last: last name, passed to ``Name.get_or_create``
    :returns: the researcher found or created
    :raises ValueError: if ``validate_orcid`` rejects the given ORCID
    """
    name, created = Name.get_or_create(first, last)

    if kwargs.get('orcid') is None:
        args = kwargs.copy()
        args['name'] = name
        researcher = Researcher.objects.create(**args)
        created = True
    else:
        orcid = validate_orcid(kwargs['orcid'])
        # The validated value is the one that tells us whether the
        # ORCID is usable; kwargs['orcid'] cannot be None in this branch.
        if orcid is None:
            raise ValueError('Invalid ORCiD: "%s"' % (kwargs['orcid'],))
        # `defaults` override the lookup arguments when the object is
        # created, so replace the raw ORCID by the validated one there.
        defaults = kwargs.copy()
        defaults['orcid'] = orcid
        researcher, created = Researcher.objects.get_or_create(
            name=name, orcid=orcid, defaults=defaults)

    if created:
        researcher.update_variants()
        researcher.update_stats()
    return researcher
def fetch_on_orcid_login(sender, **kwargs):
    """
    Signal handler run on a social login: when the account uid is a
    valid ORCID, obtain the matching Researcher, attach the logged-in
    user to it if it has none, and initialise or refresh its data.

    Returns silently when the uid is not a valid ORCID or when no
    researcher could be obtained.
    """
    account = kwargs['sociallogin'].account

    # Only prefetch if the social login refers to a valid ORCID account
    orcid = validate_orcid(account.uid)
    if not orcid:
        return

    profile = account.extra_data
    user = account.user if '_user_cache' in account.__dict__ else None

    researcher = Researcher.get_or_create_by_orcid(orcid, profile, user)
    if not researcher:
        # invalid ORCID profile (e.g. no name provided)
        return

    if researcher.user_id is None and user is not None:
        researcher.user = user
        researcher.save(update_fields=['user'])

    never_fetched = researcher.empty_orcid_profile is None
    if never_fetched:
        researcher.init_from_orcid()
    else:
        researcher.fetch_everything_if_outdated()
def search(self):
    """
    Build the paper search queryset from the cleaned form data.

    Applies, in order: the free-text query, the visibility filter, the
    simple form filters, the author filters, the combined status
    post-filter and the ordering (``-pubdate`` when none is selected).

    The ``authors`` field is a comma-separated list. Within each item,
    tokens accepted by ``validate_orcid`` select the researcher having
    that ORCID; the remaining tokens form a name, treated as a last
    name when prefixed by ``last:`` or when it has no whitespace, and
    as a full name otherwise.

    :returns: the resulting queryset (also stored in ``self.queryset``)
    """
    self.queryset = self.searchqueryset.models(Paper)

    q = remove_diacritics(self.cleaned_data['q'])
    if q:
        self.queryset = self.queryset.auto_query(q)

    visible = self.cleaned_data['visible']
    if visible == '':
        self.filter(visible=True)
    elif visible == 'invisible':
        self.filter(visible=False)

    self.form_filter('availability', 'availability')
    self.form_filter('oa_status__in', 'oa_status')
    self.form_filter('pubdate__gte', 'pub_after')
    self.form_filter('pubdate__lte', 'pub_before')
    self.form_filter('doctype__in', 'doctypes')

    # Filter by authors.
    # authors field: a comma separated list of full/last names.
    # Items with no whitespace or prefixed with 'last:' are considered as
    # last names; others are full names.
    for name in self.cleaned_data['authors'].split(','):
        tokens = name.strip().split(' ')

        # If part of this author name matches ORCID identifiers, consider
        # these as orcid ids and do the filtering
        orcid_tokens = set()
        for token in tokens:
            orcid = validate_orcid(token)
            if not orcid:
                continue
            orcid_tokens.add(token)
            try:
                # Look up with the normalised ORCID, not the raw token,
                # so that equivalent spellings find the researcher too.
                researcher = Researcher.objects.get(orcid=orcid)
            except Researcher.DoesNotExist:
                # Best effort: an unknown ORCID adds no filter
                continue
            self.filter(researchers=researcher.id)

        # Rebuild a full name excluding the ORCID id terms
        name = ' '.join(token for token in tokens
                        if token not in orcid_tokens)
        name = remove_diacritics(name.strip())

        if name.startswith('last:'):
            is_lastname = True
            name = name[5:].strip()
        else:
            is_lastname = ' ' not in name

        if not name:
            continue

        if is_lastname:
            self.filter(authors_last=name)
        else:
            # Match the name in either word order
            reversed_name = ' '.join(reversed(name.split(' ')))
            sq = SQ()
            sq.add(SQ(authors_full=Sloppy(name, slop=1)), SQ.OR)
            sq.add(SQ(authors_full=Sloppy(reversed_name, slop=1)), SQ.OR)
            self.queryset = self.queryset.filter(sq)

    self.queryset = aggregate_combined_status(self.queryset)

    status = self.cleaned_data['status']
    if status:
        self.queryset = self.queryset.post_filter(
            combined_status__in=status)

    # Default ordering by decreasing publication date
    order = self.cleaned_data['sort_by'] or '-pubdate'
    self.queryset = self.queryset.order_by(order).load_all()

    return self.queryset
def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    Works that carry a DOI are first fetched through CrossRef; the others,
    and the ones CrossRef could not resolve, are built from ORCID's own
    metadata.

    This is a generator: nothing happens (not even the validation of the
    identifier) before the first item is requested.

    :param orcid_identifier: The ORCID to fetch the works of.
    :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
    :returns: a generator, where all the papers found are yielded. (some of them could be in
            free form, hence not imported)
    :raises MetadataSourceException: if the identifier is not a valid ORCID
    """
    cr_api = CrossRefAPI()

    # Cleanup iD:
    orcid_id = validate_orcid(orcid_identifier)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(orcid_id=orcid_id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # print() with one argument behaves the same on Python 2 and 3,
        # like the other print calls of this function
        print(e)
        return

    # As we have fetched the profile, let's update the Researcher
    self.researcher = Researcher.get_or_create_by_orcid(
        orcid_identifier, profile.json, update=True)
    if not self.researcher:
        return

    # Reference name
    ref_name = profile.name
    ignored_papers = []  # list of ignored papers due to incomplete metadata

    # Get summary publications and separate them in two classes:
    # - the ones with DOIs, that we will fetch with CrossRef
    dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
    # - the ones without: we will fetch ORCID's metadata about them
    #   and try to create a paper with what they provide
    put_codes = []
    for summary in profile.work_summaries:
        if summary.doi and use_doi:
            dois_and_putcodes.append((summary.doi, summary.put_code))
        else:
            put_codes.append(summary.put_code)

    # 1st attempt with DOIs and CrossRef
    if use_doi:
        # Let's grab papers with DOIs found in our ORCiD profile.
        dois = [doi for doi, put_code in dois_and_putcodes]
        results = self.fetch_metadata_from_dois(
            cr_api, ref_name, orcid_id, dois)
        for idx, (success, paper_or_metadata) in enumerate(results):
            if success:
                yield paper_or_metadata
            else:
                # Fall back to ORCID's metadata for this work; relies on
                # results coming back in the order of `dois`
                put_codes.append(dois_and_putcodes[idx][1])

    # 2nd attempt with ORCID's own crappy metadata
    works = profile.fetch_works(put_codes)
    for work in works:
        if not work:
            continue

        # Works skipped due to invalid metadata are recorded and reported
        # to the user at the end.
        if work.skipped:
            print(work.json)
            print(work.skip_reason)
            print('work skipped due to incorrect metadata (%s)' %
                  (work.skip_reason))

            ignored_papers.append(work.as_dict())
            continue

        yield self.create_paper(work)

    self.warn_user_of_ignored_papers(ignored_papers)
    if ignored_papers:
        print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
def test_validate_orcid(self):
    """A valid ORCID padded with a space and a newline is returned bare."""
    padded = ' 0000-0001-8633-6098\n'
    cleaned = validate_orcid(padded)
    self.assertEqual(cleaned, '0000-0001-8633-6098')
def test_url(self):
    """An ORCID given as an orcid.org URL is reduced to the identifier."""
    as_url = 'http://orcid.org/0000-0002-8612-8827'
    expected = '0000-0002-8612-8827'
    self.assertEqual(validate_orcid(as_url), expected)
def test_whitespace(self):
    """A leading tab and a trailing space around an ORCID are dropped."""
    padded = '\t0000-0002-8612-8827 '
    expected = '0000-0002-8612-8827'
    self.assertEqual(validate_orcid(padded), expected)
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    This is a generator: nothing happens (not even the validation of the
    identifier) before the first item is requested.

    :param id: The ORCID to fetch the works of.
    :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
    :returns: a generator, where all the papers found are yielded. (some of them could be in
            free form, hence not imported)
    :raises MetadataSourceException: if the identifier is not a valid ORCID
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    orcid_id = validate_orcid(id)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=orcid_id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch

    def get_contrib(js):
        return {
            'orcid': jpath('contributor-orcid', js),
            'name': jpath('credit-name/value', js),
        }

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                 profile, [])
    for pub in pubs:
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier', []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata from CrossRef.
                    # We don't do it yet, we only store the DOI, so that we can fetch them
                    # by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            print("Warning: Skipping ORCID publication: no title")
            # Actually skip it, as announced: a paper cannot be created
            # without a title.
            continue

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (very often not present)
        contributors = [get_contrib(js)
                        for js in j('work-contributors/contributor', [])]
        author_names = [contrib['name'] for contrib in contributors
                        if contrib['name'] is not None]
        authors = [parse_comma_name(author_name)
                   for author_name in author_names]
        # NOTE(review): one entry per contributor, including unnamed ones,
        # so this list can be longer than `authors` -- confirm that
        # affiliate_author_with_orcid copes with that.
        affiliations = [contrib['orcid'] for contrib in contributors]

        # ORCiD internal id
        identifier = j('put-code')

        # Pubdate: keep the most precise date that is valid
        # (year only, then year and month, then the full date)
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 1)
        day = parse_int(j('publication-date/day/value'), 1)
        pubdate = None
        try:
            pubdate = date(year=year, month=1, day=1)
            pubdate = date(year=year, month=month, day=1)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print("Invalid publication date in ORCID publication, skipping")
                continue

        # Citation type: metadata format
        citation_format = j('work-citation/work-citation-type')
        print(citation_format)
        bibtex = j('work-citation/citation')
        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)
                bibtex_authors = entry.get('author', [])
                if bibtex_authors == []:
                    print("Warning: Skipping ORCID publication: no authors.")
                    print(bibtex)
                if not authors:
                    # A BibTeX entry without 'author' leaves the list
                    # empty, which is handled below, instead of raising
                    # an uncaught KeyError.
                    authors = bibtex_authors
            except ValueError:
                pass

        affiliations = affiliate_author_with_orcid(
            ref_name, orcid_id, authors, initial_affiliations=affiliations)

        authors = [name_lookup_cache.lookup(author) for author in authors]

        if not authors:
            print("No authors found, skipping")
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate,
                                 'VISIBLE', affiliations)

        record = BareOaiRecord(source=orcid_oai_source,
                               identifier=identifier,
                               splash_url='http://orcid.org/' + orcid_id,
                               pubtype=doctype)

        paper.add_oairecord(record)
        yield paper

    if use_doi:
        for metadata in crps.search_for_dois_incrementally(
                '', {'orcid': orcid_id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print("Saving CrossRef record from ORCID failed: %s" %
                      unicode(e))

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = [convert_to_name_pair(author)
                           for author in metadata['author']]
                affiliations = affiliate_author_with_orcid(
                    ref_name, orcid_id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue
                record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier='orcid:' + orcid_id + ':' + metadata['DOI'],
                    splash_url='http://orcid.org/' + orcid_id,
                    pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError):
                # Best effort: records with unusable metadata are dropped
                pass
def test_simple(self):
    """Values that are not ORCIDs at all are rejected with None."""
    for invalid in (None, 189, 'rst', '0123012301230123'):
        self.assertEqual(validate_orcid(invalid), None)
def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    This is a generator: nothing happens (not even the validation of the
    identifier) before the first item is requested.

    :param orcid_identifier: The ORCID to fetch the works of.
    :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
    :returns: a generator, where all the papers found are yielded. (some of them could be in
            free form, hence not imported)
    :raises MetadataSourceException: if the identifier is not a valid ORCID
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    orcid_id = validate_orcid(orcid_identifier)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=orcid_id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # print() with one argument behaves the same on Python 2 and 3,
        # like the other print calls of this function
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch
    ignored_papers = []  # list of ignored papers due to incomplete metadata

    # Fetch publications (1st attempt with ORCiD data)
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                 profile, [])
    for pub in pubs:
        data_paper = ORCIDDataPaper.from_orcid_metadata(
            ref_name,
            orcid_id,
            pub,
            stop_if_dois_exists=use_doi
        )

        if data_paper.dois and use_doi:
            # We want to batch it rather than manually do it.
            dois.extend(data_paper.dois)
            continue

        # If the paper is skipped due to invalid metadata.
        # We first try to reconcile it with local researcher author name.
        # Then, we consider it missed.
        if data_paper.skipped:
            print('%s is skipped due to incorrect metadata (%s)' %
                  (data_paper, data_paper.skip_reason))

            print('Trying to reconcile it with local researcher.')
            data_paper = self.reconcile_paper(
                ref_name,
                orcid_id,
                pub,
                overrides={
                    'authors': [(self.researcher.name.first,
                                 self.researcher.name.last)]
                }
            )
            if data_paper.skipped:
                ignored_papers.append(data_paper.as_dict())
                continue

        yield self.create_paper(data_paper)

    # 2nd attempt with DOIs and CrossRef
    if use_doi:
        # Let's grab papers from CrossRef
        for success, paper_or_metadata in self.fetch_crossref_incrementally(
                crps, orcid_id):
            if success:
                yield paper_or_metadata
            else:
                ignored_papers.append(paper_or_metadata)
                # Report the loop variable: the name `metadata` is not
                # defined in this function. The 1-tuple keeps the
                # formatting safe whatever the type of the metadata.
                print('This metadata (%s) yields no paper.' %
                      (paper_or_metadata,))

        # Let's grab papers with DOIs found in our ORCiD profile.
        # FIXME(RaitoBezarius): if we fail here, we should get back the pub and yield it.
        for success, paper_or_metadata in self.fetch_metadata_from_dois(
                crps, ref_name, orcid_id, dois):
            if success:
                yield paper_or_metadata
            else:
                ignored_papers.append(paper_or_metadata)
                print('This metadata (%s) yields no paper.' %
                      (paper_or_metadata,))

    self.warn_user_of_ignored_papers(ignored_papers)
    if ignored_papers:
        print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
def get_orcid(author_elem):
    """
    Return the validated ORCID stored under the ``ORCID`` key of the
    author, or None when ``validate_orcid`` gives a falsy result.
    """
    return validate_orcid(author_elem.get('ORCID')) or None
def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    Works with a DOI are first fetched through ``fetch_metadata_from_dois``;
    the other works, and the ones for which that failed, are then built
    from the works returned by ``profile.fetch_works``.

    This is a generator: nothing happens (not even the validation of the
    identifier) before the first item is requested.

    :param orcid_identifier: The ORCID to fetch the works of.
    :param profile: The ORCID profile if it has already been fetched before.
            NOTE(review): it is used as-is through ``.json``, ``.name``,
            ``.work_summaries`` and ``.fetch_works``, so it presumably has
            to be an ``OrcidProfile`` rather than parsed JSON -- confirm
            against callers.
    :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
    :returns: a generator, where all the papers found are yielded. (some of them could be in
            free form, hence not imported)
    :raises MetadataSourceException: if the identifier is not a valid ORCID
    """
    cr_api = CrossRefAPI()

    # Cleanup iD:
    orcid_id = validate_orcid(orcid_identifier)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile (only when the caller did not provide one)
    try:
        if profile is None:
            profile = OrcidProfile(orcid_id=orcid_id)
    except MetadataSourceException:
        # The failure is logged and the generator ends without yielding
        logger.exception("ORCID Profile Error")
        return

    # As we have fetched the profile, let's update the Researcher
    self.researcher = Researcher.get_or_create_by_orcid(orcid_identifier, profile.json, update=True)
    if not self.researcher:
        return

    # Reference name
    ref_name = profile.name
    ignored_papers = []  # list of ignored papers due to incomplete metadata

    # Get summary publications and separate them in two classes:
    # - the ones with DOIs, that we will fetch with CrossRef
    dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
    # - the ones without: we will fetch ORCID's metadata about them
    #   and try to create a paper with what they provide
    put_codes = []
    for summary in profile.work_summaries:
        if summary.doi and use_doi:
            dois_and_putcodes.append((summary.doi, summary.put_code))
        else:
            put_codes.append(summary.put_code)

    # 1st attempt with DOIs and CrossRef
    if use_doi:
        # Let's grab papers with DOIs found in our ORCiD profile.
        dois = [doi for doi, put_code in dois_and_putcodes]
        # The index of each result is used to find the put-code of the
        # corresponding work, which assumes that results come back in
        # the order of `dois` -- TODO confirm in fetch_metadata_from_dois
        for idx, (success, paper_or_metadata) in enumerate(self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id, dois)):
            if success:
                yield paper_or_metadata  # We know that this is a paper
            else:
                # Fall back to ORCID's own metadata for this work
                put_codes.append(dois_and_putcodes[idx][1])

    # 2nd attempt with ORCID's own crappy metadata
    works = profile.fetch_works(put_codes)
    for work in works:
        if not work:
            continue

        # Works skipped due to invalid metadata are recorded, so that the
        # user can be warned about them at the end.
        if work.skipped:
            logger.warning("Work skipped due to incorrect metadata. \n %s \n %s" % (work.reason, work.skip_reason))

            ignored_papers.append(work.as_dict())
            continue

        yield self.create_paper(work)

    self.warn_user_of_ignored_papers(ignored_papers)
    if ignored_papers:
        logger.warning("Total ignored papers: %d" % (len(ignored_papers)))
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    This is a generator: nothing happens (not even the validation of the
    identifier) before the first item is requested.

    :param id: The ORCID to fetch the works of.
    :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
    :returns: a generator, where all the papers found are yielded. (some of them could be in
            free form, hence not imported)
    :raises MetadataSourceException: if the identifier is not a valid ORCID
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    id = validate_orcid(id)
    if id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # The error is printed and the generator ends without yielding
        print e
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = [] # list of DOIs to fetch
    # NOTE(review): the next two variables are never read in this function
    papers = [] # list of papers created
    records_found = 0 # how many records did we successfully import from the profile?

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
    for pub in pubs:
        # Shortcut to read a path in the current publication
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier', []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata from CrossRef.
                    # We don't do it yet, we only store the DOI, so that we can fetch them
                    # by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            # NOTE(review): despite the message, the publication is not
            # skipped here; the None title is passed on to BarePaper.create
            print "Warning: Skipping ORCID publication: no title"

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (very often not present)
        def get_contrib(js):
            return {
                'orcid':jpath('contributor-orcid', js),
                'name': jpath('credit-name/value', js),
            }
        contributors = map(get_contrib, j('work-contributors/contributor',[]))

        # Only the contributors with a name are kept as authors
        author_names = filter(lambda x: x is not None, map(
            lambda x: x['name'], contributors))
        authors = map(parse_comma_name, author_names)
        pubdate = None
        # ORCiD internal id
        identifier = j('put-code')
        # NOTE(review): one entry per contributor, including unnamed ones,
        # so this can be longer than `authors` -- confirm this is expected
        affiliations = map(lambda x: x['orcid'], contributors)
        # Pubdate: keep the most precise date that is valid
        # (year only, then year and month, then the full date)
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 01)
        day = parse_int(j('publication-date/day/value'), 01)
        pubdate = None
        try:
            pubdate = date(year=year, month=01, day=01)
            pubdate = date(year=year, month=month, day=01)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print "Invalid publication date in ORCID publication, skipping"
                continue

        # Citation type: metadata format
        citation_format = j('work-citation/work-citation-type')
        print citation_format
        bibtex = j('work-citation/citation')
        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)

                if entry.get('author', []) == []:
                    print "Warning: Skipping ORCID publication: no authors."
                    print j('work-citation/citation')
                if not authors:
                    # NOTE(review): raises KeyError, which is not caught
                    # below, when the entry has no 'author' key
                    authors = entry['author']
            except ValueError:
                pass

        affiliations = affiliate_author_with_orcid(ref_name, id, authors, initial_affiliations=affiliations)

        authors = map(name_lookup_cache.lookup, authors)

        if not authors:
            print "No authors found, skipping"
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate, 'VISIBLE', affiliations)

        record = BareOaiRecord(
                source=orcid_oai_source,
                identifier=identifier,
                splash_url='http://orcid.org/'+id,
                pubtype=doctype)

        paper.add_oairecord(record)
        yield paper

    if use_doi:
        # Papers that the CrossRef search associates with this ORCID
        for metadata in crps.search_for_dois_incrementally('', {'orcid':id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print "Saving CrossRef record from ORCID failed: %s" % unicode(e)

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = map(convert_to_name_pair, metadata['author'])
                affiliations = affiliate_author_with_orcid(ref_name, id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue
                record = BareOaiRecord(
                        source=orcid_oai_source,
                        identifier='orcid:'+id+':'+metadata['DOI'],
                        splash_url='http://orcid.org/'+id,
                        pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError):
                # Records with unusable metadata are silently dropped
                pass
def search(self):
    """
    Build the paper search queryset from the cleaned form data.

    Applies, in order: the free-text query, the visibility filter, the
    simple form filters, the author filters, the combined status
    post-filter and the ordering (``-pubdate`` when none is selected).

    The ``authors`` field is a comma-separated list. Within each item,
    tokens accepted by ``validate_orcid`` are used as ``orcids``
    filters; the remaining tokens form a name, treated as a last name
    when prefixed by ``last:`` or when it has no whitespace, and as a
    full name otherwise.

    :returns: the resulting queryset (also stored in ``self.queryset``)
    """
    self.queryset = self.searchqueryset.models(Paper)

    q = remove_diacritics(self.cleaned_data['q'])
    if q:
        self.queryset = self.queryset.auto_query(q)

    visible = self.cleaned_data['visible']
    if visible == '':
        self.filter(visible=True)
    elif visible == 'invisible':
        self.filter(visible=False)

    self.form_filter('availability', 'availability')
    self.form_filter('oa_status__in', 'oa_status')
    self.form_filter('pubdate__gte', 'pub_after')
    self.form_filter('pubdate__lte', 'pub_before')
    self.form_filter('doctype__in', 'doctypes')

    # Filter by authors.
    # authors field: a comma separated list of full/last names.
    # Items with no whitespace or prefixed with 'last:' are considered as
    # last names; others are full names.
    for name in self.cleaned_data['authors'].split(','):
        tokens = name.strip().split(' ')

        # If part of this author name matches ORCID identifiers, consider
        # these as orcid ids and do the filtering
        orcid_tokens = set()
        for token in tokens:
            orcid = validate_orcid(token)
            if orcid:
                orcid_tokens.add(token)
                # Filter on the normalised ORCID rather than on the raw
                # token, so that equivalent spellings match as well.
                self.filter(orcids=orcid)

        # Rebuild a full name excluding the ORCID id terms
        name = ' '.join(token for token in tokens
                        if token not in orcid_tokens)
        name = remove_diacritics(name.strip())

        if name.startswith('last:'):
            is_lastname = True
            name = name[5:].strip()
        else:
            is_lastname = ' ' not in name

        if not name:
            continue

        if is_lastname:
            self.filter(authors_last=name)
        else:
            # Match the name in either word order
            reversed_name = ' '.join(reversed(name.split(' ')))
            sq = SQ()
            sq.add(SQ(authors_full=Sloppy(name, slop=1)), SQ.OR)
            sq.add(SQ(authors_full=Sloppy(reversed_name, slop=1)), SQ.OR)
            self.queryset = self.queryset.filter(sq)

    self.queryset = aggregate_combined_status(self.queryset)

    status = self.cleaned_data['status']
    if status:
        self.queryset = self.queryset.post_filter(
            combined_status__in=status)

    # Default ordering by decreasing publication date
    order = self.cleaned_data['sort_by'] or '-pubdate'
    self.queryset = self.queryset.order_by(order).load_all()

    return self.queryset
def authenticate(self, request, remote_user, shib_meta):
    """
    Find or create the user for a Shibboleth login.

    The remote_user is considered as trusted. The ShibbolethAccount is
    looked up by the ``username`` entry of ``shib_meta``, and the ``orcid``
    entry, once validated, is used to connect the login to a Researcher:

    - known account: its user is returned. If an ORCID is given and the
      account's researcher has none, the ORCID is stored on it, or the
      researcher already carrying this ORCID is merged into it.
    - unknown account, ORCID of a known researcher: a ShibbolethAccount
      is created for the researcher's user, which is created first if
      the researcher has none.
    - otherwise: a new user and researcher are created through
      ``create_new_user_and_researcher``.

    :param request: the current request (not used here)
    :param remote_user: the remote user name; authentication is aborted
        when it is empty
    :param shib_meta: mapping of Shibboleth attributes; ``username``,
        ``orcid``, ``first_name`` and ``last_name`` are read here
    :returns: the user, or None when no remote_user was given
    """
    # If no remote_user is given, we abort
    if not remote_user:
        logger.info('remote_user invalid')
        return

    logger.debug('Received remote_user: {}'.format(remote_user))

    # This is the real process of authentication
    user = None
    shib_account = None
    try:
        shib_account = ShibbolethAccount.objects.get(
            shib_username=shib_meta.get('username'))
    except ShibbolethAccount.DoesNotExist:
        logger.debug("username {} not found".format(
            shib_meta.get('username')))

    orcid = validate_orcid(shib_meta.get('orcid'))

    if shib_account:
        logger.debug("Found ShibbolethAccount: {}".format(shib_account))
        # The code assumes that a Researcher exists for the user of every
        # ShibbolethAccount (a missing one raises Researcher.DoesNotExist)
        researcher = Researcher.objects.get(user=shib_account.user)
        # If we have an ORCID, we can do some stuff
        if orcid:
            if researcher.orcid:
                # If both objects have ORCIDs, we can assume that they are identical
                user = shib_account.user
            # Researcher object has no ORCID. We try to find a Researcher with that ORCID and merge, otherwise we can just set the ORCID to the current researcher
            else:
                try:
                    alt_researcher = Researcher.objects.get(orcid=orcid)
                except Researcher.DoesNotExist:
                    logger.debug(
                        "Found no researcher with orcid {}, save that on related researcher"
                        .format(orcid))
                    researcher.orcid = orcid
                    researcher.save()
                else:
                    # We have an alternative researcher. If it has a user, merge the users first, then merge the researchers in any case
                    if alt_researcher.user:
                        merge_users(shib_account.user, alt_researcher.user)
                    researcher.merge(alt_researcher, delete_user=True)
                user = shib_account.user
        else:
            user = shib_account.user

    # We have no ShibbolethAccount object
    # If we have an ORCID, we can try to find a Researcher
    elif orcid:
        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            # No researcher with this ORCID: a new user is created below
            pass
        else:
            # We have found a Researcher object
            if researcher.user:
                # The found researcher has a user object. We use it
                ShibbolethAccount.objects.create(
                    user=researcher.user,
                    shib_username=shib_meta.get('username'))
                user = researcher.user
            else:
                # The found researcher has no user object. We create a user and connect it
                user = User.objects.create_user(
                    remote_user,
                    first_name=shib_meta.get('first_name'),
                    last_name=shib_meta.get('last_name'),
                )
                ShibbolethAccount.objects.create(
                    user=user, shib_username=shib_meta.get('username'))
                researcher.user = user
                researcher.save()

    # No user could be found through the account or the ORCID, so we
    # create a new user and researcher
    if not user:
        user = self.create_new_user_and_researcher(remote_user, orcid,
                                                   shib_meta)

    return user