def delete_work_for_orcid(orcid):
    """Delete the first work summary of every work group stored for ``orcid``.

    The OAuth token is obtained from ``TestOrcidPusherBase._oauth_token``.
    Only the first entry of each group's ``work-summary`` list is deleted.

    Args:
        orcid: the ORCID identifier whose works are removed.
    """
    token = TestOrcidPusherBase._oauth_token(orcid)
    orcid_client = OrcidClient(token, orcid)
    summary = orcid_client.get_all_works_summary()
    groups = summary.get("group", [])
    for group in groups:
        # Each group may list several summaries; only the first one is used.
        first_summary = group["work-summary"][0]
        orcid_client.delete_work(first_summary["put-code"])
def test_get_putcodes_for_source_source_client_id_none(self):
    """The works summary yields 90 putcodes for the given source client id."""
    author_orcid = '0000-0002-4490-1930'
    summary = OrcidClient('mytoken', author_orcid).get_all_works_summary()
    # Fail early if the service answered with an error payload.
    summary.raise_for_result()
    found = list(summary.get_putcodes_for_source_iter('0000-0001-8607-8906'))
    assert len(found) == 90
class OrcidPutcodeGetter(object):
    """Fetch from the ORCID API the putcodes of the Inspire works of one ORCID."""

    def __init__(self, orcid, oauth_token):
        """Build the API client and read the source client id from the app config.

        Args:
            orcid: the ORCID identifier to query.
            oauth_token: the OAuth token used by the ORCID client.
        """
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        credentials = current_app.config['ORCID_APP_CREDENTIALS']
        self.source_client_id_path = credentials['consumer_key']

    def get_all_inspire_putcodes(self):
        """
        Get all the Inspire putcodes for the given ORCID.

        Yields:
            ``(putcode, url)`` pairs whose url matches ``INSPIRE_WORK_URL_REGEX``.
        """
        all_putcodes = self._get_all_putcodes()
        if all_putcodes:
            for putcode, url in self._get_urls_for_putcodes(all_putcodes):
                # Keep only the putcodes that belong to Inspire.
                if not INSPIRE_WORK_URL_REGEX.match(url):
                    continue
                yield putcode, url

    def _get_all_putcodes(self):
        """Return the list of putcodes, for the configured source, in the works summary.

        Raises:
            exceptions.InputDataInvalidException: when the summary response
                carries an ORCID client JSON error.
        """
        summary = self.client.get_all_works_summary()
        utils.log_service_response(logger, summary, 'in OrcidPutcodeGetter works summary')
        try:
            summary.raise_for_result()
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        source_putcodes = summary.get_putcodes_for_source(self.source_client_id_path)
        return list(source_putcodes)

    def _get_urls_for_putcodes(self, putcodes):
        """Return an iterable of ``(putcode, url)`` pairs for the given putcodes.

        Every bulk response is fetched and validated before anything is
        returned; the pairs themselves are chained lazily.

        Raises:
            exceptions.InputDataInvalidException: when any bulk response
                carries an ORCID client JSON error.
        """
        # The call get_bulk_works_details_iter() can be very expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works, 8 calls would be performed
        # with a total data transfer > 0.5 Gb.
        batches = []
        for details in self.client.get_bulk_works_details_iter(putcodes):
            utils.log_service_response(logger, details, 'in OrcidPutcodeGetter works details')
            try:
                details.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)
            batches.append(details.get_putcodes_and_urls())
        if not batches:
            # No response at all: hand back an empty list.
            return []
        return itertools.chain.from_iterable(batches)
def get_putcode_for_work(orcid, token, recid):
    """Find the putcode of the ORCID work whose URL ends with ``/<recid>``.

    Args:
        orcid: the ORCID identifier to query.
        token: the OAuth token used by the ORCID client.
        recid: the record id expected as last path segment of the work URL.

    Returns:
        The matching putcode, or None when the source has no putcodes or no
        work URL ends with ``/<recid>``.
    """
    orcid_client = OrcidClient(token, orcid)
    summary = orcid_client.get_all_works_summary()
    summary.raise_for_result()

    client_id = config.get('orcid-api', 'consumer_key')
    source_putcodes = list(summary.get_putcodes_for_source_iter(client_id))
    if not source_putcodes:
        return None

    # TODO: this has to be simplified when we push recids as external
    # identifier (thus just the get_all_works_summary() call is required to
    # match recids with putcodes).
    wanted_suffix = '/{}'.format(recid)
    for details in orcid_client.get_bulk_works_details_iter(source_putcodes):
        details.raise_for_result()
        for putcode, url in details.get_putcodes_and_urls_iter():
            if url.endswith(wanted_suffix):
                return putcode
    return None
class OrcidPutcodeGetter(object):
    """Query the ORCID API for the putcodes, and matching recids, of one ORCID."""

    def __init__(self, orcid, oauth_token):
        """Build the API client and read the source client id from the app config.

        Args:
            orcid: the ORCID identifier to query.
            oauth_token: the OAuth token used by the ORCID client.
        """
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.source_client_id_path = current_app.config[
            "ORCID_APP_CREDENTIALS"]["consumer_key"]

    def get_all_inspire_putcodes_and_recids_iter(self):
        """
        Query ORCID api and get all the Inspire putcodes for the given ORCID.

        Putcodes whose recid is embedded in the works summary are yielded
        first; the remaining ones are resolved through the works details.

        Yields:
            ``(putcode, recid)`` pairs.

        Raises:
            exceptions.TokenInvalidDeletedException: see
                ``_get_all_works_summary``.
            exceptions.InputDataInvalidException: when an ORCID response
                carries a client JSON error.
        """
        summary_response = self._get_all_works_summary()
        # `putcodes_recids` is a list like: [('43326850', 20), ('43255490', None)]
        putcodes_recids = list(
            summary_response.get_putcodes_and_recids_for_source_iter(
                self.source_client_id_path))
        putcodes_with_recids = [x for x in putcodes_recids if x[1]]
        putcodes_without_recids = [x[0] for x in putcodes_recids if not x[1]]

        for putcode, recid in putcodes_with_recids:
            yield putcode, recid

        if not putcodes_without_recids:
            return

        for putcode, recid in self._get_putcodes_and_recids_iter(
                putcodes_without_recids):
            yield putcode, recid

    def _get_all_works_summary(self):
        """
        Query ORCID api and get all the putcodes with their embedded recids
        for the given ORCID.
        An embedded recid is a recid written as external-identifier.

        Returns:
            The validated works summary response.

        Raises:
            exceptions.TokenInvalidDeletedException: when the token is
                invalid, mismatched or lacks permission; the push access
                token is deleted before raising.
            exceptions.InputDataInvalidException: on any other ORCID client
                JSON error.
        """
        response = self.client.get_all_works_summary()
        LOGGER.info("Get ORCID work summary", response=response, orcid=self.orcid)
        try:
            response.raise_for_result()
        except (
            orcid_client_exceptions.TokenInvalidException,
            orcid_client_exceptions.TokenMismatchException,
            orcid_client_exceptions.TokenWithWrongPermissionException,
        ):
            # NOTE(review): the raw OAuth token is written to the log here;
            # confirm that this is acceptable.
            LOGGER.info(
                "OrcidPutcodeGetter: deleting Orcid push access",
                token=self.oauth_token,
                orcid=self.orcid,
            )
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return response

    def _get_putcodes_and_recids_iter(self, putcodes):
        """Yield ``(putcode, recid)`` for the given putcodes that are Inspire works.

        Works whose url does not match ``INSPIRE_WORK_URL_REGEX`` are skipped;
        works whose recid cannot be parsed from the url are logged and skipped.
        """
        for putcode, url in self._get_urls_for_putcodes_iter(putcodes):
            # Filter out putcodes that do not belong to Inspire.
            if not INSPIRE_WORK_URL_REGEX.match(url):
                continue
            recid = PidStoreBase.get_pid_from_record_uri(url)[1]
            if not recid:
                LOGGER.error(
                    "OrcidPutcodeGetter: cannot parse recid from url",
                    url=url,
                    orcid=self.orcid,
                )
                continue
            yield putcode, recid

    def _get_urls_for_putcodes_iter(self, putcodes):
        """Return an iterable of ``(putcode, url)`` pairs for the given putcodes.

        Every bulk response is fetched and validated before anything is
        returned; the pairs themselves are chained lazily.

        Raises:
            exceptions.InputDataInvalidException: when any bulk response
                carries an ORCID client JSON error.
        """
        # The call `get_bulk_works_details_iter()` can be expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works (each of them with many
        # authors), 8 calls would be performed with a total data transfer > 0.5 Gb.
        chained = []
        for response in self.client.get_bulk_works_details_iter(putcodes):
            # Note: this log can be large. Consider removing it when this part
            # is considered mature.
            LOGGER.info("ORCID work details", response=response, orcid=self.orcid)
            try:
                response.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)
            chained = itertools.chain(chained, response.get_putcodes_and_urls_iter())
        return chained

    def get_putcodes_and_recids_by_identifiers_iter(self, identifiers):
        """
        Yield putcode and recid for each work matched by the external
        identifiers.
        Note: external identifiers of type 'other-id' are skipped.

        A work is yielded once per matching identifier, so a work matching
        on several identifiers is yielded several times.

        Args:
            identifiers (List[inspirehep.orcid.converter.ExternalIdentifier]):
                list of all external identifiers added after the xml
                conversion.

        Yields:
            ``(putcode, recid)`` pairs; recid is None when it cannot be found.
        """
        summary_response = self._get_all_works_summary()
        for (
            putcode,
            ids,
        ) in summary_response.get_putcodes_and_external_identifiers_iter():
            # ids is a list like:
            #   [
            #       {'external-id-relationship': 'SELF',
            #        'external-id-type': 'other-id',
            #        'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
            #        'external-id-value': '20'
            #        },...
            #   ]

            # The recid lookup may need an extra (expensive) works-details
            # call, so it is done lazily: only for works that actually match,
            # and at most once per work.
            recid = None
            recid_resolved = False

            for identifier in ids:
                id_type = identifier.get("external-id-type")
                # We are interested only in doi, arxiv, isbns.
                if not id_type or id_type.lower() == "other-id":
                    continue
                id_value = identifier.get("external-id-value")
                if not id_value:
                    continue

                if ExternalIdentifier(id_type, id_value) not in identifiers:
                    continue

                if not recid_resolved:
                    recid = self._get_recid_for_work(ids, str(putcode))
                    recid_resolved = True
                yield putcode, recid

    def _get_recid_for_work(self, external_identifiers, putcode):
        """
        Get the recid for a work given its external identifiers and putcode.
        The recid might be in the external identifiers or a works-details
        call might be performed to find it.

        Args:
            external_identifiers (List[Dict]): a list like:
               [
                   {'external-id-relationship': 'SELF',
                    'external-id-type': 'other-id',
                    'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
                    'external-id-value': '20'
                    },...
               ]
            putcode: putcode of the given work.

        Returns: the Inspire recid matching the work, or None if not found.
        """
        for identifier in external_identifiers:
            id_type = identifier.get("external-id-type")
            if not id_type or id_type.lower() != "other-id":
                continue

            id_url = inspire_service_orcid_utils.smartget(
                identifier, "external-id-url.value", "")
            if not re.match(r".*inspire.*", id_url, re.I):
                continue

            id_value = identifier.get("external-id-value")
            if not id_value:
                continue

            # recid found.
            return id_value

        # The recid was not found in the external_identifiers.
        # Thus we call get_bulk_works_details_iter().
        putcodes_recid = list(self._get_putcodes_and_recids_iter([putcode]))
        if putcodes_recid:
            return putcodes_recid[0][1]
        return None
class OrcidPutcodeGetter(object):
    """Query the ORCID API for the putcodes, and matching recids, of one ORCID."""

    def __init__(self, orcid, oauth_token):
        """Build the API client and read the source client id from the app config.

        Args:
            orcid: the ORCID identifier to query.
            oauth_token: the OAuth token used by the ORCID client.
        """
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.source_client_id_path = current_app.config['ORCID_APP_CREDENTIALS'][
            'consumer_key']

    def get_all_inspire_putcodes_and_recids_iter(self):
        """
        Query ORCID api and get all the Inspire putcodes for the given ORCID.

        Putcodes whose recid is embedded in the works summary are yielded
        first; the remaining ones are resolved through the works details.

        Yields:
            ``(putcode, recid)`` pairs.

        Raises:
            exceptions.TokenInvalidDeletedException: see
                ``_get_all_works_summary``.
            exceptions.InputDataInvalidException: when an ORCID response
                carries a client JSON error.
        """
        summary_response = self._get_all_works_summary()
        # `putcodes_recids` is a list like: [('43326850', 20), ('43255490', None)]
        putcodes_recids = list(summary_response.get_putcodes_and_recids_for_source_iter(
            self.source_client_id_path))
        putcodes_with_recids = [x for x in putcodes_recids if x[1]]
        putcodes_without_recids = [x[0] for x in putcodes_recids if not x[1]]

        for putcode, recid in putcodes_with_recids:
            yield putcode, recid

        if not putcodes_without_recids:
            return

        for putcode, recid in self._get_putcodes_and_recids_iter(putcodes_without_recids):
            yield putcode, recid

    def _get_all_works_summary(self):
        """
        Query ORCID api and get all the putcodes with their embedded recids
        for the given ORCID.
        An embedded recid is a recid written as external-identifier.

        Returns:
            The validated works summary response.

        Raises:
            exceptions.TokenInvalidDeletedException: when the token is
                invalid, mismatched or lacks permission; the push access
                token is deleted before raising.
            exceptions.InputDataInvalidException: on any other ORCID client
                JSON error.
        """
        response = self.client.get_all_works_summary()
        utils.log_service_response(logger, response, 'in OrcidPutcodeGetter works summary')
        try:
            response.raise_for_result()
        except (orcid_client_exceptions.TokenInvalidException,
                orcid_client_exceptions.TokenMismatchException,
                orcid_client_exceptions.TokenWithWrongPermissionException):
            # NOTE(review): the raw OAuth token is written to the log here;
            # confirm that this is acceptable.
            logger.info('OrcidPutcodeGetter: deleting Orcid push access token={} for orcid={}'.format(
                self.oauth_token, self.orcid))
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return response

    def _get_putcodes_and_recids_iter(self, putcodes):
        """Yield ``(putcode, recid)`` for the given putcodes that are Inspire works.

        Works whose url does not match ``INSPIRE_WORK_URL_REGEX`` are skipped;
        works whose recid cannot be parsed from the url are logged and skipped.
        """
        for putcode, url in self._get_urls_for_putcodes_iter(putcodes):
            # Filter out putcodes that do not belong to Inspire.
            if not INSPIRE_WORK_URL_REGEX.match(url):
                continue
            recid = get_pid_from_record_uri(url)[1]
            if not recid:
                logger.error('OrcidPutcodeGetter: cannot parse recid from url={} for orcid={}'.format(
                    url, self.orcid))
                continue
            yield putcode, recid

    def _get_urls_for_putcodes_iter(self, putcodes):
        """Return an iterable of ``(putcode, url)`` pairs for the given putcodes.

        Every bulk response is fetched and validated before anything is
        returned; the pairs themselves are chained lazily.

        Raises:
            exceptions.InputDataInvalidException: when any bulk response
                carries an ORCID client JSON error.
        """
        # The call `get_bulk_works_details_iter()` can be expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works (each of them with many
        # authors), 8 calls would be performed with a total data transfer > 0.5 Gb.
        chained = []
        for response in self.client.get_bulk_works_details_iter(putcodes):
            # Note: this log can be large. Consider removing it when this part
            # is considered mature.
            utils.log_service_response(logger, response, 'in OrcidPutcodeGetter works details')
            try:
                response.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)
            chained = itertools.chain(chained, response.get_putcodes_and_urls_iter())
        return chained

    def get_putcodes_and_recids_by_identifiers_iter(self, identifiers):
        """
        Yield putcode and recid for each work matched by the external
        identifiers.
        Note: external identifiers of type 'other-id' are skipped.

        A work is yielded once per matching identifier, so a work matching
        on several identifiers is yielded several times.

        Args:
            identifiers (List[inspirehep.modules.orcid.converter.ExternalIdentifier]):
                list of all external identifiers added after the xml
                conversion.

        Yields:
            ``(putcode, recid)`` pairs; recid is None when it cannot be found.
        """
        summary_response = self._get_all_works_summary()
        for putcode, ids in summary_response.get_putcodes_and_external_identifiers_iter():
            # ids is a list like:
            #   [
            #       {'external-id-relationship': 'SELF',
            #        'external-id-type': 'other-id',
            #        'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
            #        'external-id-value': '20'
            #        },...
            #   ]

            # The recid lookup may need an extra (expensive) works-details
            # call, so it is done lazily: only for works that actually match,
            # and at most once per work.
            recid = None
            recid_resolved = False

            for identifier in ids:
                id_type = identifier.get('external-id-type')
                # We are interested only in doi, arxiv, isbns.
                if not id_type or id_type.lower() == 'other-id':
                    continue
                id_value = identifier.get('external-id-value')
                if not id_value:
                    continue

                if ExternalIdentifier(id_type, id_value) not in identifiers:
                    continue

                if not recid_resolved:
                    recid = self._get_recid_for_work(ids, str(putcode))
                    recid_resolved = True
                yield putcode, recid

    def _get_recid_for_work(self, external_identifiers, putcode):
        """
        Get the recid for a work given its external identifiers and putcode.
        The recid might be in the external identifiers or a works-details
        call might be performed to find it.

        Args:
            external_identifiers (List[Dict]): a list like:
               [
                   {'external-id-relationship': 'SELF',
                    'external-id-type': 'other-id',
                    'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
                    'external-id-value': '20'
                    },...
               ]
            putcode: putcode of the given work.

        Returns: the Inspire recid matching the work, or None if not found.
        """
        for identifier in external_identifiers:
            id_type = identifier.get('external-id-type')
            if not id_type or id_type.lower() != 'other-id':
                continue

            id_url = inspire_service_orcid_utils.smartget(identifier, 'external-id-url.value', '')
            if not re.match(r'.*inspire.*', id_url, re.I):
                continue

            id_value = identifier.get('external-id-value')
            if not id_value:
                continue

            # recid found.
            return id_value

        # The recid was not found in the external_identifiers.
        # Thus we call get_bulk_works_details_iter().
        putcodes_recid = list(self._get_putcodes_and_recids_iter([putcode]))
        if putcodes_recid:
            return putcodes_recid[0][1]
        return None
def test_invalid_token(self):
    """A summary fetched with an invalid token raises TokenInvalidException and is not ok."""
    summary = OrcidClient('invalidtoken', self.orcid).get_all_works_summary()
    with pytest.raises(exceptions.TokenInvalidException):
        summary.raise_for_result()
    assert not summary.ok