def get(self, uri):
    """Fetch ``uri`` from the protocols.io API and return the decoded JSON.

    On a non-ok response the error details are logged and recorded via
    ``self.addError`` and the method falls through returning ``None``
    implicitly -- it cannot return early because of the cache (see the
    inline comment below).
    """
    logd.info(uri)
    log.debug('going to network for protocols')
    resp = requests.get(uri, headers=self._pio_header)
    if resp.ok:
        try:
            j = resp.json()  # the api is reasonably consistent
        except BaseException as e:
            # a 2xx response with an undecodable body is unexpected;
            # log and re-raise with the original traceback
            # (removed a leftover breakpoint() debugging call here)
            log.exception(e)
            raise
        return j
    else:
        try:
            j = resp.json()
            sc = j['status_code']
            em = j['error_message']
            msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
            logd.error(msg)
            self.addError(msg)
            # can't return here because of the cache
        except BaseException as e:
            # error body was not json (or missing the expected keys)
            log.exception(e)
            logd.error(f'protocol no access {uri} {self.id!r}')
def doi(doi_string):  # FIXME massive network sandbox violation here
    """ check if a doi string resolves, if it does, return it """
    if doi_string is None:
        raise TypeError('WHAT HAVE YOU DONE!?')

    candidate = idlib.Doi(doi_string)
    try:
        # FIXME network sandbox violation
        if candidate.metadata() is not None:
            return candidate
    except idlib.exceptions.RemoteError:
        # A doi can be present on the platform yet fail to resolve: it has
        # only been reserved, never officially published, so we do not add
        # it as metadata.  Checking resolution directly is more correct
        # than checking the status on the platform.
        # FIXME HOWEVER it violates the network sandbox, so we probably
        # need an extra step during the data retrieval phase which
        # attempts to fetch all the doi metadata
        pass
    except Exception as e:
        # XXX random errors need to be ignored here for now since this
        # really should not be run at this step; due to the network
        # dependency we need a post-network step where we can strip out
        # all the things that fail
        log.exception(e)
def _get_protocol_json(self, uri):
    """Resolve ``uri`` to a protocols.io record and return its API JSON.

    Returns ``None`` (after recording the error via ``self.addError``)
    when the uri is not a protocols.io uri or when the API response
    cannot be decoded.
    """
    logd.info(uri.identifier if isinstance(uri, idlib.Stream) else uri)  # FIXME
    pi = idlib.get_right_id(uri)
    if 'protocols.io' in pi:
        pioid = pi.slug  # FIXME normalize before we ever get here ...
        log.info(pioid)
    else:
        msg = f'protocol uri is not from protocols.io {pi} {self.id}'
        logd.error(msg)
        self.addError(msg)
        return

    apiuri = 'https://www.protocols.io/api/v3/protocols/' + pioid
    log.debug('going to network for protocols')
    resp = requests.get(apiuri, headers=self._pio_header)
    if resp.ok:
        try:
            j = resp.json()  # the api is reasonably consistent
        except BaseException as e:
            # a 2xx response with an undecodable body is unexpected;
            # log and re-raise with the original traceback
            # (removed a leftover breakpoint() debugging call here)
            log.exception(e)
            raise
        return j
    else:
        try:
            j = resp.json()
            sc = j['status_code']
            em = j['error_message']
            msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
            logd.error(msg)
            self.addError(msg)
            # can't return here because of the cache
        except BaseException as e:
            # error body was not json (or missing the expected keys)
            log.exception(e)
            logd.error(f'protocol no access {uri} {self.id!r}')
def _protocol_uris_resolved(self):
    """Yield the dereferenced (resolved) form of each protocol uri.

    Per-uri failures are logged and/or recorded via ``self.addError``
    and the offending uri is skipped; the generator itself never raises.
    """
    # FIXME quite slow ... every uri goes to the network to dereference
    for start_uri in self.protocol_uris:
        log.debug(start_uri)
        try:
            if not hasattr(start_uri, 'dereference'):
                start_uri = idlib.StreamUri(start_uri)

            end_uri = start_uri.dereference()
            yield end_uri
            sc = end_uri.progenitor.status_code
            # was sc > 400, which silently skipped 400 Bad Request;
            # every 4xx/5xx status is an access error
            if sc >= 400:
                msg = f'error accessing {end_uri} {sc}'
                self.addError(msg, blame='submission', logfunc=logd.error)
        except idlib.exceptions.ResolutionError:
            pass  # FIXME I think we already log this error?
        except (requests.exceptions.MissingSchema,
                OntId.BadCurieError) as e:
            # malformed uri supplied in the submission metadata; both
            # cases were handled identically so they share one branch
            self.addError(e, blame='submission', logfunc=logd.error)
        except BaseException as e:
            log.exception(e)
            log.critical('see exception above')