def provider_dereference(self, record):
    """
    Record the provider details for a DOI-identified record.

    The canonical DOI is handed to self.dereference, which is expected to
    report the URL that the DOI resolves to (DOI lookups redirect to the
    resource with an HTTP 303).  The DOI itself is always passed to
    recordmanager.record_provider_doi; the resolved location is passed to
    recordmanager.record_provider_url only when one was found.

    Nothing happens unless the record carries an "identifier" whose "type"
    is "doi" and which has a "canonical" form.  Always returns None.
    """
    # guard: there must be an identifier at all
    if "identifier" not in record:
        return

    # guard: the identifier must be a doi and must have a canonical form
    ident = record["identifier"]
    if (
        "type" not in ident
        or ident["type"] != "doi"
        or "canonical" not in ident
    ):
        return

    doi = ident["canonical"]
    location = self.dereference(doi)

    # the doi is copied into the provider object whether or not the
    # dereference produced a location
    recordmanager.record_provider_doi(record, doi)

    # only record a url when the dereference actually found one
    if location is not None:
        recordmanager.record_provider_url(record, location)
def test_01_record_provider_url(self):
    """
    Recording a single provider url on an empty record should leave
    exactly that url as the only entry of record["provider"]["url"].
    """
    target = {}
    recordmanager.record_provider_url(target, "http://hello")

    # the provider section must have been created on the empty record
    assert "provider" in target
    provider = target["provider"]

    # and it must hold exactly the one url we recorded
    assert "url" in provider
    urls = provider["url"]
    assert len(urls) == 1
    assert urls[0] == "http://hello"
def provider_resolver(self, record):
    """
    Record the provider url(s) for a record identified by a PubMed id.

    The canonical pmid is first handed to self._resolve_doi, which yields
    a (doi, location) pair.  When a location comes back, it and the doi
    are recorded through recordmanager and the method is done.  Otherwise
    self._scrape_urls is used as a fallback, and any urls it returns are
    recorded through recordmanager.record_provider_urls.

    Nothing happens unless the record carries an "identifier" whose "type"
    is "pmid" and which has a "canonical" form.  Always returns None.
    """
    # guard: there must be an identifier at all
    if "identifier" not in record:
        return

    # guard: the identifier must be a pmid and must have a canonical form
    ident = record["identifier"]
    if (
        "type" not in ident
        or ident["type"] != "pmid"
        or "canonical" not in ident
    ):
        return

    pmid = ident["canonical"]

    # preferred route: resolve a doi (and the location it points at)
    doi, location = self._resolve_doi(pmid)
    if location is not None:
        recordmanager.record_provider_url(record, location)
        recordmanager.record_provider_doi(record, doi)
        return

    # the DOI lookup gave no location, so fall back to scraping for
    # candidate urls (the NCBI site, per the original author's note)
    candidates = self._scrape_urls(pmid)
    if candidates is None or len(candidates) == 0:
        return

    recordmanager.record_provider_urls(record, candidates)