Ejemplo n.º 1
0
 def provider_dereference(self, record):
     """
     Record provider information for a record that is identified by a DOI.

     The record is only processed when it has an "identifier" entry whose
     "type" is "doi" and which carries a "canonical" form; otherwise the
     method returns without touching the record.

     The canonical DOI is passed to self.dereference() (per the original
     notes, this relies on DOI lookups redirecting to the resource via
     HTTP 303). The DOI itself is always registered against the provider
     through recordmanager.record_provider_doi(); the dereferenced
     location is registered through recordmanager.record_provider_url()
     only when dereferencing produced something other than None.
     """
     # Guard: nothing to do without an identifier section at all
     if "identifier" not in record:
         return

     ident = record["identifier"]

     # Guard: must be typed, must be a doi, and must have a canonical form
     # (checked in that order, short-circuiting on the first failure)
     if "type" not in ident or ident["type"] != "doi" or "canonical" not in ident:
         return

     doi = ident["canonical"]
     location = self.dereference(doi)

     # the doi is copied into the provider object whether or not the
     # dereference succeeded
     recordmanager.record_provider_doi(record, doi)

     # only record a url when dereferencing actually found one
     if location is not None:
         recordmanager.record_provider_url(record, location)
 def test_01_record_provider_url(self):
     """
     Recording a single provider url on an empty record should create the
     "provider" section with a one-element "url" list holding that url.
     """
     expected_url = "http://hello"
     rec = {}

     recordmanager.record_provider_url(rec, expected_url)

     # the provider section must have been created
     assert "provider" in rec
     provider = rec["provider"]

     # and it must hold exactly the one url we recorded
     assert "url" in provider
     urls = provider["url"]
     assert len(urls) == 1
     assert urls[0] == "http://hello"
Ejemplo n.º 3
0
 def provider_resolver(self, record):
     """
     Record provider information for a record that is identified by a
     pubmed id.

     The record is only processed when it has an "identifier" entry whose
     "type" is "pmid" and which carries a "canonical" form; otherwise the
     method returns without touching the record.

     First self._resolve_doi() is asked for a (doi, location) pair for the
     canonical pmid. If the location is not None, both the location and
     the doi are registered via recordmanager and processing stops.
     Otherwise self._scrape_urls() is used as a fallback (per the original
     notes, this scrapes the NCBI site), and any urls it returns are
     registered via recordmanager.record_provider_urls().
     """
     # Guard: nothing to do without an identifier section at all
     if "identifier" not in record:
         return

     ident = record["identifier"]

     # Guard: must be typed, must be a pmid, and must have a canonical form
     # (checked in that order, short-circuiting on the first failure)
     if "type" not in ident or ident["type"] != "pmid" or "canonical" not in ident:
         return

     pmid = ident["canonical"]

     # preferred route: resolve a doi for the item and use its location
     doi, location = self._resolve_doi(pmid)
     if location is not None:
         recordmanager.record_provider_url(record, location)
         recordmanager.record_provider_doi(record, doi)
         return

     # fallback route: the DOI lookup gave us no location, so look for
     # candidate urls by scraping instead
     scraped = self._scrape_urls(pmid)
     if scraped is None or len(scraped) == 0:
         return

     recordmanager.record_provider_urls(record, scraped)