def convert(ids, from_type):
    '''Uses the NCBI ID Converter API to convert a list of publication IDs, all in the same format
       e.g. DOI identifiers, to another format e.g. PubMed identifiers.

       ids is a list of IDs of the type from_type e.g. a from_type of 'doi' specifies DOI identifiers.
       Duplicate IDs are collapsed before the request is made so each ID is only sent once.

       The function is intended to return a Python dict with the mappings from the input IDs to IDs
       of all other types.
       NOTE(review): the code visible here stops after reading the response's 'status' attribute;
       the mapping dict is created but not yet populated or returned - confirm against the full file.

       Raises PubMedConverterTypeException if from_type is not in converter_types.
       Raises PubMedIDRetrievalException if the response cannot be parsed as XML or does not
       contain exactly one <pmcids> element.
    '''
    if from_type not in converter_types:
        raise PubMedConverterTypeException(from_type)

    # Avoid multiple requests of the same ID
    mapping = {}
    ids = list(set(ids))

    # Request the mapping from the server. The comma-separated ID list is quoted with no "safe"
    # characters so that slashes inside DOIs are percent-encoded as well.
    query_string = "?ids=%s&idtype=%s" % (urllib2.quote(",".join(ids), ''), from_type)
    xml = get_resource("www.ncbi.nlm.nih.gov", '/pmc/utils/idconv/v1.0/%s' % query_string).strip()

    # Parse the response
    try:
        _dom = parseString(xml)
        main_tag = _dom.getElementsByTagName("pmcids")
        # Explicit check rather than an assert: asserts are stripped when Python runs with -O and
        # an AssertionError carries no message, which left the wrapped error text empty.
        if len(main_tag) != 1:
            raise ValueError('expected exactly one <pmcids> element in the response but found %d' % len(main_tag))
        main_tag = main_tag[0]
        request_status = main_tag.getAttribute('status')
    except Exception as e:
        # Any parsing/validation failure is reported to callers as a single retrieval exception type.
        raise PubMedIDRetrievalException('An error occurred retrieving the XML from the PubMed ID Converter API: %s.' % str(e))
def get_info(self):
    '''Fetch the CrossRef record for self.doi and return it as a string.

       The CrossRef guest query page is requested for the DOI and every
       <doi_records>...</doi_records> section found in the returned text is collected.

       Returns the single matching <doi_records> section (including the enclosing tags).
       Raises DOIRetrievalException if no section, or more than one section, is found.
    '''
    # Quote with no "safe" characters so that the slash inside the DOI is percent-encoded too.
    quoted_doi = urllib2.quote(self.doi, '')
    page = get_resource("www.crossref.org", '/guestquery?queryType=doi&restype=unixref&doi=%s&doi_search=Search' % quoted_doi)

    # re.DOTALL lets a record span multiple lines; the non-greedy match keeps records separate.
    records = [hit.group(0) for hit in re.finditer('(<doi_records>.*?</doi_records>)', page, re.DOTALL)]
    record_count = len(records)

    if record_count == 1:
        return records[0]
    if record_count == 0:
        raise DOIRetrievalException('No matches found for the DOI "%s".' % self.doi)
    raise DOIRetrievalException('Multiple (%d) matches found for the DOI "%s".' % (record_count, self.doi))