Beispiel #1
0
def convert(ids, from_type):
    '''Uses the NCBI ID Converter API to convert a list of publication IDs in the same format e.g. DOI identifiers
    to another format e.g. PubMed identifiers.

    Arguments:
        ids is a list of ID strings, all of the type from_type. Duplicates are removed before the request is made.
        from_type names the type of the input IDs e.g. 'doi' specifies DOI identifiers. It must be one of the
        values in converter_types.

    Raises:
        PubMedConverterTypeException if from_type is not in converter_types.
        PubMedIDRetrievalException if the response cannot be parsed or does not contain exactly one <pmcids>
        element.

    The function is meant to return a Python dict with the mappings from the input IDs to IDs of all other types.
    NOTE(review): the code visible here stops after reading the status attribute of the response; the part which
    fills in and returns the mapping is not shown - confirm against the full source.
    '''

    if from_type not in converter_types:
        raise PubMedConverterTypeException(from_type)

    # Avoid multiple requests of the same ID
    mapping = {}
    ids = list(set(ids))

    # Request the mapping from the server. The comma-separated ID list is quoted with no safe characters so
    # that slashes inside IDs are escaped as well.
    query_string = "?ids=%s&idtype=%s" % (urllib2.quote(",".join(ids), ''), from_type)
    xml = get_resource("www.ncbi.nlm.nih.gov", '/pmc/utils/idconv/v1.0/%s' % query_string).strip()

    # Parse the response. Any failure in here is reported as a PubMedIDRetrievalException.
    try:
        _dom = parseString(xml)
        main_tag = _dom.getElementsByTagName("pmcids")
        # Checked explicitly rather than with assert: assert statements are stripped when Python runs with -O
        # and an AssertionError would carry no message for the exception raised below.
        if len(main_tag) != 1:
            raise ValueError('expected exactly one <pmcids> element but found %d' % len(main_tag))
        main_tag = main_tag[0]
        request_status = main_tag.getAttribute('status')
    except Exception as e:
        raise PubMedIDRetrievalException('An error occurred retrieving the XML from the PubMed ID Converter API: %s.' % str(e))
Beispiel #2
0
    def get_info(self):
        '''Fetch the CrossRef record for self.doi.

        The DOI is percent-encoded (no safe characters) and looked up through the guest query page on
        www.crossref.org. Every <doi_records>...</doi_records> section in the response is collected.

        Returns the matched section when exactly one is found.
        Raises DOIRetrievalException when no section or more than one section is found.
        '''
        doi_param = urllib2.quote(self.doi, '')
        query_path = '/guestquery?queryType=doi&restype=unixref&doi=%s&doi_search=Search' % doi_param
        response = get_resource("www.crossref.org", query_path)

        # Non-greedy with DOTALL so that each section is matched on its own, even across line breaks.
        records = [found.group(0) for found in re.finditer('(<doi_records>.*?</doi_records>)', response, re.DOTALL)]

        if len(records) == 1:
            return records[0]
        if not records:
            raise DOIRetrievalException('No matches found for the DOI "%s".' % self.doi)
        raise DOIRetrievalException('Multiple (%d) matches found for the DOI "%s".' % (len(records), self.doi))