def clean_doi(input_doi):
    """Reduce a DOI written in one of several common forms to the bare DOI.

    The input is first passed through ``remove_nonprinting_characters`` and
    lowercased.  Recognised forms, checked in this order:

    * ``http(s)://[dx.]doi.org/10.xxx``
    * ``[dx.]doi.org/10.xxx`` (no scheme)
    * ``doi:10.xxx``
    * a bare ``10.xxx``
    * any text that contains ``10.<digits>`` somewhere inside it

    Returns the DOI starting at ``10.``, or ``None`` when no form applies,
    when the pattern for the detected form does not match, or when the
    cleaned input has no ``lower`` method (e.g. it is ``None``).
    """
    input_doi = remove_nonprinting_characters(input_doi)
    try:
        input_doi = input_doi.lower()
        if input_doi.startswith("http"):
            match = re.match(r"^https*://(dx\.)*doi\.org/(10\..+)", input_doi)
            doi = match.group(2)
        elif "doi.org" in input_doi:
            match = re.match(r"^(dx\.)*doi\.org/(10\..+)", input_doi)
            doi = match.group(2)
        elif input_doi.startswith("doi:"):
            match = re.match(r"^doi:(10\..+)", input_doi)
            doi = match.group(1)
        elif input_doi.startswith("10."):
            doi = input_doi
        elif "10." in input_doi:
            # The leading wildcard must be lazy: a greedy ".*" backtracks from
            # the end of the string and captures the LAST "10.<digits>" run,
            # truncating DOIs whose suffix itself contains "10."
            # (e.g. "doi 10.1016/j.cell.2010.10.012" became "10.012").
            match = re.match(r".*?(10\.\d+.+)", input_doi, re.DOTALL)
            doi = match.group(1)
        else:
            doi = None
            try:
                logger.debug(
                    u"MALFORMED DOI {input_doi}".format(input_doi=input_doi))
            except Exception:
                # Formatting the value failed; still record the event, but do
                # not swallow KeyboardInterrupt/SystemExit like a bare except.
                logger.debug(u"MALFORMED DOI, can't print doi")
    except AttributeError:
        # Either the input is not string-like, or a regex above returned no
        # match and ``match.group`` was called on None.
        doi = None
    return doi
def clean_doi(input_doi):
    """Extract the bare DOI (the part starting at ``10.``) from ``input_doi``.

    ``input_doi`` is run through ``remove_nonprinting_characters`` and
    lowercased, then matched against these forms in order: a doi.org URL with
    an http(s) scheme, a doi.org address without a scheme, a ``doi:`` prefix,
    a bare DOI, and finally free text containing ``10.<digits>``.

    Returns the DOI string, or ``None`` if nothing matches or the cleaned
    input is not string-like (the resulting ``AttributeError`` is caught).
    """
    input_doi = remove_nonprinting_characters(input_doi)
    try:
        input_doi = input_doi.lower()
        if input_doi.startswith("http"):
            match = re.match(r"^https*://(dx\.)*doi\.org/(10\..+)", input_doi)
            doi = match.group(2)
        elif "doi.org" in input_doi:
            match = re.match(r"^(dx\.)*doi\.org/(10\..+)", input_doi)
            doi = match.group(2)
        elif input_doi.startswith("doi:"):
            match = re.match(r"^doi:(10\..+)", input_doi)
            doi = match.group(1)
        elif input_doi.startswith("10."):
            doi = input_doi
        elif "10." in input_doi:
            # Lazy prefix so the capture starts at the FIRST "10.<digits>";
            # the greedy form picked the last one and cut the DOI short when
            # its suffix contained "10." (e.g. ".../j.cell.2010.10.012").
            match = re.match(r".*?(10\.\d+.+)", input_doi, re.DOTALL)
            doi = match.group(1)
        else:
            doi = None
            try:
                logger.debug(u"MALFORMED DOI {input_doi}".format(
                    input_doi=input_doi))
            except Exception:
                # Could not render the value into the message; log a generic
                # line instead.  Narrowed from a bare ``except`` so that
                # interpreter-exit exceptions are not swallowed.
                logger.debug(u"MALFORMED DOI, can't print doi")
    except AttributeError:
        # Raised by ``.lower()`` on a non-string or by ``.group`` on a failed
        # match; both mean "no usable DOI".
        doi = None
    return doi
def clean_id(nid):
    """Trim surrounding spaces and double quotes from ``nid`` and drop
    nonprinting characters.

    Values that do not support these string operations are returned as they
    were passed in.
    """
    result = nid
    try:
        result = result.strip(' "')
        result = unicode_helpers.remove_nonprinting_characters(result)
    except (TypeError, AttributeError):
        # Not a string -- that is acceptable, it might be a biblio value.
        pass
    return result
def clean_doi(input_doi):
    """Return the bare DOI contained in ``input_doi``, or ``None``.

    After ``remove_nonprinting_characters`` is applied, three forms are
    recognised: an ``http(s)://[dx.]doi.org/10.xxx`` URL, a ``doi:10.xxx``
    string, and a bare ``10.xxx`` DOI.  Anything else -- including an
    ``http`` or ``doi:`` string whose remainder is not a DOI -- yields
    ``None``.
    """
    input_doi = remove_nonprinting_characters(input_doi)

    if input_doi.startswith("http"):
        match = re.match(r"^https*://(dx\.)*doi\.org/(10\..+)", input_doi)
        # A non-doi.org URL produces no match; report "no DOI" rather than
        # failing with AttributeError on ``None.group``.
        return match.group(2) if match else None

    if input_doi.startswith("doi:"):
        match = re.match(r"^doi:(10\..+)", input_doi)
        return match.group(1) if match else None

    if input_doi.startswith("10."):
        return input_doi

    return None
def get_aliases_from_product_id_strings(product_id_strings):
    """Collect provider member items for every recognised id string.

    Each id is stripped of nonprinting characters and surrounding whitespace,
    then classified as DOI, PMID, arXiv id or URL (first match wins) and
    handed to the corresponding provider's ``member_items``.  Ids that match
    none of the checks contribute nothing.

    Returns a single list with the items from all ids, in input order.
    """
    collected = []
    for raw_id in product_id_strings:
        candidate = remove_nonprinting_characters(raw_id)
        candidate = candidate.strip()  # surrounding spaces are dropped too

        if is_doi(candidate):
            found = providers.crossref.Crossref().member_items(candidate)
        elif is_pmid(candidate):
            found = providers.pubmed.Pubmed().member_items(candidate)
        elif is_arxiv(candidate):
            found = providers.arxiv.Arxiv().member_items(candidate)
        elif is_url(candidate):
            found = providers.webpage.Webpage().member_items(candidate)
        else:
            # Unrecognised id: nothing to add.
            continue

        collected.extend(found)
    return collected
def normalize_alias(alias):
    """Return ``alias`` as a ``(namespace, id)`` tuple with the id cleaned.

    ``biblio`` aliases are returned without touching the id.  For every other
    namespace the id has nonprinting characters and surrounding whitespace
    removed and is then passed to the cleaner of the first id type it matches
    (DOI, PMID, arXiv, URL); an id matching none of them is only stripped.
    """
    namespace, value = alias
    if namespace == "biblio":
        return (namespace, value)

    # Drop nonprinting characters first, then surrounding spaces.
    value = remove_nonprinting_characters(value).strip()

    if is_doi(value):
        value = providers.crossref.clean_doi(value)
    elif is_pmid(value):
        value = providers.pubmed.clean_pmid(value)
    elif is_arxiv(value):
        value = providers.arxiv.clean_arxiv_id(value)
    elif is_url(value):
        value = providers.webpage.clean_url(value)

    return (namespace, value)
def provider_memberitems_get(provider_name, query):
    """
    Gets aliases associated with a query from a given provider.

    The query has nonprinting characters removed before it is handed to the
    provider's ``member_items``.  The result is returned as a 200 response
    whose body is JSON of the form ``{"memberitems": ...}``.  Provider
    failures are reported through ``abort_custom``/``abort``: 404 when the
    item is not found, 503 on a timeout or server error, 500 on any other
    provider error.
    """
    cleaned_query = unicode_helpers.remove_nonprinting_characters(query)
    provider = ProviderFactory.get_provider(provider_name)

    try:
        items_dict = provider.member_items(cleaned_query)
    except ProviderItemNotFoundError:
        abort_custom(404, "item not found")
    except (ProviderTimeout, ProviderServerError):
        abort_custom(503, "crossref lookup error, might be transient")
    except ProviderError:
        abort(500, "internal error from provider")

    # NOTE(review): presumably abort/abort_custom never return, otherwise
    # items_dict would be unbound here -- confirm.
    payload = json.dumps({"memberitems": items_dict}, sort_keys=True, indent=4)
    return make_response(payload, 200)
def provider_memberitems_get(provider_name, query):
    """
    Gets aliases associated with a query from a given provider.

    Nonprinting characters are removed from ``query`` first.  On success the
    provider's member items are serialised as ``{"memberitems": ...}`` (sorted
    keys, 4-space indent) in a 200 response.  A missing item triggers a 404,
    a provider timeout or server error a 503, and any other provider error a
    500.
    """
    sanitized = unicode_helpers.remove_nonprinting_characters(query)
    provider = ProviderFactory.get_provider(provider_name)

    try:
        items_dict = provider.member_items(sanitized)
    except ProviderItemNotFoundError:
        abort_custom(404, "item not found")
    except (ProviderTimeout, ProviderServerError):
        abort_custom(503, "crossref lookup error, might be transient")
    except ProviderError:
        abort(500, "internal error from provider")

    # NOTE(review): relies on the abort helpers not returning -- confirm.
    body = json.dumps(
        {"memberitems": items_dict},
        sort_keys=True,
        indent=4,
    )
    resp = make_response(body, 200)
    return resp
def test_remove_nonprinting_characters_unicode_input(self):
    # The input ends with a nonprinting character (U+200E), which the helper
    # is expected to drop while leaving the visible text intact.
    raw = u'0000-0001-8907-4150\u200e'
    cleaned = unicode_helpers.remove_nonprinting_characters(raw)
    assert_equals(cleaned, u"0000-0001-8907-4150")
def test_remove_nonprinting_characters(self):
    # Text made only of printable characters must come back unchanged.
    cleaned = unicode_helpers.remove_nonprinting_characters(u"hi")
    assert_equals(cleaned, u"hi")
def clean_pmid(pmid):
    """Return ``pmid`` lowercased, with nonprinting characters and every
    ``pmid:`` marker removed."""
    printable = remove_nonprinting_characters(pmid)
    lowered = printable.lower()
    return lowered.replace("pmid:", "")
def clean_url(input_url):
    """Return ``input_url`` with nonprinting characters removed."""
    return remove_nonprinting_characters(input_url)
def clean_url(input_url):
    """Return ``input_url`` with nonprinting characters removed."""
    return unicode_helpers.remove_nonprinting_characters(input_url)
def clean_arxiv_id(arxiv_id):
    """Return ``arxiv_id`` lowercased, without nonprinting characters, and
    with every ``arxiv:`` and ``http://arxiv.org/abs/`` marker removed."""
    cleaned = remove_nonprinting_characters(arxiv_id).lower()
    # Order matters only in that it mirrors the original chain of replaces.
    for marker in ("arxiv:", "http://arxiv.org/abs/"):
        cleaned = cleaned.replace(marker, "")
    return cleaned