Beispiel #1
0
def clean_doi(input_doi):
    """Extract a bare, lower-cased DOI from a DOI string or DOI URL.

    The input is passed through ``remove_nonprinting_characters`` and
    lower-cased, then checked against these forms, in order:

    * ``http(s)://[dx.]doi.org/10.<rest>``
    * ``[dx.]doi.org/10.<rest>``
    * ``doi:10.<rest>``
    * ``10.<rest>`` (returned as is)
    * any text containing ``10.<digits><rest>``

    Returns the DOI string, or ``None`` when the input has no ``lower``
    method (e.g. ``None``) or when no DOI can be extracted from it.
    """
    input_doi = remove_nonprinting_characters(input_doi)
    try:
        input_doi = input_doi.lower()
    except AttributeError:
        # Not a string-like value, so there is nothing to parse.
        return None

    # Each branch picks the pattern and the capture group holding the DOI.
    # Patterns are raw strings so that "\." and "\d" reach the regex engine
    # untouched instead of being (invalid) string escape sequences.
    if input_doi.startswith("http"):
        match = re.match(r"^https?://(dx\.)?doi\.org/(10\..+)", input_doi)
        group = 2
    elif "doi.org" in input_doi:
        match = re.match(r"^(dx\.)?doi\.org/(10\..+)", input_doi)
        group = 2
    elif input_doi.startswith("doi:"):
        match = re.match(r"^doi:(10\..+)", input_doi)
        group = 1
    elif input_doi.startswith("10."):
        return input_doi
    elif "10." in input_doi:
        match = re.match(r".*(10\.\d+.+)", input_doi, re.DOTALL)
        group = 1
    else:
        try:
            logger.debug(
                u"MALFORMED DOI {input_doi}".format(input_doi=input_doi))
        except Exception:
            # Best-effort logging: the value itself may not be formattable.
            logger.debug(u"MALFORMED DOI, can't print doi")
        return None

    # re.match returns None when the pattern does not apply; report that as
    # "no DOI" explicitly rather than via an AttributeError on None.
    return match.group(group) if match else None
def clean_doi(input_doi):
    """Return the lower-cased DOI found in ``input_doi``, or ``None``.

    After ``remove_nonprinting_characters`` and lower-casing, the value is
    accepted when it is a ``doi.org`` / ``dx.doi.org`` URL (with or without
    an ``http``/``https`` scheme), a ``doi:``-prefixed string, a string
    starting with ``10.``, or any text containing ``10.<digits>``.

    ``None`` is returned when the input has no ``lower`` method (e.g. it is
    ``None``) or when none of the accepted forms yields a DOI.
    """
    doi = None
    input_doi = remove_nonprinting_characters(input_doi)
    try:
        input_doi = input_doi.lower()
    except AttributeError:
        # Non-string input: nothing to clean.
        return doi

    # Raw-string patterns keep "\." and "\d" as regex escapes. Every match
    # is checked before use because re.match returns None on failure.
    if input_doi.startswith("http"):
        match = re.match(r"^https?://(dx\.)?doi\.org/(10\..+)", input_doi)
        if match:
            doi = match.group(2)
    elif "doi.org" in input_doi:
        match = re.match(r"^(dx\.)?doi\.org/(10\..+)", input_doi)
        if match:
            doi = match.group(2)
    elif input_doi.startswith("doi:"):
        match = re.match(r"^doi:(10\..+)", input_doi)
        if match:
            doi = match.group(1)
    elif input_doi.startswith("10."):
        doi = input_doi
    elif "10." in input_doi:
        match = re.match(r".*(10\.\d+.+)", input_doi, re.DOTALL)
        if match:
            doi = match.group(1)
    else:
        try:
            logger.debug(u"MALFORMED DOI {input_doi}".format(
                input_doi=input_doi))
        except Exception:
            # Logging is best effort; fall back if the value can't be shown.
            logger.debug(u"MALFORMED DOI, can't print doi")

    return doi
Beispiel #3
0
def clean_id(nid):
    """Tidy up a string id; hand back anything else untouched.

    For strings, surrounding spaces and double quotes are stripped and the
    result is passed through ``unicode_helpers.remove_nonprinting_characters``.
    Values that raise ``TypeError`` or ``AttributeError`` along the way
    (non-strings, which may be biblio data) are returned as far as they got.
    """
    result = nid
    try:
        result = result.strip(' "')
        result = unicode_helpers.remove_nonprinting_characters(result)
    except (TypeError, AttributeError):
        # Not a string. That's fine, it might be a biblio record.
        return result
    return result
Beispiel #4
0
def clean_id(nid):
    """Normalize an id string, leaving non-string ids as they are.

    Strings lose leading/trailing spaces and double quotes and are then run
    through ``unicode_helpers.remove_nonprinting_characters``. A
    ``TypeError`` or ``AttributeError`` during cleaning is ignored, so
    non-string values (possibly biblio data) come back unchanged.
    """
    cleaned = nid
    try:
        cleaned = cleaned.strip(' "')
        cleaned = unicode_helpers.remove_nonprinting_characters(cleaned)
    except (TypeError, AttributeError):
        # Non-string input is acceptable here (it may be biblio data).
        pass
    return cleaned
Beispiel #5
0
def clean_doi(input_doi):
    """Extract a bare DOI from a DOI URL, a ``doi:`` string, or a raw DOI.

    The input is first passed through ``remove_nonprinting_characters``.
    Recognized forms are ``http(s)://[dx.]doi.org/10.<rest>``,
    ``doi:10.<rest>`` and ``10.<rest>``.

    Returns the DOI string, or ``None`` when the input matches none of the
    recognized forms (including http URLs that are not DOI URLs).
    """
    input_doi = remove_nonprinting_characters(input_doi)

    if input_doi.startswith("http"):
        # Raw string keeps "\." as a regex escape. re.match returns None
        # for non-DOI URLs, so check it before reading the group.
        match = re.match(r"^https?://(dx\.)?doi\.org/(10\..+)", input_doi)
        return match.group(2) if match else None

    if input_doi.startswith("doi:"):
        match = re.match(r"^doi:(10\..+)", input_doi)
        return match.group(1) if match else None

    if input_doi.startswith("10."):
        return input_doi

    return None
Beispiel #6
0
def clean_doi(input_doi):
    """Return the DOI contained in ``input_doi``, or ``None`` if there is none.

    ``input_doi`` is passed through ``remove_nonprinting_characters`` and
    then accepted as a ``doi.org`` / ``dx.doi.org`` URL over http or https,
    as a ``doi:``-prefixed string, or as a string that already starts with
    ``10.``. Anything else, including an http URL that is not a DOI URL,
    yields ``None``.
    """
    doi = None
    input_doi = remove_nonprinting_characters(input_doi)
    if input_doi.startswith("http"):
        # Raw-string pattern; guard against re.match returning None.
        match = re.match(r"^https?://(dx\.)?doi\.org/(10\..+)", input_doi)
        if match:
            doi = match.group(2)
    elif input_doi.startswith("doi:"):
        match = re.match(r"^doi:(10\..+)", input_doi)
        if match:
            doi = match.group(1)
    elif input_doi.startswith("10."):
        doi = input_doi
    return doi
def get_aliases_from_product_id_strings(product_id_strings):
    """Collect aliases for every recognizable id in ``product_id_strings``.

    Each id is cleaned of nonprinting characters and surrounding whitespace,
    then classified as DOI, PMID, arXiv id or URL (first match wins) and
    handed to the matching provider's ``member_items``. Ids that match no
    type are skipped. Returns the combined list of aliases.
    """
    found = []
    for raw_id in product_id_strings:
        candidate = remove_nonprinting_characters(raw_id)
        candidate = candidate.strip()  # also drop surrounding spaces

        # Pick the provider for the first id type that matches.
        if is_doi(candidate):
            provider = providers.crossref.Crossref()
        elif is_pmid(candidate):
            provider = providers.pubmed.Pubmed()
        elif is_arxiv(candidate):
            provider = providers.arxiv.Arxiv()
        elif is_url(candidate):
            provider = providers.webpage.Webpage()
        else:
            continue

        found.extend(provider.member_items(candidate))
    return found
def get_aliases_from_product_id_strings(product_id_strings):
    """Look up aliases for a sequence of product id strings.

    Every id has nonprinting characters and surrounding whitespace removed.
    The first of ``is_doi``, ``is_pmid``, ``is_arxiv``, ``is_url`` that
    accepts it decides which provider's ``member_items`` is queried.
    Unrecognized ids contribute nothing. The results are concatenated into
    one list, which is returned.
    """
    collected = []
    for item in product_id_strings:
        # Remove nonprinting characters first, then surrounding spaces.
        item = remove_nonprinting_characters(item).strip()

        if is_doi(item):
            collected.extend(providers.crossref.Crossref().member_items(item))
            continue
        if is_pmid(item):
            collected.extend(providers.pubmed.Pubmed().member_items(item))
            continue
        if is_arxiv(item):
            collected.extend(providers.arxiv.Arxiv().member_items(item))
            continue
        if is_url(item):
            collected.extend(providers.webpage.Webpage().member_items(item))
    return collected
def normalize_alias(alias):
    """Return ``alias`` as an ``(ns, nid)`` tuple with a cleaned-up id.

    ``alias`` is a two-item ``(namespace, id)`` pair. "biblio" aliases are
    returned without touching the id. For every other namespace the id has
    nonprinting characters and surrounding whitespace removed, and is then
    cleaned by the provider matching its detected type (DOI, PMID, arXiv id
    or URL). Ids of no recognized type are returned after the basic cleanup.
    """
    namespace, raw_id = alias
    if namespace == "biblio":
        # Biblio ids are passed through untouched.
        return (namespace, raw_id)

    candidate = remove_nonprinting_characters(raw_id).strip()

    if is_doi(candidate):
        return (namespace, providers.crossref.clean_doi(candidate))
    if is_pmid(candidate):
        return (namespace, providers.pubmed.clean_pmid(candidate))
    if is_arxiv(candidate):
        return (namespace, providers.arxiv.clean_arxiv_id(candidate))
    if is_url(candidate):
        return (namespace, providers.webpage.clean_url(candidate))
    return (namespace, candidate)
def normalize_alias(alias):
    """Normalize the id part of a ``(namespace, id)`` alias pair.

    Aliases in the "biblio" namespace keep their id as is. Any other id is
    stripped of nonprinting characters and surrounding whitespace, and then
    cleaned with the provider-specific function for the first id type that
    matches: DOI, PMID, arXiv id, URL. The result is always a new
    ``(namespace, id)`` tuple.
    """
    ns, nid = alias
    is_biblio = ns == "biblio"

    if not is_biblio:
        nid = remove_nonprinting_characters(nid)
        nid = nid.strip()  # drop surrounding spaces as well

        # The first matching id type selects the cleaner.
        if is_doi(nid):
            nid = providers.crossref.clean_doi(nid)
        elif is_pmid(nid):
            nid = providers.pubmed.clean_pmid(nid)
        elif is_arxiv(nid):
            nid = providers.arxiv.clean_arxiv_id(nid)
        elif is_url(nid):
            nid = providers.webpage.clean_url(nid)

    return (ns, nid)
Beispiel #11
0
def provider_memberitems_get(provider_name, query):
    """
    Return the aliases a provider associates with a query, as JSON.

    The query is cleaned of nonprinting characters, the provider is looked
    up by name through ``ProviderFactory`` and its ``member_items`` result
    is wrapped as ``{"memberitems": ...}`` in a 200 response. Provider
    errors are turned into 404 (item not found), 503 (timeout or server
    error) or 500 (any other provider error).
    """
    cleaned_query = unicode_helpers.remove_nonprinting_characters(query)
    provider = ProviderFactory.get_provider(provider_name)

    # NOTE(review): the handlers below presumably raise (abort the request);
    # otherwise items_dict would be unbound afterwards. Confirm.
    try:
        items_dict = provider.member_items(cleaned_query)
    except ProviderItemNotFoundError:
        abort_custom(404, "item not found")
    except (ProviderTimeout, ProviderServerError):
        abort_custom(503, "crossref lookup error, might be transient")
    except ProviderError:
        abort(500, "internal error from provider")

    payload = json.dumps({"memberitems": items_dict}, sort_keys=True, indent=4)
    return make_response(payload, 200)
Beispiel #12
0
def provider_memberitems_get(provider_name, query):
    """
    Look up the member items a given provider returns for a query.

    Nonprinting characters are removed from the query before it is passed
    to the provider's ``member_items``. The result is serialized as
    pretty-printed JSON under the "memberitems" key and returned with
    status 200. A missing item maps to 404, a provider timeout or server
    error to 503, and any other provider error to 500.
    """
    provider = ProviderFactory.get_provider(
        provider_name
    ) if False else None  # placeholder replaced below; keeps call order
    query = unicode_helpers.remove_nonprinting_characters(query)
    provider = ProviderFactory.get_provider(provider_name)

    try:
        items_dict = provider.member_items(query)
    except ProviderItemNotFoundError:
        abort_custom(404, "item not found")
    except (ProviderTimeout, ProviderServerError):
        abort_custom(503, "crossref lookup error, might be transient")
    except ProviderError:
        abort(500, "internal error from provider")

    # NOTE(review): this point is presumably reached only on success, i.e.
    # the abort helpers above are assumed to raise. Confirm.
    body = json.dumps({"memberitems": items_dict}, sort_keys=True, indent=4)
    resp = make_response(body, 200)
    return resp
 def test_remove_nonprinting_characters_unicode_input(self):
     """A trailing nonprinting character is expected to be stripped."""
     expected = u"0000-0001-8907-4150"
     # Same value as expected, followed by the nonprinting U+200E.
     raw = u'0000-0001-8907-4150\u200e'
     assert_equals(unicode_helpers.remove_nonprinting_characters(raw), expected)
 def test_remove_nonprinting_characters(self):
     """Text without nonprinting characters is expected back unchanged."""
     plain = u"hi"
     cleaned = unicode_helpers.remove_nonprinting_characters(plain)
     assert_equals(cleaned, u"hi")
Beispiel #15
0
def clean_pmid(pmid):
    """Return ``pmid`` lower-cased with every "pmid:" occurrence removed.

    Nonprinting characters are removed first via
    ``remove_nonprinting_characters``.
    """
    lowered = remove_nonprinting_characters(pmid).lower()
    return lowered.replace("pmid:", "")
def clean_url(input_url):
    """Return ``input_url`` passed through ``remove_nonprinting_characters``."""
    return remove_nonprinting_characters(input_url)
Beispiel #17
0
def clean_url(input_url):
    """Clean a URL by removing its nonprinting characters.

    The work is delegated to
    ``unicode_helpers.remove_nonprinting_characters``; no other change is
    made to the URL here.
    """
    return unicode_helpers.remove_nonprinting_characters(input_url)
 def test_remove_nonprinting_characters_unicode_input(self):
     """An id ending in a nonprinting character should come back without it."""
     # The input ends with U+200E, a nonprinting character.
     cleaned = unicode_helpers.remove_nonprinting_characters(
         u'0000-0001-8907-4150\u200e')
     assert_equals(cleaned, u"0000-0001-8907-4150")
Beispiel #19
0
def clean_url(input_url):
    """Return the URL as produced by ``unicode_helpers.remove_nonprinting_characters``."""
    cleaned_url = unicode_helpers.remove_nonprinting_characters(input_url)
    return cleaned_url
Beispiel #20
0
def clean_arxiv_id(arxiv_id):
    """Reduce an arXiv reference to its bare, lower-cased identifier.

    Nonprinting characters are removed and the value is lower-cased. Every
    occurrence of the "arxiv:" prefix and of the arXiv abstract-page URL
    prefix is then removed. Both the ``http`` and the ``https`` form of the
    URL prefix are handled, so ``https://arxiv.org/abs/<id>`` is reduced to
    ``<id>`` just like its ``http`` counterpart.
    """
    arxiv_id = remove_nonprinting_characters(arxiv_id).lower()
    # Prefixes are removed in this order; the first two match the original
    # behaviour, the https form covers current arXiv links.
    for prefix in ("arxiv:",
                   "http://arxiv.org/abs/",
                   "https://arxiv.org/abs/"):
        arxiv_id = arxiv_id.replace(prefix, "")
    return arxiv_id
Beispiel #21
0
def clean_arxiv_id(arxiv_id):
    """Return the lower-cased arXiv id without "arxiv:" or abs-URL prefixes.

    The input is passed through ``remove_nonprinting_characters`` and
    lower-cased. All occurrences of "arxiv:", "http://arxiv.org/abs/" and
    "https://arxiv.org/abs/" are then removed, so ids given as plain ids,
    prefixed ids, or abstract-page links over either scheme all reduce to
    the same identifier.
    """
    arxiv_id = remove_nonprinting_characters(arxiv_id)
    arxiv_id = arxiv_id.lower()
    arxiv_id = arxiv_id.replace("arxiv:", "")
    arxiv_id = arxiv_id.replace("http://arxiv.org/abs/", "")
    # Links over https are handled the same way as the http form.
    arxiv_id = arxiv_id.replace("https://arxiv.org/abs/", "")
    return arxiv_id
 def test_remove_nonprinting_characters(self):
     """Plain text should pass through the helper unchanged."""
     expected = u"hi"
     assert_equals(
         unicode_helpers.remove_nonprinting_characters(u"hi"), expected)
Beispiel #23
0
def clean_url(input_url):
    """Clean a URL by delegating to ``remove_nonprinting_characters``.

    No other normalization is applied here.
    """
    cleaned_url = remove_nonprinting_characters(input_url)
    return cleaned_url