Example #1
0
def tnrs_lookup(name, TIMEOUT=10, CACHE=True):
    """
    Look up "name" on the TNRS web service. If a most likely standard name can
    be identified, returns that name. Returns False if no or ambiguous result.

    name -- common/scientific name to resolve (apostrophes stripped, lowercased)
    TIMEOUT -- seconds before the HTTP request is abandoned
    CACHE -- when True, consult the module-level cache and persist results
    """

    # Normalize so cache keys are consistent regardless of input casing/quotes.
    name = name.replace("'", "").lower()
    if name in cache and CACHE:
        return cache[name]

    url = "http://tnrs.iplantc.org/tnrsm-svc/matchNames?retrieve=best&names=%s"

    # lookup canonical plant names on TNRS web service
    try:
        response = urllib2.urlopen(url % name.replace(" ", "%20"), timeout=TIMEOUT).read()
        response_dict = json.loads(response)
        sci_name = response_dict["items"][0]["nameScientific"]

        # BUG FIX: previously an empty nameScientific produced None, which
        # contradicts the docstring and the sibling itis_lookup (both use
        # False for "no result").
        result = sci_name if sci_name else False

    except Exception as e:
        # Best-effort: network errors / malformed responses count as no result.
        print(e)
        result = False

    # cache results and return
    cache[name] = result
    if CACHE:
        caching.save_cache(cache, "tnrs")
    return result
Example #2
0
def itis_lookup(name, TIMEOUT=10, CACHE=True):
    '''
    Look up "name" on itis.gov. If a standard name can be identified, returns
    that name. Returns False if no or ambiguous result.

    If a name matches multiple species that are all members of the same genus,
    itis_lookup will return "Genus sp1/sp2/sp3..."

    name -- common/scientific name to resolve (apostrophes stripped, lowercased)
    TIMEOUT -- seconds before the HTTP request is abandoned
    CACHE -- when True, consult the module-level cache and persist results
    '''

    # Normalize so cache keys are consistent regardless of input casing/quotes.
    name = name.replace("'", '').lower()
    if name in cache and CACHE:
        return cache[name]

    url = 'http://www.itis.gov/servlet/SingleRpt/SingleRpt'
    values = {'search_topic': 'all', 
              'search_kingdom':'every', 
              'search_span':'containing', 
              'search_value': name.decode(), 
              'categories':'All', 
              'source':'html', 
              'search_credRating': 'All'}
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)

    # BUG FIX: the sibling tnrs_lookup traps network failures and returns
    # False; this function previously let the exception propagate. Treat a
    # failed request as "no result" for a consistent contract.
    try:
        response = urllib2.urlopen(req, timeout=TIMEOUT)
        html = response.read()
    except Exception as e:
        print(e)
        cache[name] = False
        if CACHE:
            caching.save_cache(cache, 'itis')
        return False

    # parse results to pull out unique species
    results = [s.tail for s in p(html)('td.body a')]
    results = sum([re.findall('Species: [A-Z][a-z ]*', result) for result in results], [])
    results = [s.split(':')[1].strip() for s in results]

    if results:
        genera = set()
        all_species = []
        result = None
        for this_species in results:
            genera.add(this_species.split()[0])
            if len(genera) > 1:
                # Matches span more than one genus: ambiguous, give up.
                result = False
                break
            all_species.append(' '.join(this_species.split()[1:]))
        if result is not False:
            # Single genus: join the unique specific epithets as "Genus a/b/c".
            result = list(genera)[0] + ' ' + '/'.join(sorted(set(all_species)))
        cache[name] = result
    else:
        cache[name] = False

    if CACHE:
        caching.save_cache(cache, 'itis')

    return cache[name]