def tnrs_lookup(name, TIMEOUT=10, CACHE=True):
    """
    Look up "name" on the TNRS web service and return its best-match
    scientific name.

    The name is normalised (apostrophes removed, lower-cased) before it is
    used as the cache key and sent to the service.

    Parameters:
        name: the plant name to resolve.
        TIMEOUT: timeout, in seconds, passed to urllib2.urlopen.
        CACHE: if true, return a previously cached answer when one exists,
            and persist the cache with caching.save_cache after the lookup.

    Returns:
        The "nameScientific" value of the first item in the response,
        None if that value is empty, or False if the request or the
        handling of its response raised an exception (for example when the
        response contains no items).
    """
    name = name.replace("'", "").lower()
    if CACHE and name in cache:
        return cache[name]

    url = "http://tnrs.iplantc.org/tnrsm-svc/matchNames?retrieve=best&names=%s"

    # look up the canonical plant name on the TNRS web service
    try:
        # Percent-encode the whole name, not only its spaces, so characters
        # such as "&", "#" or "%" cannot corrupt the query string.
        query = urllib.quote(name, safe="")
        response = urllib2.urlopen(url % query, timeout=TIMEOUT).read()
        sci_name = json.loads(response)["items"][0]["nameScientific"]
        result = sci_name or None
    except Exception as e:
        # Best effort: report the problem and signal failure with False.
        print(e)
        result = False

    # NOTE(review): failed lookups (False) are cached and persisted as well,
    # so a transient network error is remembered -- confirm this is intended.
    cache[name] = result
    if CACHE:
        caching.save_cache(cache, "tnrs")
    return result
def itis_lookup(name, TIMEOUT=10, CACHE=True):
    '''
    Look up "name" on itis.gov and return the standard species name.

    The name is normalised (apostrophes removed, lower-cased) before it is
    used as the cache key and submitted to the ITIS search form.

    Parameters:
        name: the name to search for.
        TIMEOUT: timeout, in seconds, passed to urllib2.urlopen.
        CACHE: if true, return a previously cached answer when one exists,
            and persist the cache with caching.save_cache after the lookup.

    Returns:
        "Genus species" when the matches belong to a single genus; if
        several species of that genus match, their epithets are
        de-duplicated, sorted and joined as "Genus sp1/sp2/sp3...".
        False if no species is found or the matches span more than one
        genus.

    Exceptions raised while performing the request are not caught here.
    '''
    name = name.replace("'", '').lower()
    if CACHE and name in cache:
        return cache[name]

    url = 'http://www.itis.gov/servlet/SingleRpt/SingleRpt'
    values = {'search_topic': 'all',
              'search_kingdom': 'every',
              'search_span': 'containing',
              'search_value': name.decode(),
              'categories': 'All',
              'source': 'html',
              'search_credRating': 'All'}
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)
    response = urllib2.urlopen(req, timeout=TIMEOUT)
    try:
        html = response.read()
    finally:
        # release the connection even if reading fails
        response.close()

    # Pull the text that follows each link in the result table.
    # NOTE(review): p is presumably pyquery's PyQuery (defined elsewhere in
    # this file) -- confirm. A link with no trailing text has tail None,
    # which re.findall cannot handle, so fall back to an empty string.
    tails = [link.tail or '' for link in p(html)('td.body a')]

    # "Species: Genus epithet" fragments, reduced to "Genus epithet"
    species_names = [found.split(':')[1].strip()
                     for tail in tails
                     for found in re.findall('Species: [A-Z][a-z ]*', tail)]

    result = False
    if species_names:
        genera = set(full.split()[0] for full in species_names)
        # matches from more than one genus are ambiguous -> keep False
        if len(genera) == 1:
            epithets = set(' '.join(full.split()[1:])
                           for full in species_names)
            result = genera.pop() + ' ' + '/'.join(sorted(epithets))

    cache[name] = result
    if CACHE:
        caching.save_cache(cache, 'itis')
    return result