def get_data_from_row(row):
    '''Split one status-table row into (spp_id, state, data_row).

    row must unpack into exactly eight fields: genus, sp, subsp, state,
    fed_status, state_status, notes, source.  The species id is looked up
    in the module-level spp_ids mapping under the name returned by
    tax_resolve; it is None when that name is not a key of spp_ids.

    The six-element data_row depends on which status is set:
      * fed_status truthy: notes go in the third slot, the fifth slot
        holds the fixed Mendeley URL, and the row's own source is dropped;
      * otherwise: notes go in the fourth slot, the row's source goes
        last, and a falsy state_status is written as ''.
    '''
    genus, sp, subsp, state, fed_status, state_status, notes, source = row
    resolved_name = tax_resolve(genus, sp, subsp)
    spp_id = spp_ids.get(resolved_name, None)

    if fed_status:
        federal_row = [
            fed_status,
            state_status,
            notes,
            '',
            'http://www.mendeley.com/c/5405261894/g/2058663/listings-and-occurrences-for-each-state/',
            '',
        ]
        return spp_id, state, federal_row

    # No federal status: the state-listed and unlisted layouts differ only
    # in whether the state status slot carries a value or ''.
    state_slot = state_status if state_status else ''
    state_row = [fed_status, state_slot, '', notes, '', source]
    return spp_id, state, state_row
def test_apple(self):
    '''Check tax_resolve against self.syn1 for a table of spellings.

    Each pair is (input spelling, expected result); the resolver is
    called with self.syn1 as its second positional argument.
    '''
    cases = (
        ('appleb', 'apple'),
        ('applb', 'apple'),
        ('apple', 'apple'),
        ('a', 'a'),
        ('ap', 'ap'),
        ('appl', 'apple'),
        ('bionan', 'bionan'),
        ('banann', 'banana'),
        ('bannans', 'banana'),
    )
    for spelling, expected in cases:
        resolved = tax_resolve(spelling, self.syn1)
        self.assertEqual(resolved, expected)
def test_apple(self):
    '''Check tax_resolve(..., syns=self.syn1) for a table of spellings.

    Each pair is (input spelling, expected result).  When the resolver
    returns a falsy value the input spelling itself is compared against
    the expected result.
    '''
    cases = (
        ('appleb', 'apple'),
        ('applb', 'apple'),
        ('apple', 'apple'),
        ('a', 'a'),
        ('ap', 'ap'),
        ('appl', 'apple'),
        ('bionan', 'bionan'),
        ('banann', 'banana'),
        ('bannans', 'banana'),
        ('bannnna', 'banana'),
        ('orangee', 'Orange'),
        ('Orangee', 'Orange'),
    )
    for spelling, expected in cases:
        resolved = tax_resolve(spelling, syns=self.syn1)
        if not resolved:
            # Falsy result: fall back to the spelling that was passed in.
            resolved = spelling
        self.assertEqual(resolved, expected)
def get_spp_id(genus, species, subspecies, com_name, taxon, spp_code_dict):
    '''Get spp_id from spp_id dictionary.

    Lookup order:
      1. spp_code_dict[com_name.lower()]
      2. spp_code_dict[scientific name built from genus/species/subspecies]
      3. if species contains ' x ', ' X ' or '/' (hybrids and slashes):
         every child must itself yield a spp_id, then a new id is created
         for the hybrid/slash and cached under its scientific name
      4. otherwise the name is passed through tax_resolve.tax_resolve; a
         corrected name already in spp_code_dict returns its id, else a new
         id is created and cached under the corrected name

    Whenever a new id is created, spp_code_dict is updated in place and
    re-pickled to '<taxon>.spp_codes.cache' under DATA_DIR.

    Returns: a spp_id string if this is a known or unknown species,
    None if only an ambiguous common_name was given
    '''
    try:
        return spp_code_dict[com_name.lower()]
    except KeyError:
        pass

    sci_name = tax_resolve.scientific_name(genus, species, subspecies)
    try:
        # return species id for species, if we have one
        return spp_code_dict[sci_name]
    except KeyError:
        pass

    # hybrids and slashes: use the first delimiter that occurs in the name
    delimiter = next((d for d in (' x ', ' X ', '/') if d in species), None)

    if delimiter is not None:
        children = species.split(delimiter)
        # The recursive call needs the full argument list (the one-argument
        # form raised TypeError).
        # NOTE(review): children are looked up by scientific name only, with
        # empty subspecies/common name -- confirm this matches the intent.
        child_ids = [get_spp_id(genus, child, '', '', taxon, spp_code_dict)
                     for child in children]
        if not all(child_ids):
            # we don't have a species code for all of the species in the
            # hybrid/slash
            return None
        # create a new species id for the hybrid/slash
        new_spp_id = tax_resolve.new_spp_id(taxon, genus, species, subspecies)
        cache_key = sci_name
    else:
        new_name = tax_resolve.tax_resolve(genus, species, subspecies,
                                           com_name=com_name,
                                           known_species=spp_code_dict.keys(),
                                           taxon=taxon)
        if new_name != sci_name:
            # Written without a newline; the trailing space stands in for
            # the soft space the print statement used to leave behind.
            sys.stdout.write('==> corrected to %s ' % new_name)
            sys.stdout.flush()
        if not new_name:
            return None
        try:
            return spp_code_dict[new_name]
        except KeyError:
            pass
        # Drop any parenthesised suffix, then split into name parts.
        corrected_sci_name = (new_name.split('(')[0].strip()).split()
        new_spp_id = tax_resolve.new_spp_id(taxon, *corrected_sci_name)
        cache_key = new_name

    if not new_spp_id:
        return None

    spp_code_dict[cache_key] = new_spp_id
    cache_path = os.path.join(DATA_DIR, '%s.spp_codes.cache' % taxon)
    # protocol=-1 selects a binary pickle format, so the file must be opened
    # in binary mode; the with-block closes the handle.
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(spp_code_dict, cache_file, protocol=-1)
    return new_spp_id
return names else: return names + [""] datadir = "../data/" taxonomy_files = ["beetles_clean.csv", "ebird_tax_clean.csv", "mammals.csv", "mosquitoes.csv", "plants.csv"] status_table = get_csv_file(datadir + "status.csv") # using csv due to commas in comment fields status_table = remove_spaces(status_table[:]) header = status_table[0] del (status_table[0]) spp_ids = import_taxonomy_files(taxonomy_files, datadir) status_table_clean = [] status_table_notadded = [] for row in status_table: genus, sp, subsp, state, fed_status, st_status, notes, source = row name = tax_resolve(genus, sp, subsp) spp_id = spp_ids.get(name, None) if spp_id: new_row = [spp_id] + get_genus_sp_subsp(name) + row[3:] status_table_clean.append(new_row) else: status_table_notadded.append(row) status_table_notadded.insert(0, header) export_to_csv(status_table_notadded, "../data/status_notentered.csv") header.insert(0, "spp_id") status_table_clean.insert(0, header) export_to_csv(status_table_clean, "../data/status_clean.csv")
def test_mosquitos(self):
    '''Every listed spelling must resolve to 'Aedes clivis'.

    The resolver is called with self.syn2 as its second positional
    argument.
    '''
    expected = 'Aedes clivis'
    variants = (
        'Aedes clivis',
        'Aedes clivid',
        'Ochlerotatus clivis',
        'Ochlerotatus clivid',
        'Ochlarodadus clivus',
    )
    for variant in variants:
        resolved = tax_resolve(variant, self.syn2)
        self.assertEqual(resolved, expected)
def test_mosquitos_case_sensitivty(self):
    '''Mixed-case spellings must still resolve to 'Aedes clivis'.

    The resolver is called with syns=self.syn2; the inputs vary the
    capitalisation of the genus and of the species epithet.
    '''
    expected = 'Aedes clivis'
    variants = (
        'Aedes clivis',
        'Aedes Clivid',
        'ochlerotatus clivis',
        'Ochlerotatus Clivid',
    )
    for variant in variants:
        resolved = tax_resolve(variant, syns=self.syn2)
        self.assertEqual(resolved, expected)