예제 #1
0
def get_data_from_row(row):
    """Turn one status-table row into ``(spp_id, state, data_row)``.

    ``row`` must unpack into exactly eight fields: genus, species,
    subspecies, state, federal status, state status, notes and source.
    The species id is looked up in the module-level ``spp_ids`` mapping
    under the name returned by ``tax_resolve`` and is ``None`` when that
    name is not a key.
    """
    genus, sp, subsp, state, fed_status, state_status, notes, source = row
    spp_id = spp_ids.get(tax_resolve(genus, sp, subsp))

    if fed_status:
        # With a federal status the notes go in the third slot, the fixed
        # URL below in the fifth, and the row's own source is not used.
        data_row = [
            fed_status,
            state_status,
            notes,
            '',
            'http://www.mendeley.com/c/5405261894/g/2058663/listings-and-occurrences-for-each-state/',
            '',
        ]
    else:
        # Without one, the notes move to the fourth slot and the row's source
        # fills the last; a falsy state status is written as ''.
        data_row = [fed_status, state_status or '', '', notes, '', source]

    return spp_id, state, data_row
예제 #2
0
파일: tests.py 프로젝트: brymz/dodobase
 def test_apple(self):
     # Each pair is (name passed to tax_resolve together with self.syn1,
     # name it is expected to return).
     expectations = (
         ('appleb', 'apple'),
         ('applb', 'apple'),
         ('apple', 'apple'),
         ('a', 'a'),
         ('ap', 'ap'),
         ('appl', 'apple'),
         ('bionan', 'bionan'),
         ('banann', 'banana'),
         ('bannans', 'banana'),
     )
     for given, expected in expectations:
         self.assertEqual(tax_resolve(given, self.syn1), expected)
예제 #3
0
 def test_apple(self):
     # Each pair is (name passed to tax_resolve with syns=self.syn1,
     # expected final name).
     expectations = (
         ('appleb', 'apple'),
         ('applb', 'apple'),
         ('apple', 'apple'),
         ('a', 'a'),
         ('ap', 'ap'),
         ('appl', 'apple'),
         ('bionan', 'bionan'),
         ('banann', 'banana'),
         ('bannans', 'banana'),
         ('bannnna', 'banana'),
         ('orangee', 'Orange'),
         ('Orangee', 'Orange'),
     )
     for given, expected in expectations:
         # A falsy result from tax_resolve falls back to the input itself
         # before the comparison.
         resolved = tax_resolve(given, syns=self.syn1) or given
         self.assertEqual(resolved, expected)
예제 #4
0
def _store_spp_id(spp_code_dict, name, spp_id, taxon):
    '''Record name -> spp_id in spp_code_dict and rewrite the taxon's cache file.'''
    spp_code_dict[name] = spp_id
    cache_path = os.path.join(DATA_DIR, '%s.spp_codes.cache' % taxon)
    # protocol=-1 selects a binary pickle format, so the file must be opened
    # in binary mode; the with-block also guarantees the handle is closed.
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(spp_code_dict, cache_file, protocol=-1)


def get_spp_id(genus, species, subspecies, com_name, taxon, spp_code_dict):
    '''Get spp_id from spp_id dictionary.

    Lookup order:
      1. the lower-cased common name as a key of spp_code_dict;
      2. the scientific name built by tax_resolve.scientific_name;
      3. for hybrid/slash species (species containing ' x ', ' X ' or '/'),
         a new id, created only if every parent resolves to an id;
      4. the name returned by tax_resolve.tax_resolve, creating a new id
         when that name is not yet a key.
    Newly created ids are added to spp_code_dict and the dictionary is
    re-pickled to '<taxon>.spp_codes.cache' under DATA_DIR.

    Returns:
        whatever spp_code_dict holds for the matching key (the original
        docstring describes this as a spp_id string, or None when only an
        ambiguous common name was given), a newly created spp_id, or
        None when no id could be found or created.'''
    # 1. common name; a missing (None) common name is looked up as ''
    try:
        return spp_code_dict[(com_name or '').lower()]
    except KeyError:
        pass

    # 2. return species id for species, if we have one
    sci_name = tax_resolve.scientific_name(genus, species, subspecies)
    try:
        return spp_code_dict[sci_name]
    except KeyError:
        pass

    # 3. hybrids and slashes
    for delimiter in (' x ', ' X ', '/'):
        children = species.split(delimiter)
        if len(children) < 2:
            continue
        # Each parent is resolved with the full argument list; only the
        # species changes.
        # NOTE(review): genus, subspecies and com_name are forwarded
        # unchanged from the hybrid -- confirm that is the intended context
        # for the parents.
        if not all(get_spp_id(genus, child.strip(), subspecies, com_name,
                              taxon, spp_code_dict)
                   for child in children):
            # we don't have a species code for all of the species in the hybrid/slash
            return None
        # create a new species id for the hybrid/slash and remember it
        new_spp_id = tax_resolve.new_spp_id(taxon, genus, species, subspecies)
        if not new_spp_id:
            return None
        _store_spp_id(spp_code_dict, sci_name, new_spp_id, taxon)
        return new_spp_id

    # 4. ordinary species: let tax_resolve propose a name
    new_name = tax_resolve.tax_resolve(genus, species, subspecies, com_name=com_name, known_species=spp_code_dict.keys(), taxon=taxon)
    if new_name != sci_name:
        # sys.stdout.write runs on Python 2 and 3; the trailing space stands
        # in for the separator the old trailing-comma print statement left.
        sys.stdout.write('==> corrected to %s ' % new_name)
        sys.stdout.flush()
    if not new_name:
        return None
    try:
        return spp_code_dict[new_name]
    except KeyError:
        pass

    # Drop anything from the first '(' onwards, then pass the remaining
    # whitespace-separated words as the name parts of the new id.
    corrected_sci_name = new_name.split('(')[0].strip().split()
    new_spp_id = tax_resolve.new_spp_id(taxon, *corrected_sci_name)
    if not new_spp_id:
        return None
    _store_spp_id(spp_code_dict, new_name, new_spp_id, taxon)
    return new_spp_id
예제 #5
0
        return names
    else:
        return names + [""]


# Data directory and the taxonomy tables used to build the species-id lookup.
datadir = "../data/"
taxonomy_files = [
    "beetles_clean.csv",
    "ebird_tax_clean.csv",
    "mammals.csv",
    "mosquitoes.csv",
    "plants.csv",
]

# Load the status table (using csv due to commas in comment fields), pass a
# copy through remove_spaces, then split off the header row.
status_table = remove_spaces(get_csv_file(datadir + "status.csv")[:])
header = status_table[0]
del status_table[0]

spp_ids = import_taxonomy_files(taxonomy_files, datadir)

# Rows whose resolved name has a species id go to the "clean" table, prefixed
# with that id and the output of get_genus_sp_subsp; all others are set aside
# unchanged.
status_table_clean = []
status_table_notadded = []
for row in status_table:
    genus, sp, subsp, state, fed_status, st_status, notes, source = row
    name = tax_resolve(genus, sp, subsp)
    spp_id = spp_ids.get(name)
    if not spp_id:
        status_table_notadded.append(row)
        continue
    new_row = [spp_id] + get_genus_sp_subsp(name) + row[3:]
    status_table_clean.append(new_row)

# The unmatched rows are exported with the original header first; only after
# that is the header extended in place with the extra "spp_id" column.
status_table_notadded.insert(0, header)
export_to_csv(status_table_notadded, "../data/status_notentered.csv")

header.insert(0, "spp_id")
status_table_clean.insert(0, header)
export_to_csv(status_table_clean, "../data/status_clean.csv")
예제 #6
0
파일: tests.py 프로젝트: brymz/dodobase
 def test_mosquitos(self):
     # Every spelling below, resolved against self.syn2, must come back as
     # 'Aedes clivis'.
     spellings = (
         'Aedes clivis',
         'Aedes clivid',
         'Ochlerotatus clivis',
         'Ochlerotatus clivid',
         'Ochlarodadus clivus',
     )
     for spelling in spellings:
         self.assertEqual(tax_resolve(spelling, self.syn2), 'Aedes clivis')
예제 #7
0
 def test_mosquitos_case_sensitivty(self):
     # Inputs that vary in letter case, resolved with syns=self.syn2, must all
     # come back as 'Aedes clivis'.
     spellings = (
         'Aedes clivis',
         'Aedes Clivid',
         'ochlerotatus clivis',
         'Ochlerotatus Clivid',
     )
     for spelling in spellings:
         self.assertEqual(tax_resolve(spelling, syns=self.syn2), 'Aedes clivis')