def find_species(url, species_data):
    content = retrieve_document(url, 'content')
    for link in content.iterlinks():
        # iterlinks() yields (element, attribute, link, pos); drop pos
        element, attribute, tgt = link[:-1]
        # looking for links whose target URL contains the species path
        # segment (slashes need no escaping in Python regexes)
        match = re.search("/%s/" % species_suffix, tgt)
        if match is not None:
            # retrieving species name from the link text
            sp_name = element.text
            #print "<<<", len(species_data), ">>>"
            if sp_name in species_data:
                species = species_data[sp_name]
                print "\tSpecies '%s' already in database..." % sp_name
            else:
                spf = SpeciesFinder(sp_name)
                species = spf.find_species()
                if species is not None and species.is_plant():
                    species.print_species_data()
                    species_data[sp_name] = species
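# NOTE: retrieve_document() is not defined in this excerpt. Judging from
# the call site it fetches a page and returns an lxml HTML document,
# whose iterlinks() yields (element, attribute, link, pos) 4-tuples.
# A minimal sketch under that assumption; the 'content' argument is
# treated as an unused mode selector here.
import urllib2
import lxml.html

def retrieve_document(url, mode='content'):
    """Fetch *url* and parse it into an lxml HTML document."""
    html = urllib2.urlopen(url).read()
    return lxml.html.fromstring(html)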
# fixed column headers for the new data frame
new_df_tokens.append('plot')
new_df_tokens.append('x')
new_df_tokens.append('y')
for token in df_tokens:
    # known token translations take precedence
    if token in translate_dict:
        new_df_tokens.append(translate_dict[token])
    else:
        match = re.search(df_sp_regex, token)
        if not match:
            print "Couldn't match species name... %s" % token
        else:
            # species token: resolve via the cached mapping,
            # falling back to an EUNIS search
            if token in uh_to_df_species:
                new_df_token = uh_to_df_species[token]
            else:
                spf = SpeciesFinder()
                spf.set_search_name(token)
                sr = spf.search_on_eunis()
                (sp_name, sp_type, sp_url), ratio = spf.find_best_match(sr)
                found = sp_name
                if token == found.replace(" ", ""):
                    new_df_token = found.replace(" ", "_")
                else:
                    print "Couldn't find new species name for '%s'... Please re-check!" % token
                    new_df_token = token
            new_df_tokens.append(new_df_token)

uh = dict()
uh[2009] = list()
uh[2011] = list()
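# NOTE: find_best_match() is external to this excerpt; it apparently
# returns the closest (name, type, url) search result together with a
# similarity ratio. A hypothetical standalone sketch of that behaviour
# using difflib -- not the actual SpeciesFinder implementation:
import difflib

def find_best_match(search_name, search_results):
    """Return the (name, type, url) triple most similar to
    *search_name*, plus the similarity ratio (0.0 - 1.0)."""
    best, best_ratio = None, 0.0
    for name, sp_type, sp_url in search_results:
        ratio = difflib.SequenceMatcher(None, search_name, name).ratio()
        if ratio > best_ratio:
            best, best_ratio = (name, sp_type, sp_url), ratio
    return best, best_ratio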
output.append("\t".join((str(i), ht, str(species.species_id)))) i += 1 print "\n".join(output) #print species.species_id, species.name, habitat_types import sys sys.exit() spf = SpeciesFinder() output = list() for line in lines[:]: species, habitat_types = line.strip().split("\t") habitat_types = habitat_types.split("/") print species, habitat_types spf.set_search_name(species) spf.find_species() spf.get_species_data() url = spf.species.urls['eunis.eea.europa.eu'] sp_id = int(url.split("/")[-1]) basic_info = "\t".join((str(sp_id), spf.species.name, url, spf.species.author)) taxonomic_info = list()
for line in lines:
    species_name, habitat_types = line.strip().split("\t")
    habitat_types = habitat_types.split("/")
    #print species_name, habitat_types
    species = DbSpecies().find(species_name)
    for ht in habitat_types:
        output.append("\t".join((str(i), ht, str(species.species_id))))
        i += 1

print "\n".join(output)
#print species.species_id, species.name, habitat_types

# debugging exit: the SpeciesFinder pass below is currently unreachable
import sys
sys.exit()

spf = SpeciesFinder()
output = list()
for line in lines[:]:
    species, habitat_types = line.strip().split("\t")
    habitat_types = habitat_types.split("/")
    print species, habitat_types
    spf.set_search_name(species)
    spf.find_species()
    spf.get_species_data()
    url = spf.species.urls['eunis.eea.europa.eu']
    # the numeric EUNIS species id is the last path segment of the URL
    sp_id = int(url.split("/")[-1])
    basic_info = "\t".join(
        (str(sp_id), spf.species.name, url, spf.species.author))
    taxonomic_info = list()
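# A hypothetical input record, inferred from the parsing above: a species
# name and a slash-separated list of habitat type codes, tab-separated
# (name and codes invented for illustration):
line = "Abies alba\t9110/9130"
species_name, habitat_types = line.strip().split("\t")
print species_name, habitat_types.split("/")  # Abies alba ['9110', '9130']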
# adding found information to habitat object
hab.set_description(description)
hab.set_administrative_area(nuts_code)
hab.set_region(region)
hab.set_size(size)
hab.set_url(hab_url)
hab.set_geometry(src_ft)
# retrieving available habitat types and species from internal links
for link in contents.iterlinks():
    if link[0].find_class('linkint'):
        key = link[0].text_content().encode('utf-8')
        # four-character keys are habitat type codes; anything else is
        # treated as a species name
        if len(key) != 4:
            spf = SpeciesFinder(key)
            sp = spf.find_species(True)
            if sp is not None and sp.is_plant():
                hab.species.append(sp)
        else:
            hab.habitat_types.append(habitat_types[key])
# storing the completed habitat object under its site code
habitats[sitecode] = hab
hab.print_habitat_data()
print "==============================================================="
# saving found habitat data in pickle file
print "Dumping habitat area data to %s..." % hab_area_pkl_tgt,
pickle.dump(habitats, open(hab_area_pkl_tgt, 'wb'))
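# Once the pickle file has been written, the habitat map can be restored
# in a later session; a minimal round-trip sketch re-using the
# hab_area_pkl_tgt path from above:
import pickle

habitats = pickle.load(open(hab_area_pkl_tgt, 'rb'))
for sitecode, hab in habitats.items():
    hab.print_habitat_data()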