def find_species(url, species_data):
    """Collect the plant species linked from the document at *url*.

    Every link of the retrieved document whose target contains
    ``/<species_suffix>/`` is treated as a species link, and the text of
    the linking element is used as the species name.  Names already present
    in *species_data* are reused; unknown names are looked up through
    ``SpeciesFinder``.  A species that is found and reports itself as a
    plant has its data printed and is stored in *species_data*.

    Args:
        url: location passed on to ``retrieve_document``.
        species_data: dict mapping species name to species object; it is
            updated in place.

    Returns:
        None.  Results are delivered through *species_data*.
    """
    content = retrieve_document(url, 'content')
    # species_suffix is defined outside this function and is inserted into
    # the pattern as regular-expression text, without escaping.
    species_link = re.compile("/" + species_suffix + "/")
    for link in content.iterlinks():
        # each link is a 4-item tuple; the last item is not needed here
        element, attribute, tgt = link[:-1]
        # only links containing the species path segment are of interest
        if species_link.search(tgt) is None:
            continue
        # retrieving species name
        sp_name = element.text
        if sp_name in species_data:
            species = species_data[sp_name]
            print("\tSpecies '%s' already in database..." % sp_name)
        else:
            species = SpeciesFinder(sp_name).find_species()
        # NOTE(review): the source indentation was lost; this check is placed
        # after the if/else because that is the only layout in which the
        # cached ``species`` above is used -- confirm against the original.
        if species is not None and species.is_plant():
            species.print_species_data()
            species_data[sp_name] = species
import sys
# NOTE(review): if this exit sits at the same nesting level as the code
# below, nothing after it ever runs -- confirm whether it is a leftover
# debugging stop.
sys.exit()

# Build one tab-separated record per input line: species id, name, url and
# author, followed by one column per entry of the species' TAXA sequence.
spf = SpeciesFinder()
output = []
for line in lines:
    # each line holds a species name and a "/"-separated list of habitat
    # types, divided by a single tab
    species, habitat_types = line.strip().split("\t")
    habitat_types = habitat_types.split("/")
    print("%s %s" % (species, habitat_types))
    spf.set_search_name(species)
    spf.find_species()
    spf.get_species_data()
    url = spf.species.urls['eunis.eea.europa.eu']
    # the species id is the last path component of the url
    sp_id = int(url.split("/")[-1])
    basic_info = "\t".join(
        (str(sp_id), spf.species.name, url, spf.species.author))
    # taxa without information are exported as empty columns
    taxonomic_info = [
        spf.species.taxonomic_information.get(t, "")
        for t in spf.species.TAXA
    ]
    output.append("\t".join((basic_info, "\t".join(taxonomic_info))))
print("\n".join(output))
#print species.species_id, species.name, habitat_types
import sys
# NOTE(review): if this exit sits at the same nesting level as the code
# below, nothing after it ever runs -- confirm whether it is a leftover
# debugging stop.
sys.exit()

# Build one tab-separated record per input line: species id, name, url and
# author, followed by one column per entry of the species' TAXA sequence.
spf = SpeciesFinder()
output = []
for line in lines:
    # each line holds a species name and a "/"-separated list of habitat
    # types, divided by a single tab
    species, habitat_types = line.strip().split("\t")
    habitat_types = habitat_types.split("/")
    print("%s %s" % (species, habitat_types))
    spf.set_search_name(species)
    spf.find_species()
    spf.get_species_data()
    url = spf.species.urls['eunis.eea.europa.eu']
    # the species id is the last path component of the url
    sp_id = int(url.split("/")[-1])
    basic_info = "\t".join(
        (str(sp_id), spf.species.name, url, spf.species.author))
    # taxa without information are exported as empty columns
    taxonomic_info = [
        spf.species.taxonomic_information.get(t, "")
        for t in spf.species.TAXA
    ]
    output.append("\t".join((basic_info, "\t".join(taxonomic_info))))
print("\n".join(output))
# adding found information to habitat object hab.set_description(description) hab.set_administrative_area(nuts_code) hab.set_region(region) hab.set_size(size) hab.set_url(hab_url) hab.set_geometry(src_ft) # retrieving available habitat types and species for link in contents.iterlinks(): if link[0].find_class('linkint'): key = link[0].text_content().encode('utf-8') if not len(key) == 4: spf = SpeciesFinder(key) sp = spf.find_species(True) if sp is not None and sp.is_plant(): hab.species.append(sp) else: hab.habitat_types.append(habitat_types[key]) # adding found habitat types to habitat object habitats[sitecode] = hab pickle.dump(habitats, open(hab_area_pkl_tgt, 'wb')) hab.print_habitat_data() print "===============================================================" # saving found habitat data in pickle file print "Dumping habitat area data to %s..." % hab_area_pkl_tgt, pickle.dump(habitats, open(hab_area_pkl_tgt, 'wb'))