import re

# retrieve_document, SpeciesFinder, and species_suffix are assumed to be
# defined elsewhere in the module this snippet was taken from
def find_species(url, species_data):
    content = retrieve_document(url, 'content')

    for link in content.iterlinks():
        element, attribute, tgt = link[:-1]
        # look for links whose target URL contains the species suffix
        match = re.search("/{}/".format(species_suffix), tgt)
        if match is not None:
            # retrieve the species name from the link text
            sp_name = element.text
            if sp_name in species_data:
                species = species_data[sp_name]
                print("\tSpecies '%s' already in database..." % sp_name)
            else:
                spf = SpeciesFinder(sp_name)
                species = spf.find_species()
                if species is not None and species.is_plant():
                    species.print_species_data()
                    species_data[sp_name] = species
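The snippet does not show how the function is invoked; a minimal driver sketch follows, assuming a placeholder start URL and an initially empty dictionary (both are assumptions, not taken from the original code):

# hypothetical driver: crawl one page and collect plant species
species_data = {}
start_url = "https://eunis.eea.europa.eu/species"  # placeholder entry page
find_species(start_url, species_data)
print("%d species collected" % len(species_data))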
Example no. 3

    # SpeciesFinder comes from the surrounding module; `lines` is expected
    # to have been read from an input file earlier in the script
    spf = SpeciesFinder()
    output = []

    for line in lines:
        # each record is "<species name>\t<habitat type 1/habitat type 2/...>"
        species, habitat_types = line.strip().split("\t")
        habitat_types = habitat_types.split("/")
        print(species, habitat_types)

        spf.set_search_name(species)
        spf.find_species()
        spf.get_species_data()

        # the EUNIS URL ends with the numeric species id
        url = spf.species.urls['eunis.eea.europa.eu']
        sp_id = int(url.split("/")[-1])
        basic_info = "\t".join((str(sp_id), spf.species.name, url, spf.species.author))
        taxonomic_info = []
        for t in spf.species.TAXA:
            if t in spf.species.taxonomic_information:
                taxonomic_info.append(spf.species.taxonomic_information[t])
            else:
                taxonomic_info.append("")
        output.append("\t".join((basic_info, "\t".join(taxonomic_info))))

    print("\n".join(output))
Example no. 5
            # add the information found to the habitat object
            hab.set_description(description)
            hab.set_administrative_area(nuts_code)
            hab.set_region(region)
            hab.set_size(size)
            hab.set_url(hab_url)

            hab.set_geometry(src_ft)

            # retrieve the available habitat types and species
            for link in contents.iterlinks():
                if link[0].find_class('linkint'):
                    key = link[0].text_content()
                    # four-character keys are habitat type codes; anything
                    # else is treated as a species name
                    if len(key) != 4:
                        spf = SpeciesFinder(key)
                        sp = spf.find_species(True)
                        if sp is not None and sp.is_plant():
                            hab.species.append(sp)
                    else:
                        hab.habitat_types.append(habitat_types[key])

            # add the found habitat types to the habitat object and
            # checkpoint the collection after every site
            habitats[sitecode] = hab
            with open(hab_area_pkl_tgt, 'wb') as f:
                pickle.dump(habitats, f)

        hab.print_habitat_data()
        print("===============================================================")

    # save the found habitat data to a pickle file
    print("Dumping habitat area data to %s..." % hab_area_pkl_tgt, end=" ")
    with open(hab_area_pkl_tgt, 'wb') as f:
        pickle.dump(habitats, f)
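To work with the dumped data later, the pickle file can be read back. A short sketch follows; the value assigned to hab_area_pkl_tgt is hypothetical, chosen only to make the sketch self-contained:

import pickle

# hab_area_pkl_tgt is the same target path used during the dump above;
# shown here with a hypothetical value
hab_area_pkl_tgt = "habitat_areas.pkl"
with open(hab_area_pkl_tgt, 'rb') as f:
    habitats = pickle.load(f)

for sitecode, hab in sorted(habitats.items()):
    hab.print_habitat_data()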