import re

def find_species(url, species_data):
    content = retrieve_document(url, 'content')

    for link in content.iterlinks():
        element, attribute, tgt = link[:-1]
        # looking for links whose target URL contains the species suffix
        match = re.search("/%s/" % species_suffix, tgt)
        if match is not None:
            # retrieving the species name from the link text
            sp_name = element.text
            if sp_name in species_data:
                species = species_data[sp_name]
                print("\tSpecies '%s' already in database..." % sp_name)
            else:
                spf = SpeciesFinder(sp_name)
                species = spf.find_species()
                if species is not None and species.is_plant():
                    species.print_species_data()
                    species_data[sp_name] = species
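A minimal way to drive this function might look like the sketch below; the seed URL and pickle path are placeholders for illustration, not part of the original example.

import pickle

seed_url = "https://eunis.eea.europa.eu/habitats/1"  # hypothetical seed page
pkl_path = "species_data.pkl"                        # hypothetical cache file

# load any previously collected species, crawl one page, persist the result
try:
    with open(pkl_path, "rb") as pkl_file:
        species_data = pickle.load(pkl_file)
except FileNotFoundError:
    species_data = dict()

find_species(seed_url, species_data)

with open(pkl_path, "wb") as pkl_file:
    pickle.dump(species_data, pkl_file)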
Example #3
new_df_tokens.append('plot')
new_df_tokens.append('x')
new_df_tokens.append('y')

# translating the remaining dataframe column tokens to species names
for token in df_tokens:
    if token in translate_dict:
        new_df_tokens.append(translate_dict[token])
    else:
        match = re.search(df_sp_regex, token)
        if not match:
            print("Couldn't match species name... %s" % token)
            new_df_token = token  # fall back to the unchanged token
        elif token in uh_to_df_species:
            new_df_token = uh_to_df_species[token]
        else:
            spf = SpeciesFinder()
            spf.set_search_name(token)
            sr = spf.search_on_eunis()
            (sp_name, sp_type, sp_url), ratio = spf.find_best_match(sr)
            found = sp_name
            if token == found.replace(" ", ""):
                new_df_token = found.replace(" ", "_")
            else:
                print("Couldn't find new species name for '%s'... Please re-check!" % token)
                new_df_token = token
        new_df_tokens.append(new_df_token)

uh = dict()
uh[2009] = list()
uh[2011] = list()
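The renaming rule used above can be isolated as follows; this is a sketch of the assumed behavior, with made-up species names for illustration.

# Sketch of the renaming rule applied above: a candidate name is accepted
# only if it equals the token once its spaces are removed; the new token
# is then the name with spaces turned into underscores. The species names
# below are illustrative only.
def rename_token(token, found_name):
    if token == found_name.replace(" ", ""):
        return found_name.replace(" ", "_")
    return token  # keep the original token when the match is ambiguous

assert rename_token("Festucarubra", "Festuca rubra") == "Festuca_rubra"
assert rename_token("Unknownsp", "Something else") == "Unknownsp"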
 
            output.append("\t".join((str(i), ht, str(species.species_id))))
            i += 1
    
    print "\n".join(output)
        
        #print species.species_id, species.name, habitat_types
    
    


    import sys
    sys.exit()

    
    
    spf = SpeciesFinder()
    output = list()
    i = 0  # running row id for the output rows

    for line in lines[:]:
        species, habitat_types = line.strip().split("\t")
        habitat_types = habitat_types.split("/")
        print(species, habitat_types)

        spf.set_search_name(species)
        spf.find_species()
        spf.get_species_data()

        url = spf.species.urls['eunis.eea.europa.eu']
        sp_id = int(url.split("/")[-1])
        basic_info = "\t".join((str(sp_id), spf.species.name, url, spf.species.author))
        taxonomic_info = list()
        species = DbSpecies().find(species)

        for ht in habitat_types:
            output.append("\t".join((str(i), ht, str(species.species_id))))
            i += 1

    print("\n".join(output))

    import sys
    sys.exit()
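For reference, the loop above emits one tab-separated row per habitat type; a tiny self-contained demonstration, with a made-up species id and habitat type codes, is:

i = 0
species_id = 12345  # hypothetical database id
output = list()
for ht in "B1.4/E1.7".split("/"):
    output.append("\t".join((str(i), ht, str(species_id))))
    i += 1
print("\n".join(output))
# 0    B1.4    12345
# 1    E1.7    12345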

Example #6
            # adding found information to the habitat object
            hab.set_description(description)
            hab.set_administrative_area(nuts_code)
            hab.set_region(region)
            hab.set_size(size)
            hab.set_url(hab_url)

            hab.set_geometry(src_ft)

            # retrieving available habitat types and species:
            # 4-character link texts are habitat type codes,
            # anything else is treated as a species name
            for link in contents.iterlinks():
                if link[0].find_class('linkint'):
                    key = link[0].text_content()
                    if len(key) != 4:
                        spf = SpeciesFinder(key)
                        sp = spf.find_species(True)
                        if sp is not None and sp.is_plant():
                            hab.species.append(sp)
                    else:
                        hab.habitat_types.append(habitat_types[key])

            # adding the finished habitat to the collection and saving
            # intermediate results after every site
            habitats[sitecode] = hab
            with open(hab_area_pkl_tgt, 'wb') as pkl_file:
                pickle.dump(habitats, pkl_file)

        hab.print_habitat_data()
        print("===============================================================")

    # saving found habitat data in pickle file
    print("Dumping habitat area data to %s..." % hab_area_pkl_tgt, end=" ")