Ejemplo n.º 1
0
def collect_all_species(fname="../data/tfidf/data_gbif.json"):
    """Download every record offered by GBIF name suggestions and save them.

    Pages through ``species.name_suggest`` until an empty page comes back,
    tallies how often each field name occurs across the records, prints a
    short report (record count, number of distinct fields, per-field counts,
    elapsed seconds) and writes all records to ``fname`` as one JSON array.

    Args:
        fname: Path of the JSON file to write (UTF-8, non-ASCII kept as-is).
    """
    ts = time.time()
    offset = 0
    dct = {}
    data = []

    while True:
        tab_rep = species.name_suggest(offset=offset)
        if not tab_rep:
            break
        # The offset is a record offset, so step past everything just
        # received; stepping by 1 would re-fetch almost the whole page and
        # store/count the same records again and again.
        offset += len(tab_rep)
        data.extend(tab_rep)
        for x in tab_rep:
            for k in x:
                dct[k] = dct.get(k, 0) + 1

    nb_species = len(data)
    print('nb species', nb_species)
    print('Dict:', len(dct))
    pprint.PrettyPrinter(2).pprint(dct)

    with open(fname, "w", encoding='utf-8') as jsonfile:
        json.dump(data, jsonfile, ensure_ascii=False)
    print(time.time() - ts)
Ejemplo n.º 2
0
def test_species():
    """Exercise ``species.name_suggest`` and print what comes back.

    Prints the length of every record for a 'Puma concolor' query and for a
    query without arguments, then requests 'vespa' at offsets 0 through 9,
    printing each page size and every ``species`` value found, followed by
    the total number of records that had a ``species`` field.
    """
    for record in species.name_suggest(q='Puma concolor'):
        print(len(record))

    for record in species.name_suggest():
        print(len(record))

    found = 0
    for page in range(10):
        batch = species.name_suggest(q='vespa', offset=page)
        print(len(batch))
        for doc in batch:
            if 'species' not in doc:
                continue
            found += 1
            print(doc['species'])

    print(found)
Ejemplo n.º 3
0
def gbif_name_suggest(**kwargs):
    """Return the single best match from pygbif's ``species.name_suggest``.

    All keyword arguments are forwarded unchanged to
    ``species.name_suggest``. A result is considered only if it has a
    ``status`` and either a ``nubKey`` or the key field for its rank
    (``<rank>Key`` when the lower-cased rank is in ``RANK_KEYS``, otherwise
    ``key``).

    Candidates are grouped by status: ``ACCEPTED``, ``SYNONYM``, and
    everything else. Within a group the result with the smallest ``key``
    wins (the earlier one on ties). The ACCEPTED winner is preferred, then
    the SYNONYM winner, then the winner of the remaining statuses.

    Returns:
        The chosen result dict, or ``None`` if the response is empty or no
        result qualifies.
    """
    response = species.name_suggest(**kwargs)
    if not response:
        return None

    # Best candidate seen so far per status group.
    best = {}
    for result in response:
        rank = result.get('rank', '').lower()
        rank_key = rank + 'Key' if rank in RANK_KEYS else 'key'
        if 'status' not in result:
            continue
        if 'nubKey' not in result and rank_key not in result:
            continue

        status = result['status']
        # Exactly one group per result; previously ACCEPTED results also
        # landed in the catch-all group because of a missing ``elif``.
        group = status if status in ('ACCEPTED', 'SYNONYM') else 'OTHER'
        current = best.get(group)
        if current is None or result['key'] < current['key']:
            best[group] = result

    for group in ('ACCEPTED', 'SYNONYM', 'OTHER'):
        if group in best:
            return best[group]
    return None
Ejemplo n.º 4
0
def taxon_info(taxon):
    """
    Look up ``taxon`` through pygbif's ``species.name_suggest``.

    ``taxon`` is passed as the first positional argument and the call's
    return value is handed back unchanged.
    """
    return species.name_suggest(taxon)
Ejemplo n.º 5
0
def search_species(entry):
    """Search GBIF for species matching a name and print a report per hit.

    Asks ``species.name_suggest`` for up to 25 SPECIES-rank suggestions for
    ``entry``. For every suggestion with at least one recorded occurrence it
    prints the scientific name, English vernacular names, a two-sentence
    Wikipedia summary (when one can be fetched), the GBIF key and species
    page URL, the occurrence count, and the three countries that appear most
    often in the occurrence search results.

    Args:
        entry: Name to search for; also matched against vernacular names.

    Returns:
        dict mapping each canonical (scientific) name to a list holding a
        vernacular name followed, when available, by the Wikipedia summary.
        The dict is also printed before it is returned.
    """
    name_input = entry
    results = {}

    # Uses the pygbif method to find results
    suggest = species.name_suggest(q=name_input, rank='SPECIES', limit=25)

    # Pulls the results from the dataset in json format
    suggest_data = suggest['data']['results']

    # Reads data for each result
    for o in suggest_data:
        # Finds the gbif key
        key = o['key']
        # Uses pygbif to find number of occurrences of species key
        occurs = occurrences.count(taxonKey=key)
        # Only species that occurred more than zero times are reported
        if occurs <= 0:
            continue

        try:
            # Tries to retrieve scientific name
            canon_name = o['canonicalName']
        except KeyError:
            continue

        # Searches occurrence data for the species key; requested only now
        # so that skipped species do not cost an extra lookup
        occur_search = occurrences.search(taxonKey=key)

        # Vernacular name init (stays empty when there are no names)
        vern_name = ''
        try:
            # Looked up inside the try so that a record without vernacular
            # names is skipped like any other malformed record
            names = o['vernacularNames']

            print('Scientific name: ' + canon_name)
            print('Vernacular names: ')
            match_found = False
            # Reads from results in matched name
            for name in names:
                vern_name = name['vernacularName']
                language = name['language']
                # Can be changed if user wants to select a specific language
                if language == 'eng':
                    # Prints only the first English name containing the input
                    if name_input in vern_name and not match_found:
                        match_found = True
                        print(vern_name)

            # If no match is found, print each distinct name that is English
            if not match_found:
                seen = set()
                for name in names:
                    vern_name = name['vernacularName']
                    language = name['language']
                    if vern_name not in seen:
                        seen.add(vern_name)
                        if language == 'eng':
                            print(vern_name)

            # Adds scientific and vernacular name to results dictionary
            # NOTE(review): vern_name is whatever the loops above assigned
            # last (the final entry of names), not necessarily a name that
            # was printed -- confirm this is intended.
            results.setdefault(canon_name, []).append(vern_name)

            try:
                # Pulls the summary page for the species
                summary = wikipedia.summary(canon_name, sentences=2)
                print(summary)
                results.setdefault(canon_name, []).append(summary)
            except Exception:
                # Any lookup failure just means there is no description
                print("No description found")

            print('GBIF Key: ' + str(key))
            print('GBIF Species Page: ' + 'http://www.gbif.org/species/' + str(key))
            print('Count: ' + str(occurs))

            # Counts, per country, the occurrence records returned by the
            # occurrence search; records without a country are ignored
            occur_data = occur_search['results']
            countries = {}
            for occur in occur_data:
                try:
                    country = occur['country']
                    countries[country] = countries.get(country, 0) + 1
                except (KeyError, TypeError):
                    continue

            sorted_countries = sorted(countries.items(), key=lambda x: x[1], reverse=True)
            if countries:
                print('Top 3 Countries Observed: ')
                for country in sorted_countries[:3]:
                    print(str(country))
            print('\n')
        except Exception:
            # Best effort: a record that cannot be fully reported is skipped,
            # but KeyboardInterrupt/SystemExit are no longer swallowed
            continue

    print(results)
    return results
Ejemplo n.º 6
0
def test_name_suggest():
    """species.name_suggest - basic test

    Checks that a 'Puma concolor' query returns a list and that every
    result's ``canonicalName`` contains 'Puma concolor'.
    """
    res = species.name_suggest(q="Puma concolor")
    assert isinstance(res, list)
    assert all(re.search("Puma concolor", z["canonicalName"]) for z in res)
Ejemplo n.º 7
0
def test_name_suggest_paging():
    """species.name_suggest - paging

    Checks that ``limit=3`` yields a list of exactly three results.
    """
    res = species.name_suggest(q="Aso", limit=3)
    assert isinstance(res, list)
    assert len(res) == 3
Ejemplo n.º 8
0
def search_taxa(q):
    """Run ``species.name_suggest(q)`` and return the result as a DataFrame.

    The suggestions are converted with ``pd.DataFrame.from_dict``.
    """
    suggestions = species.name_suggest(q)
    return pd.DataFrame.from_dict(suggestions)