Exemple #1
0
def test_datasets_limit():
    """registry.datasets - limit param

    For each requested page size the response must be a plain ``dict``
    whose ``'results'`` list holds exactly that many entries.
    """
    for requested in (1, 3):
        response = registry.datasets(limit=requested)
        assert dict == response.__class__
        assert requested == len(response['results'])
def test_datasets_limit():
    """registry.datasets - limit param

    For each requested page size the response must be a plain ``dict``
    whose ``'data'`` list holds exactly that many entries.
    """
    for requested in (1, 3):
        response = registry.datasets(limit=requested)
        assert dict == response.__class__
        assert requested == len(response['data'])
Exemple #3
0
def test_datasets_type():
    """registry.datasets - type param

    Querying with ``type="OCCURRENCE"`` must return a ``dict`` carrying
    100 entries under ``'results'``, every one of type ``OCCURRENCE``.
    """
    res = registry.datasets(type="OCCURRENCE")
    # Check the container type first so a malformed response fails on this
    # assertion rather than on an indexing error further down.
    assert dict == res.__class__
    assert 100 == len(res['results'])
    # Compare the whole set of returned types: picking a single element out
    # of the set would let a mixed-type response pass or fail depending on
    # set iteration order.
    assert {'OCCURRENCE'} == {x['type'] for x in res['results']}
Exemple #4
0
def test_datasets_type():
    """registry.datasets - type param

    Querying with ``type="OCCURRENCE"`` must return a ``dict`` carrying
    100 entries under ``"results"``, every one of type ``OCCURRENCE``.
    """
    res = registry.datasets(type="OCCURRENCE")
    # Check the container type first so a malformed response fails on this
    # assertion rather than on an indexing error further down.
    assert dict == res.__class__
    assert 100 == len(res["results"])
    # Compare the whole set of returned types: picking a single element out
    # of the set would let a mixed-type response pass or fail depending on
    # set iteration order.
    assert {"OCCURRENCE"} == {x["type"] for x in res["results"]}
def test_datasets_type():
    """registry.datasets - type param

    Querying with ``type="OCCURRENCE"`` must return a ``dict`` carrying
    100 entries under ``'data'``, every one of type ``OCCURRENCE``.
    """
    res = registry.datasets(type="OCCURRENCE")
    # Check the container type first so a malformed response fails on this
    # assertion rather than on an indexing error further down.
    assert dict == res.__class__
    assert 100 == len(res['data'])
    # Compare the whole set of returned types: picking a single element out
    # of the set would let a mixed-type response pass or fail depending on
    # set iteration order.
    assert {'OCCURRENCE'} == {x['type'] for x in res['data']}
# Script: for every dataset listed in the occurrence-search facet below, make
# sure its archive zip exists locally (downloading it if needed) and open it
# with DwCAReader.

# presumably a worker/process count; not referenced in the visible part of
# this script -- TODO confirm where it is used
procs = 6
# Directory holding one "<datasetKey>.zip" archive per dataset (see dwca_file).
datasetsDir = './datasets/'
# Directory handed to get_index() below.
indexDir = './index/'

# Occurrence search restricted to the ResourceRelationship extension and
# faceted by datasetKey (at most 1000 facet values requested). Only the facet
# counts of the response are read below.
results = occ.search(dwca_extension="http://rs.tdwg.org/dwc/terms/ResourceRelationship", limit=0, facet="datasetKey", facetLimit=1000)

# Index handle; not used in the visible part of this script.
ix = get_index(indexDir)

# One iteration per facet entry; each entry's 'name' is used as the dataset key.
for r in progressBar(results['facets'][0]['counts'], prefix="Progress", suffix="Complete"):
    datasetKey = r['name']
    dwca_file = f'{datasetsDir}{datasetKey}.zip'

    # Download the archive only when no local copy exists yet.
    if not os.path.isfile(dwca_file) :
        try:
            # no-op statement; has no effect on the download below
            pass
            dataset = registry.datasets(uuid=datasetKey)
            # Keep only the endpoints whose type is 'DWC_ARCHIVE'.
            dwca_endpoints = [e for e in dataset['endpoints'] if e['type'] == 'DWC_ARCHIVE']
            # NOTE(review): when this list is empty nothing is downloaded, yet
            # control still reaches DwCAReader(dwca_file) below with a file
            # that does not exist -- confirm whether a `continue` is intended.
            if len(dwca_endpoints) > 0 :
                # Only the first matching endpoint is used.
                url = dwca_endpoints[0]['url']
                # NOTE(review): the HTTP status is not checked before the body
                # is written to disk -- confirm error responses cannot end up
                # saved as the zip file.
                req = requests.get(url, stream=True)

                # Write the response body to the zip file in 512-byte chunks.
                with open(dwca_file, 'wb') as fd:
                    for chunk in req.iter_content(chunk_size=512) :
                        fd.write(chunk)
        except Exception as e:
            # Any failure while looking up or downloading the dataset is
            # printed and the dataset is skipped.
            print(e)
            continue

    # Open the local archive and iterate over it.
    with DwCAReader(dwca_file) as dwca:
        try:
            for row in dwca:
Exemple #7
0
def test_datasets():
    """registry.datasets - basic test

    A call with no arguments must return a plain ``dict``.
    """
    response = registry.datasets()
    assert response.__class__ == dict
def get_gbif_metadata(datasetId):
    """Fetch the registry record for one dataset.

    Calls ``registry.datasets`` with ``uuid=datasetId`` and ``data='all'``
    and returns whatever that call returns.

    Any exception raised by the lookup is caught: a "Dataset not found"
    line is printed and ``None`` is returned instead. Because every
    ``Exception`` is handled the same way, the printed message does not
    distinguish a missing dataset from other failures.
    """
    try:
        metadata = registry.datasets(uuid=datasetId, data='all')
    except Exception:
        print(f'Dataset not found {datasetId}')
        metadata = None
    return metadata
def test_datasets():
    """registry.datasets - basic test

    A call with no arguments must return a plain ``dict``.
    """
    response = registry.datasets()
    assert response.__class__ == dict