def test_datasets_limit():
    """registry.datasets - limit param

    Requests 1 and then 3 records and checks that the response is a plain
    dict whose 'results' list has exactly the requested length.
    """
    for requested in (1, 3):
        res = registry.datasets(limit=requested)
        assert res.__class__ is dict
        assert len(res['results']) == requested
def test_datasets_limit():
    """registry.datasets - limit param

    Requests 1 and then 3 records and checks that the response is a plain
    dict whose 'data' list has exactly the requested length.
    """
    for requested in (1, 3):
        res = registry.datasets(limit=requested)
        assert res.__class__ is dict
        assert len(res['data']) == requested
def test_datasets_type():
    """registry.datasets - type param

    Queries datasets filtered by type="OCCURRENCE" and checks that the
    response is a dict holding 100 results, every one of which has the
    requested type.
    """
    res = registry.datasets(type="OCCURRENCE")
    # Check the container first so a malformed response fails on an
    # assertion instead of raising while indexing into it.
    assert dict == res.__class__
    assert 100 == len(res['results'])
    # Compare the whole set of returned types. Picking one element out of a
    # set only inspects an arbitrary member, so a response that mixed types
    # could pass or fail depending on set iteration order.
    returned_types = {x['type'] for x in res['results']}
    assert {'OCCURRENCE'} == returned_types
def test_datasets_type():
    """registry.datasets - type param

    Queries datasets filtered by type="OCCURRENCE" and checks that the
    response is a dict holding 100 results, every one of which has the
    requested type.
    """
    res = registry.datasets(type="OCCURRENCE")
    # Check the container first so a malformed response fails on an
    # assertion instead of raising while indexing into it.
    assert dict == res.__class__
    assert 100 == len(res["results"])
    # Compare the whole set of returned types. Picking one element out of a
    # set only inspects an arbitrary member, so a response that mixed types
    # could pass or fail depending on set iteration order.
    returned_types = {x["type"] for x in res["results"]}
    assert {"OCCURRENCE"} == returned_types
def test_datasets_type():
    """registry.datasets - type param

    Queries datasets filtered by type="OCCURRENCE" and checks that the
    response is a dict holding 100 entries under 'data', every one of which
    has the requested type.
    """
    res = registry.datasets(type="OCCURRENCE")
    # Check the container first so a malformed response fails on an
    # assertion instead of raising while indexing into it.
    assert dict == res.__class__
    assert 100 == len(res['data'])
    # Compare the whole set of returned types. Picking one element out of a
    # set only inspects an arbitrary member, so a response that mixed types
    # could pass or fail depending on set iteration order.
    returned_types = {x['type'] for x in res['data']}
    assert {'OCCURRENCE'} == returned_types
procs = 6 datasetsDir = './datasets/' indexDir = './index/' results = occ.search(dwca_extension="http://rs.tdwg.org/dwc/terms/ResourceRelationship", limit=0, facet="datasetKey", facetLimit=1000) ix = get_index(indexDir) for r in progressBar(results['facets'][0]['counts'], prefix="Progress", suffix="Complete"): datasetKey = r['name'] dwca_file = f'{datasetsDir}{datasetKey}.zip' if not os.path.isfile(dwca_file) : try: pass dataset = registry.datasets(uuid=datasetKey) dwca_endpoints = [e for e in dataset['endpoints'] if e['type'] == 'DWC_ARCHIVE'] if len(dwca_endpoints) > 0 : url = dwca_endpoints[0]['url'] req = requests.get(url, stream=True) with open(dwca_file, 'wb') as fd: for chunk in req.iter_content(chunk_size=512) : fd.write(chunk) except Exception as e: print(e) continue with DwCAReader(dwca_file) as dwca: try: for row in dwca:
def test_datasets():
    """registry.datasets - basic test

    Calls registry.datasets() with no arguments and checks that the
    response is a plain dict.
    """
    assert registry.datasets().__class__ is dict
def get_gbif_metadata(datasetId):
    """Look up the registry record for one dataset.

    Calls ``registry.datasets`` with ``uuid=datasetId`` and ``data='all'``
    and returns whatever that call returns. If the call raises any
    ``Exception``, a "Dataset not found" message is printed and ``None`` is
    returned instead.
    """
    try:
        metadata = registry.datasets(uuid=datasetId, data='all')
    except Exception:
        # Every failure is reported with the same message, whatever the
        # underlying cause was.
        print(f'Dataset not found {datasetId}')
        return None
    else:
        return metadata