def dataverse(action: str, doi: str = None, fileid: str = None):
    api = NativeApi(BASE_URL, API_TOKEN)
    PID = 'doi:10.5072/FK2/3OZLV6'
    if doi:
        PID = doi

    # Fetch the file listing of the latest dataset version up front,
    # so that both actions below can use it.
    files = api.get_datafiles(PID, ':latest').json()

    if action == 'showfiles':
        if not fileid:
            return files

    if action == 'getfile':
        if not fileid:
            # No file id given: return an index of all files instead.
            df = pd.DataFrame(files['data'])
            filesindex = {}
            for i in df.index:
                filesindex[df.iloc[i].label] = df.iloc[i].dataFile
            pdfiles = pd.DataFrame(filesindex)
            return pdfiles
        FILEID = fileid

        # Download the selected datafile and return its rows as JSON records.
        fileURL = "%s/api/access/datafile/%s" % (BASE_URL, FILEID)
        df = pd.read_csv(fileURL)
        data = {}
        datapoints = json.loads(df.to_json(orient='records'))
        data['data'] = datapoints
        return data
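# A hypothetical call of the helper above, assuming BASE_URL and API_TOKEN
# are defined at module level; without a DOI the default PID is used, and
# the fileid value below is only a placeholder database id:
file_listing = dataverse('showfiles')
csv_records = dataverse('getfile', fileid='1')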
def test_token_right_create_dataset_rights(self):
    BASE_URL = os.getenv("BASE_URL")
    api_su = NativeApi(BASE_URL, os.getenv("API_TOKEN_SUPERUSER"))
    api_nru = NativeApi(BASE_URL, os.getenv("API_TOKEN_TEST_NO_RIGHTS"))

    resp = api_su.get_info_version()
    assert resp.json()["data"]["version"] == "4.15.1"
    assert resp.json()["data"]["build"] == "1377-701b56b"
    resp = api_nru.get_info_version()
    assert resp.json()["data"]["version"] == "4.15.1"
    assert resp.json()["data"]["build"] == "1377-701b56b"

    ds = Dataset()
    ds.from_json(
        read_file(
            os.path.join(BASE_DIR,
                         "tests/data/dataset_upload_min_default.json")))
    resp = api_su.create_dataset(":root", ds.json())
    pid = resp.json()["data"]["persistentId"]
    assert resp.json()["status"] == "OK"

    with pytest.raises(ApiAuthorizationError):
        resp = api_nru.get_dataset(pid)

    resp = api_su.delete_dataset(pid)
    assert resp.json()["status"] == "OK"
def test_token_empty_string(self):
    BASE_URL = os.getenv("BASE_URL")
    api = NativeApi(BASE_URL, "")
    resp = api.get_info_version()
    assert resp.json()["data"]["version"] == "4.18.1"
    assert resp.json()["data"]["build"] == "267-a91d370"

    with pytest.raises(ApiAuthorizationError):
        ds = Dataset()
        ds.from_json(
            read_file(
                os.path.join(BASE_DIR,
                             "tests/data/dataset_upload_min_default.json")))
        api.create_dataset(":root", ds.json())
def test_token_no_rights(self):
    BASE_URL = os.getenv("BASE_URL")
    API_TOKEN = os.getenv("API_TOKEN_NO_RIGHTS")
    api = NativeApi(BASE_URL, API_TOKEN)
    resp = api.get_info_version()
    assert resp.json()["data"]["version"] == "4.15.1"
    assert resp.json()["data"]["build"] == "1377-701b56b"

    with pytest.raises(ApiAuthorizationError):
        ds = Dataset()
        ds.from_json(
            read_file(
                os.path.join(BASE_DIR,
                             "tests/data/dataset_upload_min_default.json")))
        api.create_dataset(":root", ds.json())
def native_api(monkeypatch):
    """Fixture to set up an Api connection.

    Returns
    -------
    NativeApi
        NativeApi object.
    """
    monkeypatch.setenv("BASE_URL", "https://demo.dataverse.org")
    return NativeApi(os.getenv("BASE_URL"))
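# A minimal sketch of how the fixture above might be consumed, assuming it is
# registered with @pytest.fixture (the decorator is not shown in the snippet);
# NativeApi keeps the instance URL on its `base_url` attribute:
def test_native_api_connection(native_api):
    assert native_api.base_url == "https://demo.dataverse.org"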
def collect_data(
    user_handle: str,
    parent: str,
    data_types: List[str],
    filename: str,
    create_json: bool,
) -> None:
    """Collect data of a Dataverse installation.

    Collects data from a data node down the Dataverse tree-like data
    structure: the complete data of a Dataverse instance as a tree
    (`tree.json`), containing all Dataverses, Datasets and Datafiles.
    The file is stored in your instance directory
    (e.g. `utils/data/instances/dataverse_production`).
    """
    if user_handle == "public":
        api = NativeApi(config.BASE_URL)
    else:
        users = read_json(config.USER_FILENAME)
        api = NativeApi(config.BASE_URL, users[user_handle]["api-token"])
    tree = api.get_children(parent, children_types=data_types)

    if not os.path.isdir(os.path.join(ROOT_DIR, "data")):
        os.makedirs(os.path.join(ROOT_DIR, "data"))
    if not os.path.isdir(os.path.join(ROOT_DIR, "data", "utils")):
        os.makedirs(os.path.join(ROOT_DIR, "data", "utils"))
    if not os.path.isdir(os.path.join(ROOT_DIR, "data", "utils", user_handle)):
        os.makedirs(os.path.join(ROOT_DIR, "data", "utils", user_handle))

    write_json(os.path.join(UTILS_DATA_DIR, user_handle, filename), tree)
    if create_json:
        generate_data(tree, user_handle, filename)
def datasync(self):
    native_api = NativeApi(BASE_URL, API_TOKEN)
    # Turn the MD5 hex digest of the repo into a 6-digit integer id.
    self.ds_id = str(
        int(self.make_dataset_id(self.REPO).hexdigest(), 16))[:6]
    metadata = self.make_dataset_metadata(self.REPO)
    print(metadata)

    self.ds = Dataset()
    self.ds.set(metadata)
    self.ds.displayName = metadata['title']
    self.ds.json = metadata
    print(self.ds.get())

    if self.DEBUG:
        print("[datasync]")
        print(self.ds)
        print(self.ds_id)
        print(self.ds.displayName)

    self.create_dataset(native_api, self.ds, DV_ALIAS, self.ds_id, BASE_URL)
    if self.DEBUG:
        print(metadata)
    self.upload_files_to_dataverse(self.ds_id, self.urls_found)
    return True
def remove_testdata(
    user_handle: str,
    parent: str,
    data_types: List[str] = ["dataverses", "datasets"],
    force: bool = False,
    parent_data_type: str = "dataverse",
    remove_parent: bool = False,
) -> None:
    """Remove testdata.

    Removes all data created by `create-testdata`. It recursively collects
    all Dataverses and Datasets from a passed Dataverse down
    (by default `science`).

    If `PRODUCTION` is `true`, this function will not execute unless you add
    `--force` to the call. This protects a production instance from unwanted
    changes.
    """
    if config.PRODUCTION and not force:
        print(
            "Deleting testdata on a PRODUCTION instance is not allowed. Use --force to force it."
        )
        sys.exit()

    user = read_json(config.USER_FILENAME)[user_handle]
    api = NativeApi(config.BASE_URL, user["api-token"])

    # Clean up
    data = api.get_children(parent, children_types=data_types)
    dataverses, datasets = dataverse_tree_walker(data)
    if parent_data_type == "dataverse" and remove_parent:
        dataverses.append({"dataverse_alias": parent})
    for ds in datasets:
        api.destroy_dataset(ds["pid"])
    for dv in dataverses:
        api.delete_dataverse(dv["dataverse_alias"])
class DataverseClient(object):

    def __init__(self, host, api_token=None):
        self._host = host
        self.api_token = api_token
        self.api = Api(host, api_token=api_token)
        self.native_api = NativeApi(host, api_token=api_token)
        self.data_access_api = DataAccessApi(host, api_token=api_token)

    def get_ddi(self, doi, format=dv_static.EXPORTER_FORMAT_DDI):
        """Get DDI metadata file."""
        response = self.native_api.get_dataset_export(doi, format)
        return DDI(response.content)

    def get_user_info(self, user_api_token=None):
        """Placeholder until pyDataverse API is updated."""
        api_token = user_api_token if user_api_token else self.api_token

        # format the url; format_dv_url strips any trailing "/"
        ye_host = RegisteredDataverse.format_dv_url(self._host)
        dv_url = f'{ye_host}/api/v1/users/:me'

        # make the request
        headers = {'X-Dataverse-key': api_token}
        try:
            response = requests.get(dv_url, headers=headers)
        except ConnectionError as err_obj:
            return err_resp(f'Failed to connect. {err_obj}')

        if response.status_code == 200:
            if not response.content:
                # The response content is an empty string or None -- shouldn't happen...
                return err_resp("Dataverse returned an HTTP 200 status code but failed to return a response.")

            resp_json = response.json()
            dv_status = resp_json.get(dv_static.DV_KEY_STATUS)
            if not dv_status:
                return err_resp("Dataverse response failed to return a 'status'.")

            if dv_status == dv_static.STATUS_VAL_ERROR:
                user_msg = resp_json.get(dv_static.DV_KEY_MESSAGE, '(No message from Dataverse)')
                return err_resp(f"Dataverse error: {user_msg}")

            return ok_resp(resp_json)

        try:
            json_resp = response.json()
            if 'message' in json_resp:
                return err_resp(json_resp['message'])
        except ValueError:
            pass

        return err_resp(f'Status code: {response.status_code} {response.text}')

    def get_schema_org(self, doi):
        """Get schema.org data."""
        return self.get_dataset_export_json(doi, dv_static.EXPORTER_FORMAT_SCHEMA_ORG)

    def get_dataset_export_json(self, doi, format_type):
        """Get dataset export."""
        try:
            response = self.native_api.get_dataset_export(doi, format_type)
        except ConnectionError as err_obj:
            return err_resp(f'Failed to connect. {err_obj}')
        return response
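# A hypothetical usage sketch of the client above; the host, token, and DOI
# are placeholders, and err_resp/ok_resp wrap the result as defined elsewhere
# in the module (not shown here):
client = DataverseClient("https://demo.dataverse.org", api_token="xxxx-xxxx")
user_info = client.get_user_info()               # wrapped result of /api/v1/users/:me
ddi = client.get_ddi("doi:10.5072/FK2/3OZLV6")   # DDI export of a dataset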
def create_testdata(config_file: str, force: bool) -> None:
    """Create testdata defined in a config file.

    Creates a pre-defined set of testdata on your instance. By default, the
    function uses the AUSSDA test data repository, which is so far not
    publicly available.

    If `PRODUCTION` is `true`, this function will not execute unless you add
    `--force` to the call. This protects a production instance from unwanted
    changes.
    """
    # Init
    if config.PRODUCTION and not force:
        print(
            "Creating testdata on a PRODUCTION instance is not allowed. Use --force to force it."
        )
        sys.exit()
    pid_idx = []
    users = read_json(config.USER_FILENAME)
    workflow = read_json(os.path.join(ROOT_DIR, config_file))

    # Dataverses
    for dv_conf in workflow["dataverses"]:
        dv_alias = None
        if "create" in dv_conf:
            api = NativeApi(
                config.BASE_URL,
                users[dv_conf["create"]["user-handle"]]["api-token"])
            dv = Dataverse()
            dv_filename = os.path.join(ROOT_DIR,
                                       dv_conf["create"]["metadata-filename"])
            dv.from_json(read_file(dv_filename))
            if "update" in dv_conf["create"]:
                for key, val in dv_conf["create"]["update"].items():
                    kwargs = {key: val}
                    dv.set(kwargs)
            dv_alias = dv.get()["alias"]
            resp = api.create_dataverse(dv_conf["create"]["parent"], dv.json())

        if "publish" in dv_conf:
            api = NativeApi(
                config.BASE_URL,
                users[dv_conf["publish"]["user-handle"]]["api-token"])
            if not dv_alias and "alias" in dv_conf["publish"]:
                dv_alias = dv_conf["publish"]["alias"]
            resp = api.publish_dataverse(dv_alias)

    # Datasets
    for ds_conf in workflow["datasets"]:
        pid = None
        if "create" in ds_conf:
            api = NativeApi(
                config.BASE_URL,
                users[ds_conf["create"]["user-handle"]]["api-token"])
            ds = Dataset()
            ds_filename = os.path.join(ROOT_DIR,
                                       ds_conf["create"]["metadata-filename"])
            ds.from_json(read_file(ds_filename))
            if "update" in ds_conf["create"]:
                for key, val in ds_conf["create"]["update"].items():
                    kwargs = {key: val}
                    ds.set(kwargs)
            resp = api.create_dataset(dv_alias, ds.json())
            pid = resp.json()["data"]["persistentId"]
            pid_idx.append(pid)

        if "publish" in ds_conf:
            if not pid:
                print("ERROR: PID missing!")
                sys.exit()
            api = NativeApi(
                config.BASE_URL,
                users[ds_conf["publish"]["user-handle"]]["api-token"])
            resp = api.publish_dataset(pid, release_type="major")

    # Datafiles
    for dataset_id, ds_datafiles in workflow["datafiles"].items():
        if int(dataset_id) == workflow["datasets"][int(dataset_id)]["id"]:
            pid = pid_idx[int(dataset_id)]
        else:
            print("ERROR: Dataset ID not matching.")
            sys.exit()
        for df_conf in ds_datafiles:
            if "upload" in df_conf:
                api = NativeApi(
                    config.BASE_URL,
                    users[df_conf["upload"]["user-handle"]]["api-token"],
                )
                metadata = read_json(df_conf["upload"]["metadata-filename"])
                df = Datafile()
                df.set(metadata)
                if "update" in df_conf["upload"]:
                    for key, val in df_conf["upload"]["update"].items():
                        kwargs = {key: val}
                        df.set(kwargs)
                df.set({"pid": pid})
                filename = df_conf["upload"]["filename"]
                resp = api.upload_datafile(pid, filename, df.json())
                # Ingest of tabular files (.sav, .dta) takes longer.
                if filename[-4:] == ".sav" or filename[-4:] == ".dta":
                    sleep(30)
                else:
                    sleep(3)
            if "publish-dataset" in df_conf:
                api = NativeApi(
                    config.BASE_URL,
                    users[df_conf["publish-dataset"]["user-handle"]]["api-token"],
                )
                if df_conf["publish-dataset"]:
                    resp = api.publish_dataset(pid, release_type="major")
def native_api(config):
    """Initialize pyDataverse Native Api object."""
    yield NativeApi(config.BASE_URL)
def test_api_connect_base_url_wrong(self):
    """Test native_api connection with wrong `base_url`."""
    # None
    with pytest.raises(ApiUrlError):
        NativeApi(None)
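# For contrast, a minimal sketch of the happy path, assuming the demo
# instance URL is valid; NativeApi stores the URL on `base_url`:
def test_api_connect_base_url_valid(self):
    api = NativeApi("https://demo.dataverse.org")
    assert api.base_url == "https://demo.dataverse.org"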
print("uploading ", os.path.basename(os.path.normpath(folder)) + os.sep + file) # make POST request to target URL r = requests.post(url_persistent_id, data=payload, files=files) # store upload status status = r.json()["status"] return status # %% run every 5 seconds until upload is successful (circumventing denied URL requests) # create a NativeApi instance api = NativeApi(dataverse_server, api_key) # declare a flag to track upload status flag = None while flag != "OK": try: flag = upload_files_to_dataverse(args.InputFolder, file_list, file_description) except Exception as e: print(e) print("... starting again") time.sleep(5) # make a new release