Example #1
def dataverse(action: str, doi: str = None, fileid: str = None):
    api = NativeApi(BASE_URL, API_TOKEN)
    PID = 'doi:10.5072/FK2/3OZLV6'
    if doi:
        PID = doi
    if action == 'showfiles':
        # return the raw file listing of the latest dataset version
        files = api.get_datafiles(PID, ':latest').json()
        return files
    if action == 'getfile':
        if not fileid:
            # no file id given: list the dataset's files so an id can be picked
            files = api.get_datafiles(PID, ':latest').json()
            df = pd.DataFrame(files['data'])
            filesindex = {}
            for i in df.index:
                filesindex[df.iloc[i].label] = df.iloc[i].dataFile
            pdfiles = pd.DataFrame(filesindex)
            return pdfiles
        else:
            # download the datafile via the Access API and return its rows as JSON records
            fileURL = "%s/api/access/datafile/%s" % (BASE_URL, fileid)
            df = pd.read_csv(fileURL)
            data = {}
            data['data'] = json.loads(df.to_json(orient='records'))
            return data
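A hedged usage sketch of the helper above; the numeric file id is a placeholder:

files = dataverse('showfiles')               # file metadata of the default dataset
records = dataverse('getfile', fileid='42')  # rows of datafile 42 as JSON records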
Example #2
        def test_token_right_create_dataset_rights(self):
            BASE_URL = os.getenv("BASE_URL")
            api_su = NativeApi(BASE_URL, os.getenv("API_TOKEN_SUPERUSER"))
            api_nru = NativeApi(BASE_URL,
                                os.getenv("API_TOKEN_TEST_NO_RIGHTS"))

            resp = api_su.get_info_version()
            assert resp.json()["data"]["version"] == "4.15.1"
            assert resp.json()["data"]["build"] == "1377-701b56b"
            resp = api_nru.get_info_version()
            assert resp.json()["data"]["version"] == "4.15.1"
            assert resp.json()["data"]["build"] == "1377-701b56b"

            ds = Dataset()
            ds.from_json(
                read_file(
                    os.path.join(
                        BASE_DIR,
                        "tests/data/dataset_upload_min_default.json")))
            resp = api_su.create_dataset(":root", ds.json())
            pid = resp.json()["data"]["persistentId"]
            assert resp.json()["status"] == "OK"

            with pytest.raises(ApiAuthorizationError):
                resp = api_nru.get_dataset(pid)

            resp = api_su.delete_dataset(pid)
            assert resp.json()["status"] == "OK"
Example #3
        def test_token_empty_string(self):
            BASE_URL = os.getenv("BASE_URL")
            api = NativeApi(BASE_URL, "")
            resp = api.get_info_version()
            assert resp.json()["data"]["version"] == "4.18.1"
            assert resp.json()["data"]["build"] == "267-a91d370"

            with pytest.raises(ApiAuthorizationError):
                ds = Dataset()
                ds.from_json(
                    read_file(
                        os.path.join(
                            BASE_DIR,
                            "tests/data/dataset_upload_min_default.json")))
                api.create_dataset(":root", ds.json())
Example #4
        def test_token_no_rights(self):
            BASE_URL = os.getenv("BASE_URL")
            API_TOKEN = os.getenv("API_TOKEN_NO_RIGHTS")
            api = NativeApi(BASE_URL, API_TOKEN)
            resp = api.get_info_version()
            assert resp.json()["data"]["version"] == "4.15.1"
            assert resp.json()["data"]["build"] == "1377-701b56b"

            with pytest.raises(ApiAuthorizationError):
                ds = Dataset()
                ds.from_json(
                    read_file(
                        os.path.join(
                            BASE_DIR,
                            "tests/data/dataset_upload_min_default.json")))
                api.create_dataset(":root", ds.json())
Example #5
def native_api(monkeypatch):
    """Fixture, so set up an Api connection.

    Returns
    -------
    Api
        Api object.

    """
    monkeypatch.setenv("BASE_URL", "https://demo.dataverse.org")
    return NativeApi(os.getenv("BASE_URL"))
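A brief sketch of how such a fixture might be consumed in a test; the test name and assertion are illustrative:

def test_get_info_version(native_api):
    # pytest injects the NativeApi instance returned by the fixture
    resp = native_api.get_info_version()
    assert resp.json()["status"] == "OK"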
Example #6
def collect_data(
    user_handle: str,
    parent: str,
    data_types: List[str],
    filename: str,
    create_json: bool,
) -> None:
    """Collect data of a Dataverse installation.

    Collect data from a data node down the Dataverse
    tree-like data structure.

    Collects the complete data of a Dataverse instance in
    a tree structure (`tree.json`), containing all
    Dataverses, Datasets and Datafiles. The file is
    stored in your instance directory (e. g.
    `utils/data/instances/dataverse_production`).

    """

    if user_handle == "public":
        api = NativeApi(config.BASE_URL)
    else:
        users = read_json(config.USER_FILENAME)
        api = NativeApi(config.BASE_URL, users[user_handle]["api-token"])
    tree = api.get_children(parent, children_types=data_types)
    # make sure the full output directory exists before writing the tree
    os.makedirs(os.path.join(ROOT_DIR, "data", "utils", user_handle),
                exist_ok=True)
    write_json(os.path.join(UTILS_DATA_DIR, user_handle, filename), tree)
    if create_json:
        generate_data(tree, user_handle, filename)
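For illustration, a possible call that collects the full tree below the root collection; the parent alias and filename are assumptions:

collect_data(
    user_handle="public",
    parent=":root",
    data_types=["dataverses", "datasets", "datafiles"],
    filename="tree.json",
    create_json=False,
)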
Example #7
 def datasync(self):
     native_api = NativeApi(BASE_URL, API_TOKEN)
     self.ds_id = str(
         int(self.make_dataset_id(self.REPO).hexdigest(),
             16))[:6]  # turn the MD5 hex digest into a 6-digit identifier
     metadata = self.make_dataset_metadata(self.REPO)
     print(metadata)
     self.ds = Dataset()
     self.ds.set(metadata)
     self.ds.displayName = metadata['title']
     self.ds.json = metadata
     print(self.ds.get())
     if self.DEBUG:
         print("[datasync]")
         print(self.ds)
         print(self.ds_id)
         print(self.ds.displayName)
     self.create_dataset(native_api, self.ds, DV_ALIAS, self.ds_id,
                         BASE_URL)
     if self.DEBUG:
         print(metadata)
     self.upload_files_to_dataverse(self.ds_id, self.urls_found)
     return True
Example #8
def remove_testdata(
    user_handle: str,
    parent: str,
    data_types: List[str] = ["dataverses", "datasets"],
    force: bool = False,
    parent_data_type: str = "dataverse",
    remove_parent: bool = False,
) -> None:
    """Remove testdata.

    Removes all data created by `create-testdata`.
    It recursively collects all Dataverses and Datasets
    from a passed Dataverse down (by default =
    `science`). If `PRODUCTION` is `true`, this function
    will not execute, as long as you not add `--force`
    to the function call. This is to protect from
    unwanted changes on a production instance.

    """
    if config.PRODUCTION and not force:
        print(
            "Delete testdata on a PRODUCTION instance not allowed. Use --force to force it."
        )
        sys.exit()

    user = read_json(config.USER_FILENAME)[user_handle]
    api = NativeApi(config.BASE_URL, user["api-token"])

    # Clean up
    data = api.get_children(parent, children_types=data_types)
    dataverses, datasets = dataverse_tree_walker(data)
    if parent_data_type == "dataverse" and remove_parent:
        dataverses.append({"dataverse_alias": parent})
    for ds in datasets:
        api.destroy_dataset(ds["pid"])
    for dv in dataverses:
        api.delete_dataverse(dv["dataverse_alias"])
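A hedged example call, removing everything below the default `science` Dataverse including the parent itself; the user handle is a placeholder:

remove_testdata("dataverseAdmin", "science", remove_parent=True)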
Example #9
 def __init__(self, host, api_token=None):
     self._host = host
     self.api_token = api_token
     self.api = Api(host, api_token=api_token)
     self.native_api = NativeApi(host, api_token=api_token)
     self.data_access_api = DataAccessApi(host, api_token=api_token)
Example #10
class DataverseClient(object):

    def __init__(self, host, api_token=None):
        self._host = host
        self.api_token = api_token
        self.api = Api(host, api_token=api_token)
        self.native_api = NativeApi(host, api_token=api_token)
        self.data_access_api = DataAccessApi(host, api_token=api_token)

    def get_ddi(self, doi, format=dv_static.EXPORTER_FORMAT_DDI):
        """
        Get DDI metadata file
        """
        response = self.native_api.get_dataset_export(doi, format)
        return DDI(response.content)

    def get_user_info(self, user_api_token=None):
        """
        Placeholder until pyDataverse API is updated
        """
        api_token = user_api_token if user_api_token else self.api_token
        # format_dv_url strips any trailing "/" from the host
        ye_host = RegisteredDataverse.format_dv_url(self._host)

        # format url
        dv_url = f'{ye_host}/api/v1/users/:me'

        # make the request
        headers = {'X-Dataverse-key': api_token}
        try:
            response = requests.get(dv_url, headers=headers)
        except requests.exceptions.ConnectionError as err_obj:
            return err_resp(f'Failed to connect. {err_obj}')

        if response.status_code == 200:
            if not response.content:
                # The response body is empty or None -- shouldn't happen with a 200...
                return err_resp("Dataverse returned an HTTP 200 status code but an empty response body.")

            resp_json = response.json()
            dv_status = resp_json.get(dv_static.DV_KEY_STATUS)
            if not dv_status:
                return err_resp(f"Dataverse response failed to return a 'status'.")

            if dv_status == dv_static.STATUS_VAL_ERROR:
                user_msg = resp_json.get(dv_static.DV_KEY_MESSAGE,
                                         '(No message from Dataverse)')
                return err_resp(f"Dataverse error: {user_msg}")

            return ok_resp(response.json())

        try:
            json_resp = response.json()
            if 'message' in json_resp:
                return err_resp(json_resp['message'])
        except ValueError:
            pass
        return err_resp(f'Status code: {response.status_code} {response.text}')

    def get_schema_org(self, doi):
        """
        Get schema.org data
        """
        return self.get_dataset_export_json(doi, dv_static.EXPORTER_FORMAT_SCHEMA_ORG)

    def get_dataset_export_json(self, doi, format_type):
        """
        Get dataset export
        """
        try:
            response = self.native_api.get_dataset_export(doi, format_type)
        except ConnectionError as err_obj:
            return err_resp(f'Failed to connect. {err_obj}')
        return response
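A short usage sketch of the client above; the host, token, and DOI are placeholder values:

client = DataverseClient("https://demo.dataverse.org", api_token="xxxx-xxxx-xxxx-xxxx")
ddi = client.get_ddi("doi:10.5072/FK2/ABCDEF")  # DDI object parsed from the export
user_info = client.get_user_info()              # ok_resp(...) or err_resp(...) wrapper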
Example #11
def create_testdata(config_file: str, force: bool) -> None:
    """Create testdata defined in a config file.

    Creates a pre-defined set of testdata on your
    instance. By default, the function uses the
    AUSSDA test data repository, which is so far not
    publicly available. If `PRODUCTION` is `true`,
    this function will not execute, as long as you
    not add `--force` to the function call. This is
    to protect from unwanted changes on a production
    instance.

    """
    # Init
    if config.PRODUCTION and not force:
        print(
            "Create testdata on a PRODUCTION instance not allowed. Use --force to force it."
        )
        sys.exit()
    pid_idx = []
    users = read_json(config.USER_FILENAME)
    workflow = read_json(os.path.join(ROOT_DIR, config_file))

    # Dataverses
    for dv_conf in workflow["dataverses"]:
        dv_alias = None
        if "create" in dv_conf:
            api = NativeApi(
                config.BASE_URL,
                users[dv_conf["create"]["user-handle"]]["api-token"])
            dv = Dataverse()
            dv_filename = os.path.join(ROOT_DIR,
                                       dv_conf["create"]["metadata-filename"])
            dv.from_json(read_file(dv_filename))
            if "update" in dv_conf["create"]:
                for key, val in dv_conf["create"]["update"].items():
                    kwargs = {key: val}
                    dv.set(kwargs)
            dv_alias = dv.get()["alias"]
            resp = api.create_dataverse(dv_conf["create"]["parent"], dv.json())

        if "publish" in dv_conf:
            api = NativeApi(
                config.BASE_URL,
                users[dv_conf["publish"]["user-handle"]]["api-token"])
            if not dv_alias and "alias" in dv_conf["publish"]:
                dv_alias = dv_conf["publish"]["alias"]
            resp = api.publish_dataverse(dv_alias)

    # Datasets
    for ds_conf in workflow["datasets"]:
        pid = None
        if "create" in ds_conf:
            api = NativeApi(
                config.BASE_URL,
                users[ds_conf["create"]["user-handle"]]["api-token"])
            ds = Dataset()
            ds_filename = os.path.join(ROOT_DIR,
                                       ds_conf["create"]["metadata-filename"])
            ds.from_json(read_file(ds_filename))
            if "update" in ds_conf["create"]:
                for key, val in ds_conf["create"]["update"].items():
                    kwargs = {key: val}
                    ds.set(kwargs)
            resp = api.create_dataset(dv_alias, ds.json())
            pid = resp.json()["data"]["persistentId"]
            pid_idx.append(pid)

        if "publish" in ds_conf:
            if not pid:
                print("ERROR: PID missing!")
                sys.exit()
            api = NativeApi(
                config.BASE_URL,
                users[ds_conf["publish"]["user-handle"]]["api-token"])
            resp = api.publish_dataset(pid, release_type="major")

    # Datafiles
    for dataset_id, ds_datafiles in workflow["datafiles"].items():
        if int(dataset_id) == workflow["datasets"][int(dataset_id)]["id"]:
            pid = pid_idx[int(dataset_id)]
        else:
            print("ERROR: Dataset ID not matching.")
            sys.exit()
        for df_conf in ds_datafiles:
            if "upload" in df_conf:
                api = NativeApi(
                    config.BASE_URL,
                    users[df_conf["upload"]["user-handle"]]["api-token"],
                )
                metadata = read_json(df_conf["upload"]["metadata-filename"])
                df = Datafile()
                df.set(metadata)
                if "update" in df_conf["upload"]:
                    for key, val in df_conf["upload"]["update"].items():
                        kwargs = {key: val}
                        df.set(kwargs)
                df.set({"pid": pid})
                filename = df_conf["upload"]["filename"]
                resp = api.upload_datafile(pid, filename, df.json())
                if filename.endswith((".sav", ".dta")):
                    # tabular files are ingested by Dataverse, which takes longer
                    sleep(30)
                else:
                    sleep(3)
        if "publish-dataset" in df_conf:
            api = NativeApi(
                config.BASE_URL,
                users[df_conf["publish-dataset"]["user-handle"]]["api-token"],
            )
            if df_conf["publish-dataset"]:
                resp = api.publish_dataset(pid, release_type="major")
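The shape of the workflow config this function expects can be inferred from the keys it reads; a minimal sketch with placeholder user handles and filenames:

workflow = {
    "dataverses": [{
        "create": {"user-handle": "admin", "metadata-filename": "data/dataverse.json", "parent": ":root"},
        "publish": {"user-handle": "admin", "alias": "science"},
    }],
    "datasets": [{
        "id": 0,
        "create": {"user-handle": "admin", "metadata-filename": "data/dataset.json"},
        "publish": {"user-handle": "admin"},
    }],
    "datafiles": {
        "0": [{
            "upload": {"user-handle": "admin", "metadata-filename": "data/datafile.json", "filename": "data/file.csv"},
        }],
    },
}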
Example #12
def native_api(config):
    """Initialize pyDataverse Native Api object."""
    yield NativeApi(config.BASE_URL)
Example #13
 def test_api_connect_base_url_wrong(self):
     """Test native_api connection with wrong `base_url`."""
     # None
     with pytest.raises(ApiUrlError):
         NativeApi(None)
Example #14
        print("uploading ",
              os.path.basename(os.path.normpath(folder)) + os.sep + file)

        # make POST request to target URL
        r = requests.post(url_persistent_id, data=payload, files=files)

        # store upload status
        status = r.json()["status"]

        return status


# %% run every 5 seconds until upload is successful (circumventing denied URL requests)

# create a NativeApi instance
api = NativeApi(dataverse_server, api_key)

# declare a flag to track upload status
flag = None

while flag != "OK":

    try:
        flag = upload_files_to_dataverse(args.InputFolder, file_list,
                                         file_description)
    except Exception as e:
        print(e)
        print("... starting again")
        time.sleep(5)

# make a new release
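# A hedged sketch of that release step: pyDataverse's NativeApi can publish the
# dataset once uploads succeed; `persistent_id` is a placeholder for the dataset
# DOI used elsewhere in the script (not shown in this excerpt).
resp = api.publish_dataset(persistent_id, release_type="major")
print(resp.json()["status"])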