Example #1
0
    def test_api_connect_base_url_wrong(self):
        """Test that connecting with an unusable `base_url` raises.

        ``pytest.raises`` leaves its block as soon as the expected exception
        is raised, so only the call that must fail is placed inside it.
        When the constructor raises, no ``Api`` object is bound, so there
        are no attributes left to assert on afterwards.
        """
        # A well-formed URL that does not point to a Dataverse installation.
        with pytest.raises(ApiResponseError):
            Api('http://wikipedia.org')
        # Throttle between connection attempts; placed outside the block so
        # that it actually runs.
        sleep(SLEEP_TIME)

        # No URL at all.
        with pytest.raises(ApiUrlError):
            Api(None)
        sleep(SLEEP_TIME)
Example #2
0
def home_view(request, *args, **kwargs):
    """Render the home page; on POST, first create a dataverse from the form.

    The POST fields ``dataverse`` (display name), ``identifier`` (alias) and
    ``email`` (contact address) are serialised into the JSON metadata
    document passed to ``Api.create_dataverse``, and the status code of the
    response is printed. If any of the three fields is missing, no request
    is sent. ``home.html`` is rendered for every request method.
    """
    if request.method == "POST":
        # Local import so the block does not depend on the file's import list.
        import json

        name = request.POST.get("dataverse")
        identifier = request.POST.get("identifier")
        email = request.POST.get("email")

        if None in (name, identifier, email):
            # Concatenating a missing (None) field used to raise TypeError.
            print("Missing form field; dataverse not created.")
        else:
            base_url = 'https://demo.dataverse.org'
            # NOTE(review): credential hard-coded in source. It should be
            # revoked and read from configuration or the environment.
            api_token = "bcd0d879-592d-425f-9547-a112eb367c17"
            api = Api(base_url, api_token=api_token)

            # json.dumps escapes quotes and backslashes in user input, which
            # the previous string concatenation did not.
            metadata = json.dumps({
                "name": name,
                "alias": identifier,
                "dataverseContacts": [
                    {"contactEmail": email},
                ],
            })
            # Pass the submitted alias instead of a fixed placeholder name.
            response = api.create_dataverse(identifier=identifier,
                                            metadata=metadata,
                                            auth=True)
            print(response.status_code)
    return render(request, "home.html", {})
Example #3
0
def api_connection():
    """Fixture that sets up an Api connection from the environment.

    Reads ``API_TOKEN`` and ``BASE_URL`` from ``os.environ``; a missing
    variable raises ``KeyError``.

    Returns
    -------
    Api
        Api object created from the base URL and the token.

    """
    token, url = os.environ['API_TOKEN'], os.environ['BASE_URL']
    return Api(url, token)
Example #4
0
class DataverseClient(object):
    """Thin wrapper that owns an ``Api`` connection to one host."""

    def __init__(self, host, api_token=None):
        """Remember ``host`` and create the ``Api`` connection to it."""
        self._host = host
        self.api = Api(host, api_token=api_token)

    def get_ddi(self, doi, format='ddi'):
        """Export the dataset ``doi`` and wrap the payload in a ``DDI``.

        ``doi`` and ``format`` are forwarded unchanged to
        ``Api.get_dataset_export``; the ``content`` of the response is
        handed to ``DDI``.
        """
        export = self.api.get_dataset_export(doi, format)
        payload = export.content
        return DDI(payload)
Example #5
0
    def test_api_connect(self):
        """Test a successful connection without an api_token."""
        base_url = os.environ['BASE_URL']
        api = Api(base_url)
        sleep(SLEEP_TIME)

        assert isinstance(api, Api)
        assert not api.api_token
        assert api.api_version == 'v1'
        assert isinstance(api.conn_started, datetime)
        assert api.base_url == base_url
        # The native API root is "<base_url>/api/<version>".
        expected_native_url = '{0}/api/{1}'.format(base_url, api.api_version)
        assert api.native_api_base_url == expected_native_url
        assert api.status == 'OK'
def test_connection_to_api():
    """Check the attributes of an Api connection built from the environment.

    ``HOST``, ``API_AUTH_TOKEN``, ``DATAVERSE_API_VERSION`` and
    ``DATAVERSE_VERSION`` are read from ``os.environ`` and compared with
    what the connection reports.
    """
    env = os.environ
    expected_url = 'https://' + env.get('HOST')
    token = env.get('API_AUTH_TOKEN')
    expected_api_version = env.get('DATAVERSE_API_VERSION')
    expected_dv_version = env.get('DATAVERSE_VERSION')

    api = Api(expected_url, token)

    assert isinstance(api, Api)
    assert api.api_token == token
    assert api.api_version is not None
    assert isinstance(api.conn_started, datetime)
    assert api.base_url == expected_url
    assert api.api_version == expected_api_version
    assert api.native_api_base_url == api.base_url + '/api/v1'
    assert api.dataverse_version == expected_dv_version
Example #7
0
from pyDataverse.api import Api
import json
import dvconfig
from urllib.parse import urlparse,parse_qs
# Connection settings come from the local dvconfig module.
base_url, api_token = dvconfig.base_url, dvconfig.api_token
api = Api(base_url, api_token)
print('API status: ' + api.status)

def main():
    """Print a dataset page URL and a curl command exporting its native JSON.

    The persistent identifier is taken from the ``persistentId`` query
    parameter of the page URL, and the export endpoint is addressed on the
    same host over https.
    """
    # Alternative sample dataset:
    # 'https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.5072/FK2/U6AEZM'
    page_url = 'https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.5072/FK2/PPPORT'
    print(page_url)

    parts = urlparse(page_url)
    persistent_id = parse_qs(parts.query)['persistentId'][0]
    export_url = ('https://' + parts.netloc
                  + '/api/datasets/export?exporter=dataverse_json&persistentId='
                  + persistent_id)
    print("curl '" + export_url + "' | jq .")

def find_children(dataverse_database_id):
    """Placeholder that is not implemented yet; always returns ``None``."""
    return None

# Run the demo only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Example #8
0
 def connect(self):
     """Create and return an ``Api`` from ``self.base_url`` and ``self.api_token``."""
     return Api(self.base_url, self.api_token)
Example #9
0
from pyDataverse.api import Api
import json
import dvconfig
# Connect with the settings stored in dvconfig.
base_url, api_token = dvconfig.base_url, dvconfig.api_token
api = Api(base_url, api_token)

# Request the API token of a builtin user; the password travels as a
# request parameter.
username = '******'
password = '******'
endpoint = '/builtin-users/{0}/api-token'.format(username)
params = {'password': password}
resp = api.get_request(endpoint, params=params, auth=True)

# The token is returned in the "message" field of the response data.
api_token = resp.json()['data']['message']
print(api_token)
Example #10
0
## DAY 1

from pyDataverse.api import Api
from pyDataverse.models import Dataverse
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import math

## Acquiring data from APIs
# Open a connection to the Harvard Dataverse installation.
base_url = 'https://dataverse.harvard.edu/'
api = Api(base_url)
print(api.status)

# Digital object identifier (DOI) of the Harvard Dataverse dataset.
DOI = "doi:10.7910/DVN/HIDLTK"

# Fetch the dataset and pull the file list of its latest version.
covid = api.get_dataset(DOI)
covid_files_list = covid.json()['data']['latestVersion']['files']

# Show each file name together with its id.
for fileObject in covid_files_list:
    data_file = fileObject["dataFile"]
    print("File name is {}; id is {}".format(
        data_file["filename"], data_file["id"]))

# Download one data file by its id.
US_states_cases_file = api.get_datafile("4201597")
Example #11
0
 def __init__(self, host, api_token=None):
     """Store ``host`` and create the ``Api`` connection for it.

     ``api_token`` is optional and is forwarded to ``Api`` as a keyword
     argument.
     """
     self._host = host
     self.api = Api(host, api_token=api_token)
from pyDataverse.api import Api
import json
import dvconfig
# Connect with the settings stored in dvconfig.
base_url, api_token = dvconfig.base_url, dvconfig.api_token
api = Api(base_url, api_token)
print(api.status)

# Create the dataset in its dataverse from the JSON metadata file.
dataverse = 'open-source-at-harvard'
dataset_json = 'data/dataverses/open-source-at-harvard/datasets/open-source-at-harvard/open-source-at-harvard.json'
with open(dataset_json) as f:
    metadata = json.load(f)
resp = api.create_dataset(dataverse, json.dumps(metadata))
print(resp)

# Upload a tabular file to the dataset, addressed by the persistent id
# returned from the create call.
dataset_pid = resp.json()['data']['persistentId']
tabular_file = 'data/dataverses/open-source-at-harvard/datasets/open-source-at-harvard/files/2019-02-25.tsv'
resp = api.upload_file(dataset_pid, tabular_file)
print(resp)
from pyDataverse.api import Api
import json
import dvconfig
import os
import time
import requests
from io import StringIO
# Settings are read from the local dvconfig module.
base_url, api_token, paths = (
    dvconfig.base_url,
    dvconfig.api_token,
    dvconfig.sample_data,
)
api = Api(base_url, api_token)
print(api.status)


def check_dataset_lock(dataset_dbid, max_checks=None, poll_interval=2):
    """Block until the dataset reports no locks.

    Polls ``/datasets/<id>/locks`` and sleeps between polls for as long as
    the ``data`` entry of the response is non-empty. Implemented as a loop
    instead of recursion, so a long-lived lock cannot exhaust the
    interpreter's recursion limit.

    Parameters
    ----------
    dataset_dbid
        Database id of the dataset; converted with ``str`` to build the
        endpoint.
    max_checks : int, optional
        Upper bound on the number of polls. ``None`` (default) keeps
        polling until the lock is gone, as before.
    poll_interval : float, optional
        Seconds to sleep after a poll that found a lock. Defaults to 2.

    Raises
    ------
    TimeoutError
        If ``max_checks`` is set and that many polls all found a lock.
    """
    query_str = '/datasets/' + str(dataset_dbid) + '/locks'
    checks = 0
    while True:
        resp = api.get_request(query_str, params={}, auth=True)
        if not resp.json()['data']:
            return
        checks += 1
        if max_checks is not None and checks >= max_checks:
            raise TimeoutError('Dataset id ' + str(dataset_dbid) +
                               ' is still locked after ' + str(checks) +
                               ' checks.')
        print('Lock found for dataset id ' + str(dataset_dbid) +
              '... sleeping...')
        time.sleep(poll_interval)


# Look up the root dataverse.
resp = api.get_dataverse(':root')
buff = StringIO()
# NOTE(review): a 401 here is presumably how an unpublished root dataverse
# shows up -- confirm against the Dataverse API.
if resp.status_code == 401:
    print('Publishing root dataverse.')
Example #14
0
import connexion
import os
import lxml.etree as ET
import urllib.request
import zipfile
import logging

from dct_server import config

from pyDataverse.api import Api
from datetime import datetime

# Module-level Api connection to the Dataverse installation configured in
# config.DATAVERSE_BASE_URL; no API token is passed here.
api = Api(config.DATAVERSE_BASE_URL)


def convert_ddi(ddi_file,
                dv_target,
                api_token,
                xsl_url,
                author_name=None,
                author_affiliation=None,
                contact_name=None,
                contact_email=None,
                subject=None):  # noqa: E501
    """Convert DDI and ingest it to target dataverse

     # noqa: E501

    :param ddi_file: 
    :type ddi_file: str
    :param dv_target: The target of dataverse alias or id (e.g. root)
Example #15
0
from pyDataverse.api import Api
import json
import dvconfig

# Connect with the settings stored in dvconfig.
base_url, api_token = dvconfig.base_url, dvconfig.api_token
api = Api(base_url, api_token)
print(api.status)

# Load the dataverse description from disk and echo it.
dv_json = 'data/dataverses/open-source-at-harvard/open-source-at-harvard.json'
with open(dv_json) as f:
    metadata = json.load(f)
print(metadata)

# Create the dataverse under the root dataverse.
# FIXME: Why is "identifier" required?
parentdv = ':root'
identifier = metadata['alias']
resp = api.create_dataverse(identifier, json.dumps(metadata), parent=parentdv)
print(resp)
def get_fsp_data_through_api(base_url, identifier):
    '''
    Takes base URL and identifier of the FSP data,
    and returns one Pandas dataframe built from its data files

    Every ``.tab`` and ``.xlsx`` file in the latest version of the dataset
    is downloaded and reduced to the columns FormName, County, GPSLatitude
    and GPSLongitude. Files with any other extension are skipped. Rows
    whose County is the string 'nan' are dropped, and a ``geometry`` column
    holding ``Point(GPSLongitude, GPSLatitude)`` is added at the end.

    Input
        base_url (str): URL of the website
        identifier (str): identifier passed to ``Api.get_dataset``

    Output
        df (Pandas dataframe): dataframe of the FSP data
    '''

    dtype_col = {
        'FormName': 'str',
        'County': 'str',
        'GPSLatitude': 'float32',
        'GPSLongitude': 'float32'
    }
    geo_columns = list(dtype_col.keys())

    api = Api(base_url)
    resp_dataset = api.get_dataset(identifier)

    files = json.loads(resp_dataset.text)['data']['latestVersion']['files']

    # Seed with an empty frame so the result carries the expected columns
    # even when no file contributes any rows.
    frames = [pd.DataFrame({col: [] for col in geo_columns})]

    for file in files:
        file_extension = file['dataFile']['filename'].split('.')[-1]
        if file_extension not in ('tab', 'xlsx'):
            # Unsupported formats used to fall through with `df_file`
            # undefined (NameError) or still holding the previous file's
            # rows (duplicates), so they are skipped explicitly.
            continue

        resp_datafile = api.get_datafile(file['dataFile']['id'])

        if file_extension == 'tab':
            rows = resp_datafile.text.split('\n')
            headers = rows[0].split('\t')
            # Column position is the same for every row; look it up once.
            lat_idx = headers.index('GPSLatitude')
            # Keep non-empty rows that have a latitude, stripping the quotes.
            data_rows = [
                row.replace('"', '').split('\t')
                for row in rows[1:]
                if row != '' and row.split('\t')[lat_idx] != ''
            ]
            df_file = pd.DataFrame(
                data_rows, columns=headers)[geo_columns].astype(dtype_col)
        else:  # 'xlsx'
            workbook = xlrd.open_workbook(file_contents=resp_datafile.content)
            worksheet = workbook.sheet_by_index(0)
            # Header cells are matched with spaces removed.
            col_names = [
                col_name.replace(" ", "")
                for col_name in worksheet.row_values(0)
            ]
            df_file = pd.DataFrame({col: [] for col in geo_columns})
            for col in geo_columns:
                data_col = worksheet.col_values(col_names.index(col),
                                                start_rowx=1)
                for idx_data, data in enumerate(data_col):
                    if isinstance(data, str):
                        data_col[idx_data] = data.replace('"', '')
                    # Empty and '--' cells become 'nan' so that the float
                    # columns can be cast and empty counties filtered below.
                    if data in ['', '--']:
                        data_col[idx_data] = 'nan'
                df_file[col] = pd.Series(data_col, dtype=dtype_col[col])

        frames.append(df_file[df_file['County'] != 'nan'])

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames, ignore_index=True)

    df['geometry'] = \
    df.apply(lambda x: Point(float(x['GPSLongitude']),
                             float(x['GPSLatitude'])), axis=1)

    return df
Example #17
0
 def __init__(self, host, api_token=None):
     """Keep the host and token and create one client per API class.

     The same ``host`` and ``api_token`` are handed to ``Api``,
     ``NativeApi`` and ``DataAccessApi``, in that order.
     """
     self._host, self.api_token = host, api_token
     auth = {'api_token': api_token}
     self.api = Api(host, **auth)
     self.native_api = NativeApi(host, **auth)
     self.data_access_api = DataAccessApi(host, **auth)
Example #18
0
if __name__ == '__main__':
    DOWNLOAD_DATA = False
    UPLOAD_DATA = False

    # Download data
    if DOWNLOAD_DATA:
        # Setup working directory
        if not os.path.isdir(ROOT_DIR + '/data/down'):
            os.mkdir(ROOT_DIR + '/data/down')
        down_dir = ROOT_DIR + '/data/down'

        # Connect to Dataverse Api
        api_token_down = os.environ["API_TOKEN_DOWN"]
        api_host_down = os.environ["API_HOST_DOWN"]
        api_down = Api(api_host_down, api_token=api_token_down)

        # Extract the DOIs and dataverses of the datasets.
        datasets_list = read_datasets_csv(ROOT_DIR + '/data/datasets.csv')
        dv_list = [ds['dataverse'] for ds in datasets_list]
        dv_list = set(dv_list)

        # Create directories for all dataverses and download the metadata
        for dv in dv_list:
            down_dataverse_dir = down_dir + '/dv_{0}'.format(dv)
            if not os.path.isdir(down_dataverse_dir):
                os.mkdir(down_dataverse_dir)
            resp_dv = api_down.get_dataverse(dv)
            write_file(down_dataverse_dir + '/dv_' + dv + '_metadata.json',
                       json.dumps(resp_dv['data']))