def test_api_connect_base_url_wrong(self):
    """Test api connection with wrong `base_url`."""
    # wrong string
    with pytest.raises(ApiResponseError):
        base_url = 'http://wikipedia.org'
        api = Api(base_url)
        sleep(SLEEP_TIME)
        assert not api.api_token
        assert api.api_version == 'v1'
        assert api.base_url == 'http://wikipedia.org'
        assert api.native_api_base_url == 'http://wikipedia.org/api/v1'
        assert api.status == 'ERROR'

    # None
    with pytest.raises(ApiUrlError):
        base_url = None
        api = Api(base_url)
        sleep(SLEEP_TIME)
        assert not api.api_token
        assert api.api_version == 'v1'
        assert not api.base_url
        assert not api.native_api_base_url
        assert api.status == 'ERROR'
import json

from django.shortcuts import render
from pyDataverse.api import Api


def home_view(request, *args, **kwargs):
    if request.method == "POST":
        base_url = 'https://demo.dataverse.org'
        api_token = "bcd0d879-592d-425f-9547-a112eb367c17"
        api = Api(base_url, api_token=api_token)
        name = request.POST.get("dataverse")
        identifier = request.POST.get("identifier")
        dataverseType = request.POST.get("dataverseType")
        email = request.POST.get("email")
        # Build the metadata as a dict and serialize it with json.dumps
        # instead of concatenating strings, so user input is escaped safely.
        metaData = json.dumps({
            "name": name,
            "alias": identifier,
            # dataverseType was read from the form but never used in the
            # original payload; it is a valid top-level key for this endpoint.
            "dataverseType": dataverseType,
            "dataverseContacts": [{"contactEmail": email}],
        })
        # Pass the alias from the form instead of a hard-coded identifier.
        response = api.create_dataverse(identifier=identifier,
                                        metadata=metaData, auth=True)
        print(response.status_code)
    return render(request, "home.html", {})
@pytest.fixture
def api_connection():
    """Fixture to set up an Api connection.

    Returns
    -------
    Api
        Api object.
    """
    api_token = os.environ['API_TOKEN']
    base_url = os.environ['BASE_URL']
    return Api(base_url, api_token)
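# A minimal sketch of how such a fixture might be consumed in a test; the
# test name and the asserted attributes are illustrative assumptions, not
# part of the original suite.
def test_api_connection_is_established(api_connection):
    assert isinstance(api_connection, Api)
    assert api_connection.api_token == os.environ['API_TOKEN']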
class DataverseClient(object):

    def __init__(self, host, api_token=None):
        self._host = host
        self.api = Api(host, api_token=api_token)

    def get_ddi(self, doi, format='ddi'):
        """Get DDI metadata file."""
        response = self.api.get_dataset_export(doi, format)
        return DDI(response.content)
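# A minimal usage sketch for the client above; the host and DOI are
# hypothetical placeholders, and get_ddi assumes the dataset is published
# and exportable in DDI format.
client = DataverseClient('https://demo.dataverse.org')
ddi = client.get_ddi('doi:10.5072/FK2/EXAMPLE')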
def test_api_connect(self):
    """Test successful connection without api_token."""
    api = Api(os.environ['BASE_URL'])
    sleep(SLEEP_TIME)

    assert isinstance(api, Api)
    assert not api.api_token
    assert api.api_version == 'v1'
    assert isinstance(api.conn_started, datetime)
    assert api.base_url == os.environ['BASE_URL']
    assert api.native_api_base_url == '{0}/api/{1}'.format(
        os.environ['BASE_URL'], api.api_version)
    assert api.status == 'OK'
def test_connection_to_api():
    """Test setup of Dataverse API connection."""
    base_url = 'https://' + os.environ.get('HOST')
    api_token = os.environ.get('API_AUTH_TOKEN')
    dataverse_api_version = os.environ.get('DATAVERSE_API_VERSION')
    dataverse_version = os.environ.get('DATAVERSE_VERSION')
    api = Api(base_url, api_token)

    assert isinstance(api, Api)
    assert api.api_token == api_token
    assert api.api_version is not None
    assert isinstance(api.conn_started, datetime)
    assert api.base_url == 'https://' + os.environ.get('HOST')
    assert api.api_version == dataverse_api_version
    assert api.native_api_base_url == api.base_url + '/api/v1'
    assert api.dataverse_version == dataverse_version
from pyDataverse.api import Api
import json
import dvconfig
from urllib.parse import urlparse, parse_qs

base_url = dvconfig.base_url
api_token = dvconfig.api_token
api = Api(base_url, api_token)
print('API status: ' + api.status)


def main():
    # dsurl = 'https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.5072/FK2/U6AEZM'
    dsurl = 'https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.5072/FK2/PPPORT'
    print(dsurl)
    o = urlparse(dsurl)
    hostname = o.netloc
    doi = parse_qs(o.query)['persistentId'][0]
    curl_native_json = ("curl '" + 'https://' + hostname +
                        '/api/datasets/export?exporter=dataverse_json&persistentId=' +
                        doi + "' | jq .")
    print(curl_native_json)


def find_children(dataverse_database_id):
    pass


if __name__ == '__main__':
    main()
def connect(self):
    dataverse_api = Api(self.base_url, self.api_token)
    return dataverse_api
from pyDataverse.api import Api
import json
import dvconfig

base_url = dvconfig.base_url
api_token = dvconfig.api_token
api = Api(base_url, api_token)

username = '******'
password = '******'
endpoint = '/builtin-users/' + username + '/api-token'
params = {}
params['password'] = password
resp = api.get_request(endpoint, params=params, auth=True)
api_token = resp.json()['data']['message']
print(api_token)
## DAY 1
from pyDataverse.api import Api
from pyDataverse.models import Dataverse
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import math

## Acquiring data from APIs

# establish connection
base_url = 'https://dataverse.harvard.edu/'
api = Api(base_url)
print(api.status)

# get the digital object identifier for the Harvard Dataverse dataset
DOI = "doi:10.7910/DVN/HIDLTK"

# retrieve the contents of the dataset
covid = api.get_dataset(DOI)
covid_files_list = covid.json()['data']['latestVersion']['files']
for fileObject in covid_files_list:
    print("File name is {}; id is {}".format(
        fileObject["dataFile"]["filename"], fileObject["dataFile"]["id"]))

# get data file
US_states_cases_file = api.get_datafile("4201597")
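# A possible continuation, assuming the datafile is served as tab-separated
# text (Dataverse's default export for ingested tabular files); the column
# names are not guaranteed and would need checking against the actual file.
import io
US_states_cases = pd.read_csv(io.StringIO(US_states_cases_file.text), sep='\t')
print(US_states_cases.head())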
def __init__(self, host, api_token=None):
    self._host = host
    self.api = Api(host, api_token=api_token)
from pyDataverse.api import Api
import json
import dvconfig

base_url = dvconfig.base_url
api_token = dvconfig.api_token
api = Api(base_url, api_token)
print(api.status)

dataset_json = 'data/dataverses/open-source-at-harvard/datasets/open-source-at-harvard/open-source-at-harvard.json'
with open(dataset_json) as f:
    metadata = json.load(f)
dataverse = 'open-source-at-harvard'
resp = api.create_dataset(dataverse, json.dumps(metadata))
print(resp)
dataset_pid = resp.json()['data']['persistentId']

tabular_file = 'data/dataverses/open-source-at-harvard/datasets/open-source-at-harvard/files/2019-02-25.tsv'
resp = api.upload_file(dataset_pid, tabular_file)
print(resp)
from pyDataverse.api import Api
import json
import dvconfig
import os
import time
import requests
from io import StringIO

base_url = dvconfig.base_url
api_token = dvconfig.api_token
paths = dvconfig.sample_data
api = Api(base_url, api_token)
print(api.status)


# TODO limit amount of recursion
def check_dataset_lock(dataset_dbid):
    query_str = '/datasets/' + str(dataset_dbid) + '/locks'
    params = {}
    resp = api.get_request(query_str, params=params, auth=True)
    locks = resp.json()['data']
    if locks:
        print('Lock found for dataset id ' + str(dataset_dbid) +
              '... sleeping...')
        time.sleep(2)
        check_dataset_lock(dataset_dbid)


resp = api.get_dataverse(':root')
buff = StringIO("")
if resp.status_code == 401:
    print('Publishing root dataverse.')
import connexion
import os
import lxml.etree as ET
import urllib.request
import zipfile
import logging
from dct_server import config
from pyDataverse.api import Api
from datetime import datetime

api = Api(config.DATAVERSE_BASE_URL)


def convert_ddi(ddi_file, dv_target, api_token, xsl_url, author_name=None,
                author_affiliation=None, contact_name=None,
                contact_email=None, subject=None):  # noqa: E501
    """Convert DDI and ingest it to target dataverse  # noqa: E501

    :param ddi_file:
    :type ddi_file: str
    :param dv_target: The target of dataverse alias or id (e.g. root)
from pyDataverse.api import Api
import json
import dvconfig

base_url = dvconfig.base_url
api_token = dvconfig.api_token
api = Api(base_url, api_token)
print(api.status)

dv_json = 'data/dataverses/open-source-at-harvard/open-source-at-harvard.json'
with open(dv_json) as f:
    metadata = json.load(f)
print(metadata)
# FIXME: Why is "identifier" required?
identifier = metadata['alias']
parentdv = ':root'
resp = api.create_dataverse(identifier, json.dumps(metadata), parent=parentdv)
print(resp)
def get_fsp_data_through_api(base_url, identifier):
    '''
    Takes base URL and identifier of the FSP data, and returns the Pandas
    dataframe of the file

    Input
        base_url (str): URL of the website
        identifier (str): identifier of the desired data file

    Output
        df (Pandas dataframe): dataframe of the FSP data
    '''
    dtype_col = {
        'FormName': 'str',
        'County': 'str',
        'GPSLatitude': 'float32',
        'GPSLongitude': 'float32'
    }
    geo_columns = list(dtype_col.keys())
    api = Api(base_url)
    resp_dataset = api.get_dataset(identifier)
    files = json.loads(resp_dataset.text)['data']['latestVersion']['files']
    df = pd.DataFrame({col: [] for col in geo_columns})
    for file in files:
        file_id = file['dataFile']['id']
        resp_datafile = api.get_datafile(file_id)
        file_extension = file['dataFile']['filename'].split('.')[-1]
        if file_extension == 'tab':
            rows = resp_datafile.text.split('\n')
            headers = rows[0].split('\t')
            data_rows = \
                [row.replace('"', '').split('\t') for row in rows[1:]
                 if row != '' and
                 row.split('\t')[headers.index('GPSLatitude')] != '']
            df_file = \
                pd.DataFrame(data_rows, columns=headers)[geo_columns].astype(dtype_col)
        elif file_extension == 'xlsx':
            workbook = xlrd.open_workbook(file_contents=resp_datafile.content)
            worksheet = workbook.sheet_by_index(0)
            col_names = [
                col_name.replace(" ", "")
                for col_name in worksheet.row_values(0)
            ]
            df_file = pd.DataFrame({col: [] for col in geo_columns})
            for col in geo_columns:
                data_col = worksheet.col_values(col_names.index(col),
                                                start_rowx=1)
                for idx_data, data in enumerate(data_col):
                    if type(data) == str:
                        data_col[idx_data] = data.replace('"', '')
                    if data in ['', '--']:
                        data_col[idx_data] = 'nan'
                df_file[col] = pd.Series(data_col, dtype=dtype_col[col])
        df = df.append(df_file[df_file['County'] != 'nan'], ignore_index=True)
    df['geometry'] = \
        df.apply(lambda x: Point(float(x['GPSLongitude']),
                                 float(x['GPSLatitude'])),
                 axis=1)
    return df
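# A hypothetical invocation of the function above; the base URL and dataset
# DOI are placeholders, and the call assumes the dataset's files carry the
# FormName/County/GPS schema the function expects.
fsp_df = get_fsp_data_through_api('https://dataverse.harvard.edu',
                                  'doi:10.5072/FK2/EXAMPLE')
print(fsp_df[['County', 'GPSLatitude', 'GPSLongitude']].head())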
def __init__(self, host, api_token=None):
    self._host = host
    self.api_token = api_token
    self.api = Api(host, api_token=api_token)
    self.native_api = NativeApi(host, api_token=api_token)
    self.data_access_api = DataAccessApi(host, api_token=api_token)
if __name__ == '__main__':
    DOWNLOAD_DATA = False
    UPLOAD_DATA = False

    # Download data
    if DOWNLOAD_DATA:
        # Setup working directory
        if not os.path.isdir(ROOT_DIR + '/data/down'):
            os.mkdir(ROOT_DIR + '/data/down')
        down_dir = ROOT_DIR + '/data/down'

        # Connect to Dataverse Api
        api_token_down = os.environ["API_TOKEN_DOWN"]
        api_host_down = os.environ["API_HOST_DOWN"]
        api_down = Api(api_host_down, api_token=api_token_down)

        # Extract dois and dataverse of the datasets.
        datasets_list = read_datasets_csv(ROOT_DIR + '/data/datasets.csv')
        dv_list = [ds['dataverse'] for ds in datasets_list]
        dv_list = set(dv_list)

        # Create directories for all dataverses and download the metadata
        for dv in dv_list:
            down_dataverse_dir = down_dir + '/dv_{0}'.format(dv)
            if not os.path.isdir(down_dataverse_dir):
                os.mkdir(down_dataverse_dir)
            resp_dv = api_down.get_dataverse(dv)
            # get_dataverse() returns a requests Response, so decode the
            # JSON body before indexing into it.
            write_file(down_dataverse_dir + '/dv_' + dv + '_metadata.json',
                       json.dumps(resp_dv.json()['data']))