def query(self, dataset_key, query, query_type="sql"):
    """Run a SQL or SPARQL query against a dataset

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id or of a url
    query : str
        Text of the SQL or SPARQL query
    query_type : {'sql', 'sparql'}, optional
        Query language. It is used verbatim as a path segment of the
        request URL.

    Returns
    -------
    QueryResults
        Wrapper around the JSON body of a successful (HTTP 200) response

    Raises
    ------
    RuntimeError
        If the response status code is anything other than 200
    """
    # TODO Move network request to RestApiClient
    owner, dataset = parse_dataset_key(dataset_key)

    request_params = {'query': query}
    endpoint = "{0}://{1}/{2}/{3}/{4}".format(
        self._protocol, self._query_host, query_type, owner, dataset)
    request_headers = {
        'User-Agent': _user_agent(),
        'Accept': 'application/sparql-results+json',
        'Authorization': 'Bearer {0}'.format(self._config.auth_token),
    }

    resp = requests.get(
        endpoint, params=request_params, headers=request_headers)

    # Guard clause: anything but a plain 200 is reported as a failure
    if resp.status_code != 200:
        raise RuntimeError(
            'Error executing query: {}'.format(resp.content))
    return QueryResults(resp.json())
def test__user_agent():
    """Check that the user agent string embeds the package version."""
    from datadotworld import __version__

    actual_agent = util._user_agent()
    expected_agent = 'data.world-py - {}'.format(__version__)
    assert_that(actual_agent, equal_to(expected_agent))
def download_endpoint(request):
    """Verify the request's User-Agent header and answer with plain text.

    Returns a ``(200, {}, body)`` triple; presumably consumed as
    (status, headers, body) by whatever registers this callback.
    """
    sent_agent = request.headers.get('User-Agent')
    assert sent_agent == _user_agent()

    body = "this is the test."
    return 200, {}, body
def upload_endpoint(request):
    """Verify the request's User-Agent header and answer with a 400.

    Returns a ``(400, {}, body)`` triple whose body is an empty JSON
    object; presumably consumed as (status, headers, body).
    """
    sent_agent = request.headers.get('User-Agent')
    assert sent_agent == _user_agent()

    empty_json = json.dumps({})
    return 400, {}, empty_json
def upload_endpoint(request):
    """Verify the uploaded CSV payload and User-Agent, then answer 200.

    The request body is treated as an iterable of UTF-8 encoded chunks
    which, once joined, must equal the expected CSV text.
    """
    expected_csv = "a,b\r\n42,17\r\n420,178\r\n"
    received_csv = ''.join(
        chunk.decode('utf-8') for chunk in request.body)
    assert expected_csv == received_csv

    sent_agent = request.headers.get('User-Agent')
    assert sent_agent == _user_agent()

    empty_json = json.dumps({})
    return 200, {}, empty_json
def download_endpoint(request):
    """Verify the request's User-Agent header and answer with JSON lines.

    Returns a ``(200, {}, body)`` triple whose body holds two
    newline-terminated JSON objects.
    """
    sent_agent = request.headers.get('User-Agent')
    assert sent_agent == _user_agent()

    json_lines = (
        '{"A":"1", "B":"2", "C":"3"}\n',
        '{"A":"4", "B":"5", "C":"6"}\n',
    )
    return 200, {}, ''.join(json_lines)
def download_endpoint(request):
    """Verify the request's User-Agent header and answer with CSV text.

    Returns a ``(200, {}, body)`` triple whose body is a header row
    followed by two data rows, with no trailing newline.
    """
    sent_agent = request.headers.get('User-Agent')
    assert sent_agent == _user_agent()

    csv_rows = ["A,B,C", "1,2,3", "4,5,6"]
    return 200, {}, "\n".join(csv_rows)
def download_endpoint(request):
    """Verify the request's User-Agent header and answer with a 400.

    Returns a ``(400, {}, body)`` triple whose body is a JSON object
    carrying a 'bad request' message.
    """
    sent_agent = request.headers.get('User-Agent')
    assert sent_agent == _user_agent()

    error_payload = {'message': 'bad request'}
    return 400, {}, json.dumps(error_payload)
def download_endpoint(request):
    """Verify the request's User-Agent header and answer with raw bytes.

    Returns a ``(200, {}, body)`` triple whose body is four unsigned
    bytes (0, 1, 254, 255) packed with ``struct``.
    """
    sent_agent = request.headers.get('User-Agent')
    assert sent_agent == _user_agent()

    byte_values = (0, 1, 254, 255)
    binary_body = struct.pack('BBBB', *byte_values)
    return 200, {}, binary_body
def download_datapackage(self, dataset_key, dest_dir):
    """Download and unzip a dataset's datapackage

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id
    dest_dir : str or path
        Directory under which datapackage should be saved. It must not
        exist yet.

    Returns
    -------
    path
        Location of the datapackage descriptor (datapackage.json) in the
        local filesystem

    Raises
    ------
    ValueError
        If dest_dir already exists as a directory
    RestApiError
        If the download request fails or returns an error status
    RuntimeError
        If the downloaded archive does not contain a datapackage.json

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> datapackage_descriptor = api_client.download_datapackage(
    ...     'jonloyens/an-intro-to-dataworld-dataset', '/tmp/test')
    >>> datapackage_descriptor
    '/tmp/test/datapackage.json'
    """
    if path.isdir(dest_dir):
        raise ValueError('dest_dir must be a new directory, '
                         'but {} already exists'.format(dest_dir))

    owner_id, dataset_id = parse_dataset_key(dataset_key)
    url = "{0}://{1}/datapackage/{2}/{3}".format(
        self._protocol, self._download_host, owner_id, dataset_id)
    headers = {
        'User-Agent': _user_agent(),
        'Authorization': 'Bearer {0}'.format(self._config.auth_token)
    }

    try:
        response = requests.get(url, headers=headers, stream=True)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RestApiError(cause=e)

    # Scratch directory with a unique name, used for the archive and its
    # extracted contents
    unzip_dir = path.join(self._config.tmp_dir, str(uuid.uuid4()))

    try:
        os.makedirs(unzip_dir)
        zip_file = path.join(unzip_dir, 'dataset.zip')

        with open(zip_file, 'wb') as f:
            for data in response.iter_content(chunk_size=4096):
                f.write(data)

        # Context manager guarantees the archive handle is closed, even
        # if extraction fails
        with zipfile.ZipFile(zip_file) as zip_obj:
            zip_obj.extractall(path=unzip_dir)

        # Find where datapackage.json is within expanded files.
        # NOTE: glob is not called with recursive=True, so '**' behaves
        # like '*' and only a descriptor located exactly one directory
        # below unzip_dir is found.
        unzipped_descriptor = glob.glob(
            '{}/**/datapackage.json'.format(unzip_dir))
        if not unzipped_descriptor:
            raise RuntimeError(
                'Zip file did not contain a datapackage manifest.')

        unzipped_dir = path.dirname(unzipped_descriptor[0])
        shutil.move(unzipped_dir, dest_dir)
    finally:
        # Always release the streamed connection and remove the scratch
        # directory, so failures do not leave temporary files behind
        response.close()
        shutil.rmtree(unzip_dir, ignore_errors=True)

    return path.join(dest_dir, 'datapackage.json')
def query(self, dataset_key, query, query_type="sql", parameters=None):
    """Run a SQL or SPARQL query, optionally parameterized, on a dataset

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id or of a url
    query : str
        Text of the SQL or SPARQL query
    query_type : {'sql', 'sparql'}, optional
        Query language. It is used verbatim as a path segment of the
        request URL.
    parameters : query parameters, optional
        Values to bind into the query. For a SPARQL query this should be
        a dict of named parameters; for a SQL query it should be a list
        of positional parameters. Boolean values will be converted to
        xsd:boolean, Integer values to xsd:integer, and other Numeric
        values to xsd:decimal. Anything else is treated as a String
        literal.

    Returns
    -------
    QueryResults
        Wrapper around the JSON body of a successful (HTTP 200) response

    Raises
    ------
    RuntimeError
        If the response status code is anything other than 200
    """
    # TODO Move network request to RestApiClient
    owner, dataset = parse_dataset_key(dataset_key)

    request_params = {'query': query}
    if parameters:
        named = None
        if query_type == "sparql":
            # SPARQL parameters already arrive as a name -> value mapping
            named = parameters
        elif query_type == "sql":
            # SQL parameters are positional; unwind them to
            # $data_world_paramN for each 0-indexed position N
            named = {
                "$data_world_param{}".format(position): value
                for position, value in enumerate(parameters)
            }
        if named is not None:
            pairs = [
                "{}={}".format(name, convert_to_sparql_literal(named[name]))
                for name in named.keys()
            ]
            request_params['parameters'] = ",".join(pairs)

    endpoint = "{0}://{1}/{2}/{3}/{4}".format(
        self._protocol, self._query_host, query_type, owner, dataset)
    request_headers = {
        'User-Agent': _user_agent(),
        'Accept': 'application/sparql-results+json',
        'Authorization': 'Bearer {0}'.format(self._config.auth_token),
    }

    resp = requests.get(
        endpoint, params=request_params, headers=request_headers)

    # Guard clause: anything but a plain 200 is reported as a failure
    if resp.status_code != 200:
        raise RuntimeError(
            'Error executing query: {}'.format(resp.content))
    return QueryResults(resp.json())
def query(self, dataset_key, query, query_type="sql", parameters=None):
    """Execute a query against a dataset, binding optional parameters

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id or of a url
    query : str
        Text of the SQL or SPARQL query
    query_type : {'sql', 'sparql'}, optional
        Query language. It is used verbatim as a path segment of the
        request URL.
    parameters : query parameters, optional
        Values to bind into the query. For a SPARQL query this should be
        a dict of named parameters; for a SQL query it should be a list
        of positional parameters. Boolean values will be converted to
        xsd:boolean, Integer values to xsd:integer, and other Numeric
        values to xsd:decimal. Anything else is treated as a String
        literal.

    Returns
    -------
    QueryResults
        Wrapper around the JSON body of a successful (HTTP 200) response

    Raises
    ------
    RuntimeError
        If the response status code is anything other than 200
    """
    def _as_parameter_string(named_values):
        # Render a mapping as comma-separated name=literal pairs
        rendered = []
        for key in named_values.keys():
            literal = convert_to_sparql_literal(named_values[key])
            rendered.append("{}={}".format(key, literal))
        return ",".join(rendered)

    # TODO Move network request to RestApiClient
    owner_id, dataset_id = parse_dataset_key(dataset_key)

    payload = {"query": query}
    if parameters:
        if query_type == "sparql":
            # Named parameters are passed through as given
            payload["parameters"] = _as_parameter_string(parameters)
        elif query_type == "sql":
            # Positional parameters become $data_world_paramN, where N
            # is the 0-indexed position
            positional = dict(
                ("$data_world_param{}".format(index), item)
                for index, item in enumerate(parameters))
            payload["parameters"] = _as_parameter_string(positional)

    url_parts = (self._protocol, self._query_host, query_type,
                 owner_id, dataset_id)
    target = "{0}://{1}/{2}/{3}/{4}".format(*url_parts)

    http_headers = {
        'User-Agent': _user_agent(),
        'Accept': 'application/sparql-results+json',
        'Authorization': 'Bearer {0}'.format(self._config.auth_token),
    }

    reply = requests.get(target, params=payload, headers=http_headers)
    succeeded = reply.status_code == 200
    if not succeeded:
        raise RuntimeError(
            'Error executing query: {}'.format(reply.content))
    return QueryResults(reply.json())