コード例 #1
0
    def query(self, dataset_key, query, query_type="sql"):
        """Run a SQL or SPARQL query against an existing dataset

        Parameters
        ----------
        dataset_key : str
            Dataset identifier, in the form of owner/id or of a url
        query : str
            Text of the SQL or SPARQL query
        query_type : {'sql', 'sparql'}, optional
            Either 'sql' or 'sparql'; used as a path segment of the
            request URL.

        Returns
        -------
        QueryResults
            Wrapper built from the decoded JSON body of the response

        Raises
        ------
        RuntimeError
            If the server answers with any status code other than 200
        """
        # TODO Move network request to RestApiClient
        owner_id, dataset_id = parse_dataset_key(dataset_key)
        endpoint = "{0}://{1}/{2}/{3}/{4}".format(
            self._protocol, self._query_host, query_type, owner_id,
            dataset_id)
        request_headers = {
            'User-Agent': _user_agent(),
            'Accept': 'application/sparql-results+json',
            'Authorization': 'Bearer {0}'.format(self._config.auth_token),
        }
        response = requests.get(endpoint, params={"query": query},
                                headers=request_headers)
        # Guard clause: anything but a plain 200 is reported to the caller
        # together with the raw response body.
        if response.status_code != 200:
            raise RuntimeError('Error executing query: {}'.format(
                response.content))
        return QueryResults(response.json())
コード例 #2
0
ファイル: test_util.py プロジェクト: tkhanna42/data.world-py
def test__user_agent():
    """Check that the user agent string embeds the package version."""
    from datadotworld import __version__
    expected_agent = 'data.world-py - {}'.format(__version__)
    assert_that(util._user_agent(), equal_to(expected_agent))
コード例 #3
0
ファイル: test_files.py プロジェクト: saint-han/data.world-py
 def download_endpoint(request):
     """Stub download handler: verify the User-Agent, reply 200 with text."""
     sent_agent = request.headers.get('User-Agent')
     assert sent_agent == _user_agent()
     return 200, {}, "this is the test."
コード例 #4
0
ファイル: test_files.py プロジェクト: saint-han/data.world-py
 def upload_endpoint(request):
     """Stub upload handler: verify the User-Agent, reply 400 with '{}'."""
     sent_agent = request.headers.get('User-Agent')
     assert sent_agent == _user_agent()
     empty_json = json.dumps({})
     return 400, {}, empty_json
コード例 #5
0
ファイル: test_files.py プロジェクト: saint-han/data.world-py
 def upload_endpoint(request):
     """Stub upload handler: verify the CSV body and User-Agent, reply 200.

     The request body is consumed as an iterable of byte chunks that are
     decoded as UTF-8 and concatenated before comparison.
     """
     uploaded_text = ''.join(
         chunk.decode('utf-8') for chunk in request.body)
     assert "a,b\r\n42,17\r\n420,178\r\n" == uploaded_text
     sent_agent = request.headers.get('User-Agent')
     assert sent_agent == _user_agent()
     return 200, {}, json.dumps({})
コード例 #6
0
ファイル: test_files.py プロジェクト: saint-han/data.world-py
 def download_endpoint(request):
     """Stub download handler: verify the User-Agent, reply 200 with two
     newline-terminated JSON records."""
     sent_agent = request.headers.get('User-Agent')
     assert sent_agent == _user_agent()
     json_records = (
         '{"A":"1", "B":"2", "C":"3"}\n',
         '{"A":"4", "B":"5", "C":"6"}\n',
     )
     return 200, {}, ''.join(json_records)
コード例 #7
0
ファイル: test_files.py プロジェクト: saint-han/data.world-py
 def download_endpoint(request):
     """Stub download handler: verify the User-Agent, reply 200 with CSV."""
     sent_agent = request.headers.get('User-Agent')
     assert sent_agent == _user_agent()
     csv_body = "A,B,C\n1,2,3\n4,5,6"
     return 200, {}, csv_body
コード例 #8
0
ファイル: test_files.py プロジェクト: saint-han/data.world-py
 def download_endpoint(request):
     """Stub download handler: verify the User-Agent, reply 400 with a JSON
     error message."""
     sent_agent = request.headers.get('User-Agent')
     assert sent_agent == _user_agent()
     error_body = json.dumps({'message': 'bad request'})
     return 400, {}, error_body
コード例 #9
0
ファイル: test_files.py プロジェクト: saint-han/data.world-py
 def download_endpoint(request):
     """Stub download handler: verify the User-Agent, reply 200 with four
     raw bytes (0, 1, 254, 255)."""
     sent_agent = request.headers.get('User-Agent')
     assert sent_agent == _user_agent()
     binary_body = struct.pack('BBBB', 0, 1, 254, 255)
     return 200, {}, binary_body
コード例 #10
0
    def download_datapackage(self, dataset_key, dest_dir):
        """
        Download and unzip a dataset's datapackage

        The archive is streamed into a fresh scratch directory under the
        configured tmp_dir, extracted there, and the directory holding
        datapackage.json is then moved to dest_dir. The scratch directory
        is removed afterwards, whether or not the operation succeeded.

        Parameters
        ----------
        dataset_key : str
            Dataset identifier, in the form of owner/id
        dest_dir : str or path
            Directory under which datapackage should be saved. It must not
            already exist as a directory.

        Returns
        -------
        path
            Location of the datapackage descriptor (datapackage.json) in the
            local filesystem

        Raises
        ------
        ValueError
            If dest_dir already exists as a directory
        RestApiError
            If the request fails or the server returns an error status
        RuntimeError
            If the downloaded archive contains no datapackage.json

        Examples
        --------
        >>> import datadotworld as dw
        >>> api_client = dw.api_client()
        >>> datapackage_descriptor = api_client.download_datapackage(
        ...     'jonloyens/an-intro-to-dataworld-dataset', '/tmp/test')
        >>> datapackage_descriptor
        '/tmp/test/datapackage.json'
        """
        if path.isdir(dest_dir):
            raise ValueError('dest_dir must be a new directory, '
                             'but {} already exists'.format(dest_dir))

        owner_id, dataset_id = parse_dataset_key(dataset_key)
        url = "{0}://{1}/datapackage/{2}/{3}".format(self._protocol,
                                                     self._download_host,
                                                     owner_id, dataset_id)
        headers = {
            'User-Agent': _user_agent(),
            'Authorization': 'Bearer {0}'.format(self._config.auth_token)
        }

        try:
            response = requests.get(url, headers=headers, stream=True)
            response.raise_for_status()
        except requests.RequestException as e:
            raise RestApiError(cause=e)

        unzip_dir = path.join(self._config.tmp_dir, str(uuid.uuid4()))
        os.makedirs(unzip_dir)

        try:
            zip_file = path.join(unzip_dir, 'dataset.zip')

            # The response was opened with stream=True, so it has to be
            # closed explicitly once the body has been written to disk.
            try:
                with open(zip_file, 'wb') as f:
                    for data in response.iter_content(chunk_size=4096):
                        f.write(data)
            finally:
                response.close()

            # Context manager guarantees the archive handle is released
            # before the extracted tree is moved or deleted.
            with zipfile.ZipFile(zip_file) as zip_obj:
                zip_obj.extractall(path=unzip_dir)

            # Find where datapackage.json is within expanded files.
            # glob is not called in recursive mode, so '**' behaves like
            # '*': only a descriptor exactly one directory below unzip_dir
            # is matched.
            unzipped_descriptor = glob.glob(
                '{}/**/datapackage.json'.format(unzip_dir))
            if not unzipped_descriptor:
                raise RuntimeError(
                    'Zip file did not contain a datapackage manifest.')

            unzipped_dir = path.dirname(unzipped_descriptor[0])

            shutil.move(unzipped_dir, dest_dir)
        finally:
            # Always drop the scratch directory, including on failure, so
            # aborted downloads do not accumulate under tmp_dir.
            shutil.rmtree(unzip_dir, ignore_errors=True)

        return path.join(dest_dir, 'datapackage.json')
コード例 #11
0
    def query(self, dataset_key, query, query_type="sql", parameters=None):
        """Run a SQL or SPARQL query, optionally parameterized, against an
        existing dataset

        Parameters
        ----------
        dataset_key : str
            Dataset identifier, in the form of owner/id or of a url
        query : str
            Text of the SQL or SPARQL query
        query_type : {'sql', 'sparql'}, optional
            Either 'sql' or 'sparql'; used as a path segment of the
            request URL.
        parameters: query parameters, optional
            For a SPARQL query, a dict of named parameters. For a SQL
            query, a list of positional parameters, which are renamed to
            $data_world_paramN for each 0-indexed position N. Every value
            is passed through convert_to_sparql_literal before being sent.
            Ignored when empty or when query_type is neither 'sql' nor
            'sparql'.

        Returns
        -------
        QueryResults
            Wrapper built from the decoded JSON body of the response

        Raises
        ------
        RuntimeError
            If the server answers with any status code other than 200
        """
        # TODO Move network request to RestApiClient
        owner_id, dataset_id = parse_dataset_key(dataset_key)
        params = {"query": query}

        # Normalize both flavours to one name -> value mapping so the
        # serialization below is written only once.
        named_parameters = None
        if parameters and query_type == "sparql":
            named_parameters = parameters
        elif parameters and query_type == "sql":
            named_parameters = {
                "$data_world_param{}".format(position): value
                for position, value in enumerate(parameters)
            }

        if named_parameters is not None:
            params["parameters"] = ",".join(
                "{}={}".format(
                    name, convert_to_sparql_literal(named_parameters[name]))
                for name in named_parameters.keys()
            )

        endpoint = "{0}://{1}/{2}/{3}/{4}".format(
            self._protocol, self._query_host, query_type, owner_id,
            dataset_id)
        request_headers = {
            'User-Agent': _user_agent(),
            'Accept': 'application/sparql-results+json',
            'Authorization': 'Bearer {0}'.format(self._config.auth_token),
        }
        response = requests.get(endpoint, params=params,
                                headers=request_headers)
        if response.status_code != 200:
            raise RuntimeError('Error executing query: {}'.format(
                response.content))
        return QueryResults(response.json())
コード例 #12
0
    def query(self, dataset_key, query, query_type="sql", parameters=None):
        """Execute a query on an existing dataset and return its results

        Parameters
        ----------
        dataset_key : str
            Dataset identifier, in the form of owner/id or of a url
        query : str
            SQL or SPARQL query text
        query_type : {'sql', 'sparql'}, optional
            Either 'sql' or 'sparql'; used as a path segment of the
            request URL.
        parameters: query parameters, optional
            A dict of named parameters for SPARQL, or a list of positional
            parameters for SQL (sent as $data_world_paramN, N being the
            0-indexed position). Values are converted with
            convert_to_sparql_literal. Ignored when empty or when
            query_type is neither 'sql' nor 'sparql'.

        Returns
        -------
        QueryResults
            Wrapper built from the decoded JSON body of the response

        Raises
        ------
        RuntimeError
            If the server answers with any status code other than 200
        """
        def _serialize(named):
            # Render a mapping as comma-separated name=literal pairs.
            pairs = []
            for name in named.keys():
                literal = convert_to_sparql_literal(named[name])
                pairs.append("{}={}".format(name, literal))
            return ",".join(pairs)

        # TODO Move network request to RestApiClient
        owner_id, dataset_id = parse_dataset_key(dataset_key)
        params = {"query": query}
        if parameters:
            if query_type == "sparql":
                # SPARQL parameters already arrive as a name -> value map
                params["parameters"] = _serialize(parameters)
            elif query_type == "sql":
                # SQL parameters are positional; give each one the
                # $data_world_paramN name for its 0-indexed position N
                positional = {}
                for index, value in enumerate(parameters):
                    positional["$data_world_param{}".format(index)] = value
                params["parameters"] = _serialize(positional)

        url_parts = (self._protocol, self._query_host, query_type,
                     owner_id, dataset_id)
        url = "{0}://{1}/{2}/{3}/{4}".format(*url_parts)
        headers = {
            'User-Agent': _user_agent(),
            'Accept': 'application/sparql-results+json',
            'Authorization': 'Bearer {0}'.format(self._config.auth_token)
        }
        response = requests.get(url, params=params, headers=headers)
        succeeded = response.status_code == 200
        if not succeeded:
            raise RuntimeError(
                'Error executing query: {}'.format(response.content))
        return QueryResults(response.json())