def append_stream(self, owner, dataset, stream, records):
        """Append records to a stream in a data.world dataset

        The records are serialized with ``to_jsonlines``, encoded as UTF-8
        and sent in a single POST request. The request is timed under the
        ``append`` metric, tagged with the stream ID.

        :param owner: User or organization ID of the owner of the dataset
        :type owner: str
        :param dataset: Dataset ID
        :type dataset: str
        :param stream: Stream ID
        :type stream: str
        :param records: Objects to be appended to the stream
        :type records: iterable

        :raises ApiError: Failure invoking data.world API
        """
        with metrics.http_request_timer('append') as t:
            t.tags['stream'] = stream

            try:
                self._session.post(
                    '{}/streams/{}/{}/{}'.format(
                        self._api_url, owner, dataset, stream),
                    data=to_jsonlines(records).encode('utf-8'),
                    headers={
                        'Content-Type':
                        'application/json-l; charset=utf-8'
                    },
                    # Bound the call like the other requests issued by this
                    # client; without a timeout a stalled connection would
                    # block indefinitely.
                    timeout=(self._conn_timeout, self._read_timeout)
                ).raise_for_status()
            except RequestException as e:
                raise convert_requests_exception(e)
    def truncate_stream_records(self, owner, dataset, stream):
        """Truncate the records of a stream in a data.world dataset

        Issues a DELETE request against the stream's ``records`` endpoint.

        :param owner: User or organization ID of the owner of the dataset
        :type owner: str
        :param dataset: Dataset ID
        :type dataset: str
        :param stream: Stream ID
        :type stream: str

        :returns: Decoded JSON body of the API response
        :rtype: object

        :raises ApiError: Failure invoking data.world API
        """
        with metrics.http_request_timer('truncate_stream_records'):
            try:
                records_url = '{}/streams/{}/{}/{}/records'.format(
                    self._api_url, owner, dataset, stream)
                timeouts = (self._conn_timeout, self._read_timeout)

                response = self._session.delete(records_url, timeout=timeouts)
                response.raise_for_status()
                return response.json()
            except RequestException as error:
                raise convert_requests_exception(error)
    def set_stream_schema(self, owner, dataset, stream, **kwargs):
        """Set the schema of a stream in a data.world dataset

        Sends the given keyword arguments as the JSON body of a PATCH
        request to the stream's ``schema`` endpoint.

        :param owner: User or organization ID of the owner of the dataset
        :type owner: str
        :param dataset: Dataset ID
        :type dataset: str
        :param stream: Stream ID
        :type stream: str
        :param kwargs: Schema properties (primaryKeyFields, sequenceField,
        updateMethod)
        :type kwargs: dict

        :returns: Decoded JSON body of the API response
        :rtype: object

        :raises ApiError: Failure invoking data.world API
        """
        with metrics.http_request_timer('set_stream_schema'):
            try:
                schema_url = '{}/streams/{}/{}/{}/schema'.format(
                    self._api_url, owner, dataset, stream)
                timeouts = (self._conn_timeout, self._read_timeout)

                response = self._session.patch(
                    schema_url, json=kwargs, timeout=timeouts)
                response.raise_for_status()
                return response.json()
            except RequestException as error:
                raise convert_requests_exception(error)
    def create_dataset(self, owner, dataset, **kwargs):
        """Create a new dataset

        Sends the given keyword arguments as the JSON body of a PUT request
        to the dataset's URL.

        :param owner: User or organization ID of the owner of the dataset
        :type owner: str
        :param dataset: Dataset ID
        :type dataset: str
        :param kwargs: Dataset properties
        :type kwargs: dict

        :returns: Decoded JSON body of the API response
        :rtype: object

        :raises ApiError: Failure invoking data.world API

        .. seealso:: `Dataset properties
            <https://apidocs.data.world/v0/models/datasetcreaterequest>`_
        """
        with metrics.http_request_timer('create_dataset'):
            try:
                dataset_url = '{}/datasets/{}/{}'.format(
                    self._api_url, owner, dataset)
                timeouts = (self._conn_timeout, self._read_timeout)

                response = self._session.put(
                    dataset_url, json=kwargs, timeout=timeouts)
                response.raise_for_status()
                return response.json()
            except RequestException as error:
                raise convert_requests_exception(error)
Exemple #5
0
def test_convert_requests_exception_offline():
    """A requests ConnectionError must be converted to ConnectionError."""
    api_url = 'https://acme.inc/api'
    # Register a mocked endpoint whose body is an exception, so the GET
    # below raises instead of returning a response.
    responses.add('GET', api_url, body=rqex.ConnectionError())

    with pytest.raises(ConnectionError):
        try:
            response = requests.get(api_url)
            response.raise_for_status()
        except rqex.ConnectionError as error:
            raise convert_requests_exception(error)
Exemple #6
0
def test_convert_requests_exception(status_code, expected_error):
    """An HTTP error with ``status_code`` must map to ``expected_error``."""
    api_url = 'https://acme.inc/api'
    # Register a mocked endpoint that answers with the status under test.
    responses.add('GET', api_url, status=status_code)

    with pytest.raises(expected_error):
        try:
            response = requests.get(api_url)
            response.raise_for_status()
        except rqex.HTTPError as error:
            raise convert_requests_exception(error)
    def connection_check(self):
        """Verify network connectivity

        Performs a GET on the API's ``/user`` endpoint and fails if the
        request errors out or returns an error status, ensuring that the
        client can communicate with data.world's API.
        """
        with metrics.http_request_timer('user'):
            try:
                user_url = '{}/user'.format(self._api_url)
                timeouts = (self._conn_timeout, self._read_timeout)

                response = self._session.get(user_url, timeout=timeouts)
                response.raise_for_status()
            except RequestException as error:
                raise convert_requests_exception(error)
    def get_dataset(self, owner, dataset):
        """Fetch dataset info

        :param owner: User or organization ID of the owner of the dataset
        :type owner: str
        :param dataset: Dataset ID
        :type dataset: str

        :returns: Dataset object (decoded JSON body of the API response)
        :rtype: object

        :raises ApiError: Failure invoking data.world API
        """
        with metrics.http_request_timer('dataset'):
            try:
                dataset_url = '{}/datasets/{}/{}'.format(
                    self._api_url, owner, dataset)
                timeouts = (self._conn_timeout, self._read_timeout)

                response = self._session.get(dataset_url, timeout=timeouts)
                response.raise_for_status()
                return response.json()
            except RequestException as error:
                raise convert_requests_exception(error)
    def sync(self, owner, dataset):
        """Trigger ingest of streamed records

        Issues a POST request against the dataset's ``sync`` endpoint.

        :param owner: User or organization ID of the owner of the dataset
        :type owner: str
        :param dataset: Dataset ID
        :type dataset: str

        :returns: Decoded JSON body of the API response
        :rtype: object

        :raises ApiError: Failure invoking data.world API
        """
        with metrics.http_request_timer('sync'):
            try:
                sync_url = '{}/datasets/{}/{}/sync'.format(
                    self._api_url, owner, dataset)
                timeouts = (self._conn_timeout, self._read_timeout)

                response = self._session.post(sync_url, timeout=timeouts)
                response.raise_for_status()
                return response.json()
            except RequestException as error:
                raise convert_requests_exception(error)
    def get_current_version(self, owner, dataset, stream):
        """Return the version of a sample record from a given stream

        Runs a ``SELECT * ... LIMIT 1`` SQL query against the table derived
        from the stream ID and reads the ``singer_version`` field of the
        single row returned.

        :param owner: User or organization ID of the owner of the dataset
        :type owner: str
        :param dataset: Dataset ID
        :type dataset: str
        :param stream: Stream ID
        :type stream: str

        :returns: Value of ``singer_version`` in the sampled row, or None
            when the query returns no rows, the row has no such field, or
            the API answers with HTTP 400
        :rtype: object

        :raises ApiError: Failure invoking data.world API
        """
        with metrics.http_request_timer('fetch_latest_version'):
            try:
                resp = self._session.get(
                    '{}/sql/{}/{}'.format(self._api_url, owner, dataset),
                    params={
                        'query':
                        'SELECT * '
                        'FROM `{}`.`{}`.`{}` '
                        'LIMIT 1'.format(owner, dataset, to_table_name(stream))
                    },
                    timeout=(self._conn_timeout, self._read_timeout))
                resp.raise_for_status()
                rows = resp.json()
                if not rows:
                    return None
                return rows[0].get('singer_version')
            except RequestException as e:
                # Connection errors and timeouts carry no response object;
                # only inspect the status code when a response exists so the
                # original failure is converted rather than masked by an
                # AttributeError.
                response = e.response
                if response is not None and response.status_code == 400:
                    logger.warning('Unable fetch latest version. '
                                   'Expected if table doesn\'t exist yet. '
                                   'Server message: {}'.format(response.text))
                    return None
                raise convert_requests_exception(e)