Exemple #1
0
    def __mark_job_run_as(self, run_id, action, at=None):
        """POST a state-change request for a job run.

        :param run_id: run identifier, checked with ``Utils.is_valid_uuid``.
        :param action: path segment placed after the run id in the URL.
        :param at: value of the ``at`` query parameter; when falsy,
            ``Utils.utc_now()`` is used instead.
        :return: the result of ``self._post``.
        """
        Utils.is_valid_uuid(run_id, 'run_id')

        transitioned_at = at or Utils.utc_now()
        endpoint = self._url('/jobs/runs/{0}/{1}?at={2}', run_id, action,
                             transitioned_at)
        return self._post(endpoint)
Exemple #2
0
    def get_job(self, namespace_name, job_name):
        """GET the job stored under ``/namespaces/<ns>/jobs/<job>``.

        :param namespace_name: namespace segment of the URL.
        :param job_name: job segment of the URL.
        :return: the result of ``self._get``.

        Both names are passed through ``Utils.check_name_length`` first.
        """
        for value, label in ((namespace_name, 'namespace_name'),
                             (job_name, 'job_name')):
            Utils.check_name_length(value, label)

        job_url = self._url('/namespaces/{0}/jobs/{1}',
                            namespace_name, job_name)
        return self._get(job_url)
Exemple #3
0
    def get_dataset(self, namespace_name, dataset_name):
        """GET the dataset stored under ``/namespaces/<ns>/datasets/<ds>``.

        :param namespace_name: namespace segment of the URL.
        :param dataset_name: dataset segment of the URL.
        :return: the result of ``self._get``.

        Both names are passed through ``Utils.check_name_length`` first.
        """
        for value, label in ((namespace_name, 'namespace_name'),
                             (dataset_name, 'dataset_name')):
            Utils.check_name_length(value, label)

        dataset_url = self._url('/namespaces/{0}/datasets/{1}',
                                namespace_name, dataset_name)
        return self._get(dataset_url)
Exemple #4
0
    def create_job_run(self, namespace_name, job_name, run_id=None,
                       nominal_start_time=None,
                       nominal_end_time=None, run_args=None,
                       mark_as_running=False):
        """Create a run for a job and optionally mark it as started.

        :param namespace_name: namespace segment of the URL.
        :param job_name: job segment of the URL.
        :param run_id: optional id sent as ``id`` in the payload.
        :param nominal_start_time: optional ``nominalStartTime`` value.
        :param nominal_end_time: optional ``nominalEndTime`` value.
        :param run_args: optional ``args`` value.
        :param mark_as_running: when true, the new run is immediately
            passed to ``self.mark_job_run_as_started`` and that call's
            result is returned instead of the creation response.
        :return: the creation response, or the "started" response when
            ``mark_as_running`` is set.

        Only truthy optional values are included in the payload.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(job_name, 'job_name')

        payload = {}

        if run_id:
            payload['id'] = run_id

        if nominal_start_time:
            payload['nominalStartTime'] = nominal_start_time

        if nominal_end_time:
            payload['nominalEndTime'] = nominal_end_time

        if run_args:
            payload['args'] = run_args

        response = self._post(
            self._url('/namespaces/{0}/jobs/{1}/runs',
                      namespace_name, job_name),
            payload=payload)

        if mark_as_running:
            # Without a caller-supplied id, the only usable id is the one
            # reported back for the created run; passing None on would
            # make the follow-up call target no run at all.
            # NOTE(review): assumes the creation response is a mapping
            # with an 'id' key - confirm against the API version in use.
            started_run_id = run_id or response['id']
            response = self.mark_job_run_as_started(started_run_id)

        return response
Exemple #5
0
def test_mk_fields_from():
    """Check ``Utils.mk_fields_from`` on valid fields and on ``[{}]``.

    Valid field dicts must come back equal to an identical literal, and a
    list holding an empty field dict must raise ``ValueError``.
    """

    def _flight_fields():
        # A fresh literal per call, so input and expectation never alias.
        return [
            {
                "name": "flight_id",
                "type": "INTEGER",
                "description": "flight id",
                "tags": ["tag1", "tag2"]
            },
            {
                "name": "flight_name",
                "type": "VARCHAR",
                "description": "flight name",
                "tags": ["tag3", "tag4"]
            },
            {
                "name": "flight_date",
                "type": "TIMESTAMP",
                "description": "flight date"
            },
        ]

    fields_valid = _flight_fields()
    new_fields_valid = _flight_fields()
    fields_name_error = [{}]

    assert Utils.mk_fields_from(fields=fields_valid) == new_fields_valid

    with pytest.raises(ValueError):
        Utils.mk_fields_from(fields=fields_name_error)
Exemple #6
0
    def list_jobs(self, namespace_name, limit=None, offset=None):
        """GET the jobs collection of a namespace.

        :param namespace_name: namespace segment of the URL, checked with
            ``Utils.check_name_length``.
        :param limit: forwarded unchanged as the ``limit`` query parameter.
        :param offset: forwarded unchanged as the ``offset`` query
            parameter.
        :return: the result of ``self._get``.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')

        jobs_url = self._url('/namespaces/{0}/jobs', namespace_name)
        paging = {'limit': limit, 'offset': offset}
        return self._get(jobs_url, params=paging)
Exemple #7
0
    def __init__(self, url, timeout_ms=None, api_key: str = None):
        """Configure the base URL, timeout and optional API key.

        :param url: prefix placed before ``API_PATH_V1``.
        :param timeout_ms: timeout handed to ``Utils.to_seconds``; when
            falsy, the ``MARQUEZ_TIMEOUT_MS`` environment variable is used,
            falling back to ``DEFAULT_TIMEOUT_MS``.
        :param api_key: when truthy, passed with the module-level
            ``_HEADERS`` to ``Utils.add_auth_to``.
        """
        if not timeout_ms:
            timeout_ms = os.environ.get(
                'MARQUEZ_TIMEOUT_MS', DEFAULT_TIMEOUT_MS)
        self._timeout = Utils.to_seconds(timeout_ms)

        self._api_base = f"{url}{API_PATH_V1}"

        if api_key:
            # NOTE(review): _HEADERS is module-level, so this presumably
            # affects every client in the process - confirm intent.
            Utils.add_auth_to(_HEADERS, api_key)
Exemple #8
0
    def list_datasets(self, namespace_name, limit=None, offset=None):
        """GET the datasets collection of a namespace.

        :param namespace_name: namespace segment of the URL, checked with
            ``Utils.check_name_length``.
        :param limit: ``limit`` query parameter; a falsy value is replaced
            by ``DEFAULT_LIMIT``.
        :param offset: ``offset`` query parameter; a falsy value is
            replaced by ``DEFAULT_OFFSET``.
        :return: the result of ``self._get``.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')

        datasets_url = self._url('/namespaces/{0}/datasets', namespace_name)
        paging = {
            'limit': limit or DEFAULT_LIMIT,
            'offset': offset or DEFAULT_OFFSET,
        }
        return self._get(datasets_url, params=paging)
Exemple #9
0
    def tag_dataset(self, namespace_name, dataset_name, tag_name):
        """POST a tag onto a dataset.

        :param namespace_name: namespace segment of the URL.
        :param dataset_name: dataset segment of the URL.
        :param tag_name: tag segment of the URL; must be truthy.
        :return: the result of ``self._post``.
        :raises ValueError: if ``tag_name`` is falsy.

        The namespace and dataset names are passed through
        ``Utils.check_name_length`` before the tag is examined.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(dataset_name, 'dataset_name')

        if not tag_name:
            raise ValueError('tag_name must not be None')

        tag_url = self._url('/namespaces/{0}/datasets/{1}/tags/{2}',
                            namespace_name, dataset_name, tag_name)
        return self._post(tag_url)
Exemple #10
0
    def list_job_runs(self, namespace_name, job_name, limit=None, offset=None):
        """GET the runs collection of a job.

        :param namespace_name: namespace segment of the URL.
        :param job_name: job segment of the URL.
        :param limit: ``limit`` query parameter; a falsy value is replaced
            by ``DEFAULT_LIMIT``.
        :param offset: ``offset`` query parameter; a falsy value is
            replaced by ``DEFAULT_OFFSET``.
        :return: the result of ``self._get``.

        Both names are passed through ``Utils.check_name_length`` first.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(job_name, 'job_name')

        runs_url = self._url('/namespaces/{0}/jobs/{1}/runs',
                             namespace_name, job_name)
        paging = {
            'limit': limit or DEFAULT_LIMIT,
            'offset': offset or DEFAULT_OFFSET,
        }
        return self._get(runs_url, params=paging)
Exemple #11
0
    def get_dataset_version(self, namespace_name, dataset_name, version):
        """GET one version of a dataset.

        :param namespace_name: namespace segment of the URL.
        :param dataset_name: dataset segment of the URL.
        :param version: version segment of the URL; must be truthy.
        :return: the result of ``self._get``.
        :raises ValueError: if ``version`` is falsy.

        The namespace and dataset names are passed through
        ``Utils.check_name_length`` before the version is examined.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(dataset_name, 'dataset_name')

        if not version:
            raise ValueError('version must not be None')

        version_url = self._url('/namespaces/{0}/datasets/{1}/versions/{2}',
                                namespace_name, dataset_name, version)
        return self._get(version_url)
Exemple #12
0
    def create_namespace(self, namespace_name, owner_name, description=None):
        """PUT a namespace with its owner and optional description.

        :param namespace_name: namespace segment of the URL.
        :param owner_name: sent as ``ownerName`` in the payload.
        :param description: sent as ``description`` only when truthy.
        :return: the result of ``self._put``.

        Both names are passed through ``Utils.check_name_length`` first.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(owner_name, 'owner_name')

        body = dict(ownerName=owner_name)
        if description:
            body.update(description=description)

        namespace_url = self._url('/namespaces/{0}', namespace_name)
        return self._put(namespace_url, payload=body)
Exemple #13
0
    def post(self, path, headers, payload=None):
        """Send a POST request to ``<api base><path>``.

        :param path: appended to ``self._api_base`` to form the URL.
        :param headers: request headers; when ``self._api_key`` is truthy
            this same object is first handed to ``Utils.add_auth_to``.
        :param payload: sent as the request's ``json`` argument.
        :return: ``self._response(response, as_json=True)``.
        """
        if self._api_key:
            Utils.add_auth_to(headers, self._api_key)

        request_kwargs = {
            'url': f"{self._api_base}{path}",
            'headers': headers,
            'json': payload,
            'timeout': self._timeout,
        }
        response = requests.post(**request_kwargs)

        return self._response(response, as_json=True)
Exemple #14
0
def test_new_client():
    """A client created while ``MARQUEZ_API_KEY`` is set has auth headers.

    The environment variable is set for the duration of the test and
    always removed afterwards, even when an assertion fails, so the key
    cannot leak into tests that run later in the same process.
    """
    os.environ['MARQUEZ_API_KEY'] = API_KEY

    try:
        from marquez_client.client import _USER_AGENT, _HEADERS
        headers_with_auth = {'User-Agent': _USER_AGENT}

        # Add API key to headers
        Utils.add_auth_to(headers_with_auth, API_KEY)

        client = Clients.new_client()
        assert client._api_base == API_BASE
        assert _HEADERS == headers_with_auth
    finally:
        # pop() rather than del: cleanup must not itself raise if the
        # variable was already removed during the test.
        os.environ.pop('MARQUEZ_API_KEY', None)
Exemple #15
0
    def create_source(self, source_name, source_type, connection_url,
                      description=None):
        """PUT a source definition.

        :param source_name: source segment of the URL, checked with
            ``Utils.check_name_length``.
        :param source_type: upper-cased and sent as ``type``.
        :param connection_url: checked with
            ``Utils.is_valid_connection_url`` and sent as
            ``connectionUrl``.
        :param description: sent as ``description`` only when truthy.
        :return: the result of ``self._put``.
        """
        Utils.check_name_length(source_name, 'source_name')
        Utils.is_valid_connection_url(connection_url)

        body = dict(type=source_type.upper(), connectionUrl=connection_url)
        if description:
            body.update(description=description)

        source_url = self._url('/sources/{0}', source_name)
        return self._put(source_url, payload=body)
Exemple #16
0
    def __init__(self, url, timeout_ms=None):
        """Configure the base URL and request timeout.

        :param url: prefix placed before ``_API_PATH``.
        :param timeout_ms: timeout handed to ``Utils.to_seconds``; when
            falsy, the ``MARQUEZ_TIMEOUT_MS`` environment variable is used,
            falling back to ``DEFAULT_TIMEOUT_MS``.

        The resulting API base is written to the debug log.
        """
        if not timeout_ms:
            timeout_ms = os.environ.get(
                'MARQUEZ_TIMEOUT_MS', DEFAULT_TIMEOUT_MS)
        self._timeout = Utils.to_seconds(timeout_ms)

        self._api_base = f'{url}{_API_PATH}'

        log.debug(self._api_base)
Exemple #17
0
    def tag_dataset_field(self, namespace_name, dataset_name, field_name,
                          tag_name):
        """POST a tag onto one field of a dataset.

        :param namespace_name: namespace segment of the URL.
        :param dataset_name: dataset segment of the URL.
        :param field_name: field segment of the URL.
        :param tag_name: tag segment of the URL.
        :return: the result of ``self._post``.

        All four names are passed through ``Utils.check_name_length``, in
        the order listed above, before the request is made.
        """
        named_segments = (
            (namespace_name, 'namespace_name'),
            (dataset_name, 'dataset_name'),
            (field_name, 'field_name'),
            (tag_name, 'tag_name'),
        )
        for value, label in named_segments:
            Utils.check_name_length(value, label)

        tag_url = self._url(
            '/namespaces/{0}/datasets/{1}/fields/{2}/tags/{3}',
            namespace_name, dataset_name, field_name, tag_name)
        return self._post(tag_url)
Exemple #18
0
    def create_dataset(self,
                       namespace_name,
                       dataset_name,
                       dataset_type,
                       physical_name,
                       source_name,
                       description=None,
                       run_id=None,
                       schema_location=None,
                       fields=None,
                       tags=None):
        """PUT a dataset definition.

        :param namespace_name: namespace segment of the URL.
        :param dataset_name: dataset segment of the URL.
        :param dataset_type: checked with ``Utils.is_instance_of`` against
            ``DatasetType``; its ``value`` is sent as ``type``.
        :param physical_name: sent as ``physicalName``.
        :param source_name: sent as ``sourceName``.
        :param description: optional ``description`` value.
        :param run_id: optional ``runId`` value.
        :param schema_location: optional ``schemaLocation`` value; for
            ``DatasetType.STREAM`` it is additionally passed to
            ``MarquezClient._is_none``.
        :param fields: optional ``fields`` value.
        :param tags: optional ``tags`` value.
        :return: the result of ``self._put``.

        Only truthy optional values are included in the payload.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(dataset_name, 'dataset_name')
        Utils.is_instance_of(dataset_type, DatasetType)

        if dataset_type == DatasetType.STREAM:
            MarquezClient._is_none(schema_location, 'schema_location')

        Utils.check_name_length(physical_name, 'physical_name')
        Utils.check_name_length(source_name, 'source_name')

        body = dict(type=dataset_type.value,
                    physicalName=physical_name,
                    sourceName=source_name)

        # Listed in the order the keys must be inserted into the payload.
        optional_entries = (
            ('description', description),
            ('runId', run_id),
            ('fields', fields),
            ('tags', tags),
            ('schemaLocation', schema_location),
        )
        for key, value in optional_entries:
            if value:
                body[key] = value

        dataset_url = self._url('/namespaces/{0}/datasets/{1}',
                                namespace_name, dataset_name)
        return self._put(dataset_url, payload=body)
Exemple #19
0
    def _get(self, url, params=None, as_json=True):
        """Issue a GET request, log its duration and unwrap the response.

        :param url: full request URL.
        :param params: query parameters forwarded to ``requests.get``.
        :param as_json: forwarded to ``self._response``.
        :return: the result of ``self._response``.
        """
        started_ms = Utils.now_ms()

        request_kwargs = dict(url=url,
                              params=params,
                              headers=_HEADERS,
                              timeout=self._timeout)
        response = requests.get(**request_kwargs)

        elapsed_ms = Utils.now_ms() - started_ms
        log.info(f"{url} method=GET duration_ms={elapsed_ms}")

        return self._response(response, as_json)
Exemple #20
0
    def _put(self, url, payload=None, as_json=True):
        """Issue a PUT request, log payload and duration, unwrap the result.

        :param url: full request URL.
        :param payload: sent as the request's ``json`` argument and written
            to the log via ``json.dumps``.
        :param as_json: forwarded to ``self._response``.
        :return: the result of ``self._response``.
        """
        started_ms = Utils.now_ms()

        request_kwargs = dict(url=url,
                              headers=_HEADERS,
                              json=payload,
                              timeout=self._timeout)
        response = requests.put(**request_kwargs)

        # Serialize before reading the clock, as the log line always did.
        payload_json = json.dumps(payload)
        elapsed_ms = Utils.now_ms() - started_ms
        log.info(f"{url} method=PUT payload={payload_json} "
                 f"duration_ms={elapsed_ms}")

        return self._response(response, as_json)
def test_mark_job_run_as_aborted(wo_client):
    """Aborting a run posts once to the run's ``abort`` path.

    The expected path carries the ``at`` timestamp that was passed in;
    any headers are accepted.
    """
    run_id = '71feb41b-be50-428c-8470-37b9c292f787'
    aborted_at = Utils.utc_now()

    wo_client.mark_job_run_as_aborted(run_id=run_id, at=aborted_at)

    expected_path = MarquezWriteOnlyClient._path(
        '/jobs/runs/71feb41b-be50-428c-8470-37b9c292f787/abort?at={0}',
        aborted_at)
    wo_client._backend.post.assert_called_once_with(
        path=expected_path, headers=mock.ANY)
Exemple #22
0
    def create_job(self, namespace_name, job_name, job_type, location=None,
                   inputs: [DatasetId] = None, outputs: [DatasetId] = None,
                   description=None, context=None, run_id=None):
        """PUT a job definition.

        :param namespace_name: namespace segment of the URL.
        :param job_name: job segment of the URL.
        :param job_type: checked with ``Utils.is_instance_of`` against
            ``JobType``; its ``value`` is sent as ``type``.
        :param location: optional ``location`` value.
        :param inputs: objects whose ``__dict__`` is sent under ``inputs``;
            a falsy value sends an empty list.
        :param outputs: objects whose ``__dict__`` is sent under
            ``outputs``; a falsy value sends an empty list.
        :param description: optional ``description`` value.
        :param context: optional ``context`` value.
        :param run_id: optional ``runId`` value.
        :return: the result of ``self._put``.

        Only truthy optional values are included in the payload.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(job_name, 'job_name')
        Utils.is_instance_of(job_type, JobType)

        body = {'type': job_type.value}
        body['inputs'] = (
            [dataset_id.__dict__ for dataset_id in inputs] if inputs else [])
        body['outputs'] = (
            [dataset_id.__dict__ for dataset_id in outputs] if outputs else [])

        # Listed in the order the keys must be inserted into the payload.
        optional_entries = (
            ('runId', run_id),
            ('context', context),
            ('location', location),
            ('description', description),
        )
        for key, value in optional_entries:
            if value:
                body[key] = value

        job_url = self._url('/namespaces/{0}/jobs/{1}',
                            namespace_name, job_name)
        return self._put(job_url, payload=body)
Exemple #23
0
    def create_job(self,
                   namespace_name,
                   job_name,
                   job_type,
                   location=None,
                   input_dataset=None,
                   output_dataset=None,
                   description=None,
                   context=None):
        """PUT a job definition.

        :param namespace_name: namespace segment of the URL.
        :param job_name: job segment of the URL.
        :param job_type: checked with ``Utils.is_instance_of`` against
            ``JobType``; its ``name`` is sent as ``type``.
        :param location: optional ``location`` value.
        :param input_dataset: sent as ``inputs``; a falsy value sends an
            empty list.
        :param output_dataset: sent as ``outputs``; a falsy value sends an
            empty list.
        :param description: optional ``description`` value.
        :param context: optional ``context`` value.
        :return: the result of ``self._put``.

        Only truthy optional values are included in the payload.
        """
        Utils.check_name_length(namespace_name, 'namespace_name')
        Utils.check_name_length(job_name, 'job_name')
        Utils.is_instance_of(job_type, JobType)

        body = dict(inputs=input_dataset or [],
                    outputs=output_dataset or [],
                    type=job_type.name)

        # Listed in the order the keys must be inserted into the payload.
        optional_entries = (
            ('context', context),
            ('location', location),
            ('description', description),
        )
        for key, value in optional_entries:
            if value:
                body[key] = value

        job_url = self._url('/namespaces/{0}/jobs/{1}',
                            namespace_name, job_name)
        return self._put(job_url, payload=body)
Exemple #24
0
    def _backend_from_env():
        """Build the backend selected by the ``MARQUEZ_BACKEND`` variable.

        The variable (default ``DEFAULT_MARQUEZ_BACKEND``) is upper-cased
        and matched against the supported kinds:

        * ``HTTP`` - ``HttpBackend`` built from ``MARQUEZ_URL``,
          ``MARQUEZ_TIMEOUT_MS`` (converted with ``Utils.to_seconds``) and
          ``MARQUEZ_API_KEY``.
        * ``FILE`` - ``FileBackend`` built from ``MARQUEZ_FILE``.
        * ``LOG`` - ``LogBackend``.

        :return: the constructed backend.
        :raises ValueError: if the configured backend is not one of the
            kinds above. Previously this case fell through and returned
            ``None``, which only failed later on first use.
        """
        backend = \
            os.environ.get('MARQUEZ_BACKEND', DEFAULT_MARQUEZ_BACKEND).upper()

        if backend == 'HTTP':
            url = os.environ.get('MARQUEZ_URL', DEFAULT_MARQUEZ_URL)
            api_key = os.environ.get('MARQUEZ_API_KEY')
            timeout = Utils.to_seconds(
                os.environ.get('MARQUEZ_TIMEOUT_MS', DEFAULT_TIMEOUT_MS))
            return HttpBackend(url, timeout, api_key)

        if backend == 'FILE':
            file = os.environ.get('MARQUEZ_FILE', DEFAULT_MARQUEZ_FILE)
            return FileBackend(file)

        if backend == 'LOG':
            return LogBackend()

        # Fail at configuration time with the offending value instead of
        # handing callers a None backend.
        raise ValueError(
            f"Unsupported MARQUEZ_BACKEND '{backend}': "
            f"expected one of HTTP, FILE, LOG")
Exemple #25
0
def test_is_instance_of():
    """``Utils.is_instance_of`` raises ``ValueError`` on an enum mismatch."""
    mismatched_pairs = (
        (JobType.BATCH, DatasetType),
        (DatasetType.DB_TABLE, JobType),
        (JobType.BATCH, RunState),
    )

    for value, enum_type in mismatched_pairs:
        with pytest.raises(ValueError):
            Utils.is_instance_of(variable_value=value,
                                 variable_enum_type=enum_type)
Exemple #26
0
    def _get(self, url, params=None, as_json=True):
        """Issue a GET request, log its details and unwrap the response.

        :param url: full request URL.
        :param params: query parameters forwarded to ``requests.get``.
        :param as_json: forwarded to ``self._response``.
        :return: the result of ``self._response``.

        A dict with the URL, HTTP method, headers, parameters and elapsed
        milliseconds is written to the info log.
        """
        now_ms = Utils.now_ms()

        response = requests.get(url,
                                params=params,
                                headers=_HEADERS,
                                timeout=self._timeout)

        # NOTE(review): headers are logged verbatim; if _HEADERS ever
        # carries credentials they would end up in the log - confirm.
        get_details = {
            'url': url,
            # This is a GET; the log entry used to claim 'POST'.
            'http_method': 'GET',
            'http_headers': _HEADERS,
            'payload': params,
            # Same clock source as the start timestamp above.
            'duration_ms': Utils.now_ms() - now_ms,
        }

        log.info(get_details)

        return self._response(response, as_json)
Exemple #27
0
    def _put(self, url, payload=None, as_json=True):
        """Issue a PUT request, log its details and unwrap the response.

        :param url: full request URL.
        :param payload: sent as the request's ``json`` argument.
        :param as_json: forwarded to ``self._response``.
        :return: the result of ``self._response``.

        A dict with the URL, HTTP method, headers, payload and elapsed
        milliseconds is written to the info log.
        """
        now_ms = Utils.now_ms()

        response = requests.put(url=url,
                                headers=_HEADERS,
                                json=payload,
                                timeout=self._timeout)

        # NOTE(review): headers are logged verbatim; if _HEADERS ever
        # carries credentials they would end up in the log - confirm.
        put_details = {
            'url': url,
            # This is a PUT; the log entry used to claim 'POST'.
            'http_method': 'PUT',
            'http_headers': _HEADERS,
            'payload': payload,
            # Same clock source as the start timestamp above.
            'duration_ms': Utils.now_ms() - now_ms,
        }

        log.info(put_details)

        return self._response(response, as_json)
Exemple #28
0
def test_mark_job_run_as_failed(mock_post, client):
    """Marking a run as failed returns the run and posts to ``/fail``.

    The mocked POST response reports ``HTTPStatus.OK`` and a JSON body of
    ``FAILED``; the test checks the returned run's fields and that exactly
    one POST was made to the run's ``fail`` URL with the given timestamp.
    """
    # status_code is a plain attribute on a response, so assign it
    # directly; setting `.status_code.return_value` only configures the
    # result of *calling* status_code and leaves the attribute a mock.
    mock_post.return_value.status_code = HTTPStatus.OK
    mock_post.return_value.json.return_value = FAILED

    now = Utils.utc_now()
    run = client.mark_job_run_as_failed(RUN_ID, at=now)

    assert run['id'] == RUN_ID
    assert run['nominalStartTime'] == NOMINAL_START_TIME
    assert run['nominalEndTime'] == NOMINAL_END_TIME
    assert run['state'] == RunState.FAILED
    assert run['startedAt'] == START_AT
    assert run['endedAt'] == ENDED_AT
    assert run['durationMs'] == DURATION_MS

    mock_post.assert_called_once_with(url=client._url(
        '/jobs/runs/{0}/fail?at={1}', RUN_ID, now),
                                      headers=mock.ANY,
                                      json=None,
                                      timeout=mock.ANY)
Exemple #29
0
    def test_create_job_run(self, mock_post):
        """Creating a run with ``mark_as_running=True`` returns the run.

        The mocked POST returns a run dict; the test checks that the
        response carries an id, the run args and the start timestamp.
        """
        job_name = "my-job"
        run_id = str(uuid.uuid4())
        action_at = Utils.utc_now()

        run_args = {
            "email": "*****@*****.**",
            "emailOnFailure": "true",
            "emailOnRetry": "true",
            "retries": "1"
        }

        created_run = {
            'id': f'{run_id}',
            'createdAt': '2020-08-12T22:33:02.787228Z',
            'updatedAt': '2020-08-12T22:33:02.787228Z',
            'nominalStartTime': None,
            'nominalEndTime': None,
            'state': 'NEW',
            'startedAt': f'{action_at}',
            'endedAt': None,
            'durationMs': None,
            # Separate dict with the same entries as run_args.
            'args': dict(run_args)
        }
        mock_post.return_value = created_run

        response = self.client.create_job_run(
            namespace_name=_NAMESPACE,
            job_name=job_name,
            run_id=run_id,
            nominal_start_time=None,
            nominal_end_time=None,
            run_args=run_args,
            mark_as_running=True)

        assert response['id'] is not None
        assert str(response['args']) == str(run_args)
        assert str(response['startedAt']) == action_at
Exemple #30
0
    def test_mark_job_run_as_aborted(self, mock_post):
        """Aborting a run returns the mocked run in the ``ABORTED`` state.

        The test checks the id, state and start timestamp of the response.
        """
        run_id = str(uuid.uuid4())
        action_at = Utils.utc_now()

        aborted_run = {
            'id': f'{run_id}',
            'createdAt': '2020-08-12T22:36:50.739951Z',
            'updatedAt': '2020-08-13T17:56:39.516802Z',
            'nominalStartTime': None,
            'nominalEndTime': None,
            'state': 'ABORTED',
            'startedAt': f'{action_at}',
            'endedAt': None,
            'durationMs': None,
            'args': {}
        }
        mock_post.return_value = aborted_run

        response = self.client.mark_job_run_as_aborted(
            run_id=run_id, action_at=action_at)

        assert str(response['id']) == run_id
        assert str(response['state']) == RunState.ABORTED.value
        assert str(response['startedAt']) == action_at