Example #1
0
def test_from_op_failure():
    """An operation whose state is ``FAILED`` maps to the ``FAILED`` plan-node status."""
    # The timestamps, usernames and versions below are filler values; the assertion at the
    # end only looks at the status derived from the operation.
    failed_status = {
        "state": "FAILED",
        "startTime": "early",
        "endTime": "late",
        "message": ""
    }
    created_stamp = {
        "username": "",
        "time": "early",
        "version": "-1"
    }
    modified_stamp = {
        "username": "",
        "time": "late",
        "version": "-1"
    }
    failed_op_json = {
        "id": "-1",
        "type": "NOOP",
        "description": "test",
        "status": failed_status,
        "created": created_stamp,
        "lastModified": modified_stamp,
        "relativeId": "operations/-1",
    }

    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    failed_op = Operation.from_json(tamr, failed_op_json)

    observed_status = PlanNodeStatus.from_tamr_op(failed_op)
    assert observed_status == PlanNodeStatus.PlanNodeStatus.FAILED
Example #2
0
def test_operation_from_json(client):
    """Check the attributes of an ``Operation`` built from ``op_1_json``.

    The operation is created with an explicit API path, which ``api_path`` is expected to
    report back unchanged.
    """
    explicit_path = "operations/123"
    operation = Operation.from_json(client, op_1_json, explicit_path)

    # Identity / location attributes.
    assert operation.api_path == explicit_path
    assert operation.relative_id == op_1_json["relativeId"]
    assert operation.resource_id == "1"

    # Attributes compared directly against the source JSON.
    assert operation.type == op_1_json["type"]
    assert operation.description == op_1_json["description"]
    assert operation.status == op_1_json["status"]

    # State-related attributes.
    assert operation.state == "SUCCEEDED"
    assert operation.succeeded
Example #3
0
    def train(self, **options):
        """Learn from the labels that have been verified.

        Posts to this resource's ``:refresh`` endpoint and wraps the JSON reply in an
        :class:`~tamr_unify_client.operation.Operation`.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The resultant operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        response = self.client.post(self.api_path + ":refresh")
        payload = response.successful().json()
        operation = Operation.from_json(self.client, payload)
        return operation.apply_options(**options)
Example #4
0
    def predict(self, **options):
        """Suggest labels for records that have not been verified.

        The ``:refresh`` request is posted to the parent of this resource's API path, i.e.
        the path with its last ``/``-separated segment removed.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The resultant operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        # Everything before the final "/" (empty string when the path has no "/").
        parent_path = self.api_path.rpartition("/")[0]
        response = self.client.post(parent_path + ":refresh")
        payload = response.successful().json()
        operation = Operation.from_json(self.client, payload)
        return operation.apply_options(**options)
Example #5
0
    def refresh(self, **options):
        """Bring the dataset profile up to date if needed.

        The update happens on the server only; call
        :func:`~tamr_unify_client.dataset.resource.Dataset.profile`
        afterwards to fetch the updated profile.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The refresh operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        response = self.client.post(self.api_path + ":refresh")
        payload = response.successful().json()
        refresh_op = Operation.from_json(self.client, payload)
        return refresh_op.apply_options(**options)
Example #6
0
    def refresh(self, **options):
        """Bring the estimated pair counts up to date if needed.

        The update happens on the server only; call
        :func:`~tamr_unify_client.mastering.project.MasteringProject.estimate_pairs`
        afterwards to fetch the updated estimate.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The refresh operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        response = self.client.post(self.api_path + ":refresh")
        payload = response.successful().json()
        refresh_op = Operation.from_json(self.client, payload)
        return refresh_op.apply_options(**options)
Example #7
0
def get_all(tamr: Client) -> List[Operation]:
    """
    Fetch every job (operation) listed by the versioned operations endpoint.

    Args:
        tamr: A Tamr client

    Returns:
        A list with one Operation object per item of the endpoint's JSON reply.

    """
    raw_response = tamr.get(
        "/api/versioned/v1/operations", headers={"Accept": "application/json"}, stream=True
    )

    operations = []
    for op_json in raw_response.json():
        operations.append(Operation.from_json(tamr, op_json))

    return operations
Example #8
0
    def test_refresh(self):
        """``refresh`` on estimated pair counts yields the succeeded operation.

        The POST to ``<api_path>:refresh`` is mocked with ``_refresh_json``; the GET of
        ``operations/24`` is mocked with the same payload whose state is ``SUCCEEDED``.
        The operation returned by ``refresh`` must have the same ``repr`` as an
        ``Operation`` built from that succeeded payload.
        """
        responses.add(
            responses.POST,
            f"{self._url_base}/{self._api_path}:refresh",
            json=self._refresh_json,
        )
        # Build the succeeded payload around a *new* ``status`` dict. ``dict.copy()`` is
        # shallow, so assigning into ``copy["status"]`` would also rewrite the shared
        # ``_refresh_json`` fixture.
        updated = {
            **self._refresh_json,
            "status": {**self._refresh_json["status"], "state": "SUCCEEDED"},
        }
        responses.add(responses.GET,
                      f"{self._url_base}/operations/24",
                      json=updated)

        estimate = EstimatedPairCounts.from_json(self.tamr,
                                                 self._estimate_json,
                                                 self._api_path)
        # NOTE(review): poll_interval_seconds=0 presumably makes the operation poll the
        # GET endpoint above without sleeping - confirm against Operation.apply_options.
        generated = estimate.refresh(poll_interval_seconds=0)

        created = Operation.from_json(self.tamr, updated)
        self.assertEqual(repr(generated), repr(created))
Example #9
0
def test__collect_operation_calls():
    """Exercise ``utils.testing._collect_operation_calls`` against a mocked client.

    Two scenarios are replayed through ``mock_client.get``:

    * many PENDING polls, then RUNNING, then SUCCEEDED;
    * a single PENDING response followed directly by FAILED.

    In both cases exactly three responses are expected back, all for operation ``"2"``,
    with the states asserted at the end of each scenario.
    """
    # setup mock client
    mock_client = Client(None)

    # setup mock operations
    base_operation_json = {
        "id": "2",
        "type": "SPARK",
        "description": "Profiling [employees_tiny.csv] attributes.",
        "status": {
            "state": "SUCCEEDED",
            "startTime": "2020-07-16T17:57:54.458Z",
            "endTime": "2020-07-16T17:58:22.836Z",
            "message": "",
        },
        "created": {
            "username": "******",
            "time": "2020-07-16T17:57:28.920Z",
            "version": "82"
        },
        "lastModified": {
            "username": "******",
            "time": "2020-07-16T17:58:23.977Z",
            "version": "119",
        },
        "relativeId": "operations/2",
    }

    operation_states = [
        OperationState.SUCCEEDED,
        OperationState.PENDING,
        OperationState.CANCELED,
        OperationState.RUNNING,
        OperationState.FAILED,
    ]
    mocks = {}

    for state in operation_states:
        # Each state gets its own ``status`` dict. ``dict.copy()`` is shallow, so writing
        # ``copy["status"]["state"]`` would mutate the one nested dict shared by the
        # template and by every JSON dict already handed to ``Operation.from_json``.
        op_json = {
            **base_operation_json,
            "status": {**base_operation_json["status"], "state": state.value},
        }
        mock_operation = Operation.from_json(mock_client, op_json)

        mock_response = Response()
        mock_response._content = json.dumps(op_json).encode("utf-8")
        mock_response.status_code = 200

        mocks[state] = {"op": mock_operation, "response": mock_response}

    # test succeeded with many pending
    mock_client.get = MagicMock(side_effect=[
        # response while pending
        mocks[OperationState.PENDING]["response"],
        # polling
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.RUNNING]["response"],
        # response while running
        mocks[OperationState.RUNNING]["response"],
        # response while waiting
        mocks[OperationState.SUCCEEDED]["response"],
        # response when complete
        mocks[OperationState.SUCCEEDED]["response"],
    ])

    # Make the code under test use the mocked client wherever it would otherwise build
    # one via ``tamr_toolbox.utils.client._from_response``.
    with patch("tamr_toolbox.utils.client._from_response",
               return_value=mock_client):
        result_success = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0)

    assert len(result_success) == 3
    for resp in result_success:
        assert resp.json()["id"] == "2"
    assert result_success[0].json(
    )["status"]["state"] == OperationState.PENDING.value
    assert result_success[1].json(
    )["status"]["state"] == OperationState.RUNNING.value
    assert result_success[2].json(
    )["status"]["state"] == OperationState.SUCCEEDED.value

    # test failed quickly
    mock_client.get = MagicMock(side_effect=[
        # response while pending
        mocks[OperationState.PENDING]["response"],
        # polling
        mocks[OperationState.FAILED]["response"],
        # response while running
        mocks[OperationState.FAILED]["response"],
        # response while waiting
        mocks[OperationState.FAILED]["response"],
        # response when complete
        mocks[OperationState.FAILED]["response"],
    ])

    with patch("tamr_toolbox.utils.client._from_response",
               return_value=mock_client):
        result_failed = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0)

    assert len(result_failed) == 3
    for resp in result_failed:
        assert resp.json()["id"] == "2"
    assert result_failed[0].json(
    )["status"]["state"] == OperationState.PENDING.value
    assert result_failed[1].json(
    )["status"]["state"] == OperationState.FAILED.value
    assert result_failed[2].json(
    )["status"]["state"] == OperationState.FAILED.value