예제 #1
0
def test_binning_model_records():
    """Check that ``binning_model.records()`` yields the records served by the mock."""
    expected_records = [
        {
            "id": ["d8b7351d-24ce-49aa-8655-5b5809ab6bb8"],
            "isActive": ["true"],
            "clauseId": ["2e6c5f1b-ed49-40ab-8cbb-350aded25070"],
            "similarityFunction": ["COSINE"],
            "tokenizer": ["DEFAULT"],
            "fieldName": ["surname"],
            "threshold": ["0.75"],
        }
    ]
    # The mocked records endpoint answers with one JSON document per line.
    payload = "\n".join(map(json.dumps, expected_records))

    responses.add(responses.GET, project_url, json=project_config)
    responses.add(
        responses.GET,
        "http://localhost:9100/api/versioned/v1/projects/1/binningModel/records",
        body=payload,
    )

    client = Client(UsernamePasswordAuth("username", "password"))
    mastering_project = client.projects.by_resource_id("1").as_mastering()

    fetched = list(mastering_project.binning_model().records())
    assert fetched == expected_records
예제 #2
0
def client():
    """Return a ``Client`` authenticated with placeholder username/password credentials."""
    from tamr_unify_client import Client
    from tamr_unify_client.auth import UsernamePasswordAuth

    return Client(UsernamePasswordAuth("username", "password"))
예제 #3
0
def get_connect_session(connect_info: Client) -> requests.Session:
    """Returns an authenticated session using Tamr credentials from configuration.

    Raises an exception if df_connect is not installed or running correctly.

    Args:
        connect_info: An instance of a Client object

    Returns:
        An authenticated session

    Raises:
        RuntimeError: if a connection to df_connect cannot be established
    """
    auth = UsernamePasswordAuth(connect_info.tamr_username, connect_info.tamr_password)
    s = requests.Session()
    s.auth = auth
    s.headers.update({"Content-type": "application/json"})
    s.headers.update({"Accept": "application/json"})

    # test that df_connect is running properly
    url = _get_url(connect_info, "/api/service/health")
    try:
        r = s.get(url)
        r.raise_for_status()
    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as err:
        # The session is never handed to the caller on this path, so release it here.
        s.close()
        # Chain the underlying error so the traceback shows why the health check failed.
        raise RuntimeError(
            f"Tamr auxiliary service df-df_connect is either not running or not healthy at {url}!"
            f" Did you install it? Df-connect does not come with default Tamr installation."
            f" Check its status and your configuration."
        ) from err
    return s
def test_continuous_mastering():
    """Run each stage of a mastering project in order and expect every operation to succeed."""
    client = Client(UsernamePasswordAuth("username", "password"))
    mastering = client.projects.by_resource_id("1").as_mastering()

    # Unified dataset first, then the pairs.
    assert mastering.unified_dataset().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.pairs().refresh(poll_interval_seconds=0).succeeded()

    # Train the pair-matching model, then run its predictions.
    matcher = mastering.pair_matching_model()
    assert matcher.train(poll_interval_seconds=0).succeeded()
    assert matcher.predict(poll_interval_seconds=0).succeeded()

    # Finally refresh both cluster datasets.
    assert mastering.record_clusters().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.published_clusters().refresh(poll_interval_seconds=0).succeeded()
    def test_dataset_profile(self):
        """Check that each profile attribute mirrors its field in the mocked JSON."""
        client = Client(UsernamePasswordAuth("username", "password"))

        dataset_id = "3"
        origin = f"{client.protocol}://{client.host}:{client.port}"
        dataset_url = f"{origin}/api/versioned/v1/datasets/{dataset_id}"
        responses.add(responses.GET, dataset_url, json={})
        responses.add(responses.GET, f"{dataset_url}/profile", json=self.profile_stale)

        profile = client.datasets.by_resource_id(dataset_id).profile()

        # (JSON key, profile attribute) pairs, compared in this order.
        key_to_attribute = (
            ("datasetName", "dataset_name"),
            ("relativeDatasetId", "relative_dataset_id"),
            ("isUpToDate", "is_up_to_date"),
            ("profiledDataVersion", "profiled_data_version"),
            ("profiledAt", "profiled_at"),
            ("simpleMetrics", "simple_metrics"),
            ("attributeProfiles", "attribute_profiles"),
        )
        for json_key, attribute in key_to_attribute:
            self.assertEqual(self.profile_stale[json_key], getattr(profile, attribute))
예제 #6
0
파일: app.py 프로젝트: ianbakst/test_app
    def post(self):
        """Return the project names of the Tamr instance described by the form data.

        The request must carry an ``Authorization`` header whose second token is
        base64-encoded ``username:password`` matching ``my_app_username`` and
        ``my_app_password``. The Tamr connection details are read from the form
        fields ``Host``, ``Username``, ``Password``, ``Protocol`` and ``Port``.

        Returns:
            ``({'projects': <result of get_all_project_names>}, 200)`` on success,
            or a 401 JSON response when the header is missing, malformed or
            carries the wrong credentials.
        """

        # First parse the header for credentials:
        try:
            encoded_auth_header_bytes = request.headers['Authorization'].split(
            )[1].encode('utf-8')
            # Split on the first colon only: the password part may itself
            # contain ':' and must not be rejected for that reason.
            username, password = b64decode(encoded_auth_header_bytes).decode(
                'utf-8').split(':', 1)
            if username != my_app_username or password != my_app_password:
                raise ValueError()
        # this is too broad except that if anything above breaks it means authorization isn't correct
        except Exception:
            resp = app.response_class(response=json.dumps(
                "credentials are required to access this resource."),
                                      status=401,
                                      content_type='application/json')
            return resp

        # Tamr connection details come from the form, not from the header above.
        host = request.form['Host']
        tamr_user = request.form['Username']
        tamr_password = request.form['Password']
        auth = UsernamePasswordAuth(tamr_user, tamr_password)
        protocol = request.form['Protocol']
        port = request.form['Port']
        tamr = Client(auth, host=host, protocol=protocol, port=port)
        projects = get_all_project_names(tamr)
        return {'projects': projects}, 200
예제 #7
0
def test_request_absolute_endpoint():
    """Check that a leading-slash endpoint is requested relative to the server root."""
    absolute_path = "/api/service/health"
    responses.add(responses.GET, f"http://localhost:9100{absolute_path}", json={})

    tamr = Client(UsernamePasswordAuth("username", "password"))

    # Only the root-relative URL is mocked, so this call fails with a
    # ConnectionRefused exception if the client mishandles absolute paths.
    tamr.get(absolute_path)
예제 #8
0
 def test_project_by_name(self):
     """Look a project up by name and compare its data with the first mocked entry."""
     responses.add(responses.GET, self.project_list_url, json=self.project_json)

     client = Client(UsernamePasswordAuth("username", "password"))
     found = client.projects.by_name(self.project_name)

     expected = self.project_json[0]
     assert found._data == expected
def test_dataset_collection_repr():
    """Check the shape of ``repr(client.datasets)``: qualified class name, api_path, parentheses."""
    tamr = Client(UsernamePasswordAuth("username", "password"))
    expected_prefix = "tamr_unify_client.models.dataset.collection.DatasetCollection("

    text = repr(tamr.datasets)

    assert text.startswith(expected_prefix)
    assert "api_path='datasets'" in text
    assert text.endswith(")")
def test_username_auth_repr():
    """Check that the auth repr shows the username but never the password."""
    credentials = UsernamePasswordAuth("myusername", "SECRET")
    expected_prefix = "tamr_unify_client.auth.username_password.UsernamePasswordAuth("

    text = repr(credentials)

    assert text.startswith(expected_prefix)
    assert "myusername" in text
    # The password must not appear in the repr.
    assert "SECRET" not in text
    assert text.endswith(")")
def test_http_error():
    """Ensure that the client surfaces HTTP errors as exceptions."""
    endpoint = "http://localhost:9100/api/versioned/v1/projects/1"
    responses.add(responses.GET, endpoint, status=401)
    auth = UsernamePasswordAuth("nonexistent-username", "invalid-password")
    unify = Client(auth)
    with raises(HTTPError) as e:
        unify.projects.by_resource_id("1")
    # NOTE: assuming `raises` is pytest's, `e` is an ExceptionInfo wrapper whose
    # str() is not the exception message; the raised error itself is `e.value`.
    assert f"401 Client Error: Unauthorized for url: {endpoint}" in str(e.value)
예제 #12
0
def test_client_create():
    """Check that ``utils.client.create`` maps the instance config onto the client."""
    instance_config = CONFIG["my_instance_name"]
    my_client = utils.client.create(**instance_config)

    assert my_client.host == instance_config["host"]
    # The client's port is compared against the integer form of the configured value.
    assert my_client.port == int(instance_config["port"])
    assert my_client.protocol == instance_config["protocol"]
    assert my_client.base_path == "/api/versioned/v1/"

    expected_auth = UsernamePasswordAuth("admin", os.environ["TAMR_TOOLBOX_PASSWORD"])
    assert my_client.auth == expected_auth
def test_dataset_status():
    """Fetch a dataset's status and compare its raw data with the mocked JSON."""
    dataset_id = "1"
    dataset_url = f"http://localhost:9100/api/versioned/v1/datasets/{dataset_id}"
    responses.add(responses.GET, dataset_url, json={})
    responses.add(responses.GET, f"{dataset_url}/status", json=status_json)

    client = Client(UsernamePasswordAuth("username", "password"))
    fetched_status = client.datasets.by_resource_id(dataset_id).status()

    assert fetched_status._data == status_json
def test_dataset_records():
    """Check that ``dataset.records()`` parses a two-line JSON body into two dicts."""
    dataset_id = "1"
    dataset_url = f"http://localhost:9100/api/versioned/v1/datasets/{dataset_id}"
    responses.add(responses.GET, dataset_url, json={})
    # The mocked body holds one JSON document per line.
    responses.add(
        responses.GET,
        f"{dataset_url}/records",
        body='{"attribute1": 1}\n{"attribute1": 2}',
    )

    client = Client(UsernamePasswordAuth("username", "password"))
    dataset = client.datasets.by_resource_id(dataset_id)

    parsed = list(dataset.records())
    assert parsed == [{"attribute1": 1}, {"attribute1": 2}]
def test_dataset_profile():
    """Fetch a dataset's profile and compare its raw data with the mocked JSON."""
    dataset_id = "3"
    dataset_url = f"http://localhost:9100/api/versioned/v1/datasets/{dataset_id}"
    profile_url = f"{dataset_url}/profile"

    responses.add(responses.GET, dataset_url, json={})
    responses.add(responses.GET, profile_url, json=profile_json1)
    # The refresh endpoint is registered as well, although nothing below calls it.
    responses.add(responses.POST, f"{profile_url}:refresh", json=[], status=204)

    client = Client(UsernamePasswordAuth("username", "password"))
    fetched_profile = client.datasets.by_resource_id(dataset_id).profile()

    assert fetched_profile._data == profile_json1
def test_continuous_mastering():
    """Run each mastering stage, then read the mocked pair-count estimate."""
    client = Client(UsernamePasswordAuth("username", "password"))
    mastering = client.projects.by_resource_id("1").as_mastering()

    # Every stage is started with a zero polling interval and must succeed.
    assert mastering.unified_dataset().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.pairs().refresh(poll_interval_seconds=0).succeeded()

    matcher = mastering.pair_matching_model()
    assert matcher.train(poll_interval_seconds=0).succeeded()
    assert matcher.predict(poll_interval_seconds=0).succeeded()

    assert mastering.record_clusters().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.published_clusters().refresh(poll_interval_seconds=0).succeeded()

    def pair_counts(candidate, generated):
        # Counts are strings in the mocked payload.
        return {"candidatePairCount": candidate, "generatedPairCount": generated}

    estimate_json = {
        "isUpToDate": "true",
        "totalEstimate": pair_counts("200", "100"),
        "clauseEstimates": {
            "clause1": pair_counts("50", "25"),
            "clause2": pair_counts("50", "25"),
            "clause3": pair_counts("100", "50"),
        },
    }
    responses.add(
        responses.GET,
        "http://localhost:9100/api/versioned/v1/projects/1/estimatedPairCounts",
        json=estimate_json,
    )

    # Each check below asks the project for a fresh estimate.
    assert mastering.estimate_pairs().is_up_to_date

    total = mastering.estimate_pairs().total_estimate
    assert total["candidatePairCount"] == "200"

    first_clause = mastering.estimate_pairs().clause_estimates["clause1"]
    assert first_clause["generatedPairCount"] == "25"
예제 #17
0
def test_project_add_source_dataset():
    """Add a dataset to a project, then read back the project's input datasets."""
    # Register the four mocked endpoints in the same order as before.
    mocked_calls = (
        (responses.GET, datasets_url, {"json": dataset_json}),
        (responses.GET, projects_url, {"json": project_json}),
        (
            responses.POST,
            input_datasets_url,
            {"json": post_input_datasets_json, "status": 204},
        ),
        (responses.GET, input_datasets_url, {"json": get_input_datasets_json}),
    )
    for method, url, options in mocked_calls:
        responses.add(method, url, **options)

    client = Client(UsernamePasswordAuth("username", "password"))
    source = client.datasets.by_external_id(dataset_external_id)
    project = client.projects.by_external_id(project_external_id)
    project.add_source_dataset(source)

    # Read the input datasets back through the project's own client.
    listing = project.client.get(project.api_path + "/inputDatasets")
    assert listing.successful().json() == dataset_json
예제 #18
0
def test_record_clusters_with_data():
    """Check that ``record_clusters_with_data()`` returns the mocked clusters dataset."""
    project_id = "1"
    api_root = "http://localhost:9100/api/versioned/v1"
    project_url = f"{api_root}/projects/{project_id}"

    # Dataset expected back from record_clusters_with_data().
    rcwd_json = {
        "externalId": "1",
        "id": "unify://unified-data/v1/datasets/36",
        "name": "Project_1_unified_dataset_dedup_clusters_with_data",
        "relativeId": "datasets/36",
        "version": "251",
    }

    tamr = Client(UsernamePasswordAuth("username", "password"))

    responses.add(
        responses.GET,
        project_url,
        json={
            "name": "Project 1",
            "description": "Mastering Project",
            "type": "DEDUP",
            "unifiedDatasetName": "Project 1 - Unified Dataset",
            "externalId": "Project1",
            "resourceId": "1",
        },
    )
    responses.add(
        responses.GET,
        f"{project_url}/unifiedDataset",
        json={
            "id": "unify://unified-data/v1/datasets/8",
            "name": "Project_1_unified_dataset",
            "version": "10",
            "relativeId": "datasets/8",
            "externalId": "Project_1_unified_dataset",
        },
    )
    # The dataset listing contains only the clusters-with-data dataset.
    responses.add(responses.GET, f"{api_root}/datasets", json=[rcwd_json])

    mastering = tamr.projects.by_resource_id(project_id).as_mastering()
    found = mastering.record_clusters_with_data()
    assert found.name == rcwd_json["name"]
def test_client_repr():
    """Check the client repr for default and for explicitly configured clients."""
    credentials = UsernamePasswordAuth("username", "password")

    default_text = repr(Client(credentials))
    assert default_text.startswith("tamr_unify_client.client.Client(")
    assert "http" in default_text
    assert default_text.endswith(")")
    # The password must not appear in the repr.
    assert "password" not in default_text

    custom_text = repr(
        Client(credentials, protocol="http", port=1234, base_path="foo/bar")
    )
    for fragment in ("'http'", "1234", "foo/bar"):
        assert fragment in custom_text
def test_continuous_categorization():
    """Run each stage of a categorization project and expect every operation to succeed."""
    client = Client(UsernamePasswordAuth("username", "password"))
    categorization = client.projects.by_resource_id("3").as_categorization()

    # Refresh the unified dataset before touching the model.
    refresh_op = categorization.unified_dataset().refresh(poll_interval_seconds=0)
    assert refresh_op.succeeded()

    classifier = categorization.model()
    assert classifier.train(poll_interval_seconds=0).succeeded()
    assert classifier.predict(poll_interval_seconds=0).succeeded()
    def test_profile_refresh(self):
        """Refresh an existing profile and expect the resulting operation to succeed."""
        client = Client(UsernamePasswordAuth("username", "password"))

        dataset_id = "3"
        origin = f"{client.protocol}://{client.host}:{client.port}"
        dataset_url = f"{origin}/api/versioned/v1/datasets/{dataset_id}"
        profile_url = f"{dataset_url}/profile"

        responses.add(responses.GET, dataset_url, json={})
        responses.add(responses.GET, profile_url, json=self.profile_stale)
        responses.add(
            responses.POST, f"{profile_url}:refresh", json=self.operation_succeeded
        )

        existing_profile = client.datasets.by_resource_id(dataset_id).profile()
        refresh_op = existing_profile.refresh()
        self.assertTrue(refresh_op.succeeded())
def test_dataset_status_repr():
    """Check that the DatasetStatus repr includes the class name and key field values."""
    tamr = Client(UsernamePasswordAuth("username", "password"))
    status = DatasetStatus.from_json(
        tamr,
        {
            "relativeId": "path/to/thing/1",
            "datasetName": "testdsname",
            "relativeDatasetId": "path/to/data/1",
            "isStreamable": True,
        },
    )

    text = repr(status)

    assert text.startswith("tamr_unify_client.models.dataset_status.DatasetStatus(")
    for fragment in ("testdsname", "True", "path/to/thing"):
        assert fragment in text
    assert text.endswith(")")
    def test_profile_create(self):
        """Create a profile for a dataset that has none and expect the operation to succeed."""
        client = Client(UsernamePasswordAuth("username", "password"))

        dataset_id = "3"
        origin = f"{client.protocol}://{client.host}:{client.port}"
        dataset_url = f"{origin}/api/versioned/v1/datasets/{dataset_id}"
        profile_url = f"{dataset_url}/profile"

        responses.add(responses.GET, dataset_url, json={})
        # The profile does not exist yet, so any attempt to read it while
        # creating it gets a 404.
        responses.add(responses.GET, profile_url, status=404)
        responses.add(
            responses.POST, f"{profile_url}:refresh", json=self.operation_succeeded
        )

        target = client.datasets.by_resource_id(dataset_id)
        create_op = target.create_profile()
        self.assertTrue(create_op.succeeded())
def test_client_repr():
    """Check the client repr, first with defaults and then with optional arguments."""
    credentials = UsernamePasswordAuth("username", "password")
    qualified_name = "tamr_unify_client.client.Client"

    default_text = repr(Client(credentials))

    assert default_text.startswith(f"{qualified_name}(")
    assert "http" in default_text
    # The password must not appear in the repr.
    assert "password" not in default_text
    assert default_text.endswith(")")

    # Optional constructor arguments should be visible in the repr too.
    configured = Client(credentials, protocol="http", port=1234, base_path="foo/bar")
    configured_text = repr(configured)

    for fragment in ("'http'", "1234", "foo/bar"):
        assert fragment in configured_text
예제 #25
0
def test_request_session_cookie():
    """Check that a caller-supplied session is used and its cookies are sent."""
    responses.add(
        responses.GET, "http://localhost:9100/api/versioned/v1/test", json={}
    )

    session = requests.Session()
    session.cookies.set_cookie(
        requests.cookies.create_cookie(name="test_cookie", value="the-cookie-works")
    )

    client = Client(UsernamePasswordAuth("username", "password"), session=session)

    # The client must keep the very session object it was given.
    assert client.session is session

    client.get("test")

    assert len(responses.calls) == 1
    sent = responses.calls[0].request
    assert sent.url.endswith("test")
    cookie_header = sent.headers.get("Cookie")
    assert cookie_header is not None
    assert "test_cookie=" in cookie_header
예제 #26
0
def create(
    *,
    username: str,
    password: str,
    host: str,
    port: Optional[Union[str, int]] = 9100,
    protocol: str = "http",
    store_auth_cookie: bool = False,
    enforce_healthy: bool = False,
) -> Client:
    """Build a Tamr client from the given connection settings.

    Args:
        username: The username used to access Tamr
        password: The password for that user
        host: The ip address of Tamr
        port: The port of the Tamr UI. Pass `None` for an address without a port
        protocol: https or http
        store_auth_cookie: If true, the Tamr authentication cookie may be stored and reused
        enforce_healthy: If true, require a passing health check before returning

    Returns:
        Tamr client

    Raises:
        SystemError: if `enforce_healthy` is set and the health check fails
    """
    has_port = port is not None
    full_address = f"{protocol}://{host}"
    if has_port:
        full_address = f"{full_address}:{port}"
    LOGGER.info(f"Creating client as user {username} at {full_address}.")

    credentials = UsernamePasswordAuth(username=username, password=password)
    client = Client(
        auth=credentials,
        host=host,
        port=int(port) if has_port else None,
        protocol=protocol,
        store_auth_cookie=store_auth_cookie,
    )

    # The health check only runs when the caller asked for enforcement.
    if enforce_healthy and not health_check(client):
        LOGGER.error("Tamr is not healthy. Check logs and Tamr.")
        raise SystemError("Tamr is not healthy. Check logs and Tamr.")
    return client
예제 #27
0
def create(
    *,
    username: str,
    password: str,
    host: str,
    port: str = "9100",
    protocol: str = "http",
    enforce_healthy: bool = False,
) -> Client:
    """Creates a Tamr client from the provided configuration values

    Args:
        username: The username used to access Tamr
        password: the password for the user
        host: The ip address of Tamr
        port: The port of the Tamr UI
        protocol: https or http
        enforce_healthy: If true will enforce a healthy state upon creation

    Returns:
        Tamr client

    Raises:
        SystemError: if `enforce_healthy` is set and the health check fails
    """
    LOGGER.info(
        f"Creating client as user {username} at {protocol}://{host}:{port}.")
    client = Client(
        auth=UsernamePasswordAuth(username=username, password=password),
        host=host,
        port=int(port),
        protocol=protocol,
    )
    # Only run the health check when the caller asked for enforcement; its
    # result was ignored otherwise, so the call was wasted work.
    if enforce_healthy and not health_check(client):
        LOGGER.error("Tamr is not healthy. Check logs and Tamr.")
        raise SystemError("Tamr is not healthy. Check logs and Tamr.")
    return client
예제 #28
0
from functools import partial
import json

import responses

from tamr_unify_client import Client
from tamr_unify_client.auth import UsernamePasswordAuth
from tamr_unify_client.project.resource import ProjectSpec

# Module-level client built from placeholder credentials.
auth = UsernamePasswordAuth("username", "password")
tamr = Client(auth)

# Project fields; also spread into ``project_json`` below.
creation_spec = {
    "name": "Project 1",
    "description": "Mastering Project",
    "type": "DEDUP",
    "unifiedDatasetName": "Project 1 - Unified Dataset",
    "externalId": "Project1",
}

project_json = {
    **creation_spec,
    "id": "unify://unified-data/v1/projects/1",
    "created": {
        "username": "******",
        "time": "2018-09-10T16:06:20.636Z",
        "version": "project 1 created version",
    },
    "lastModified": {
        "username": "******",
        "time": "2018-09-10T16:06:20.851Z",
예제 #29
0
 def setUp(self):
     """Attach a client built from placeholder credentials to the test case."""
     self.tamr = Client(UsernamePasswordAuth("username", "password"))
예제 #30
0
def test_binning_model_update_records():
    """Check that ``update_records`` posts one JSON document per CREATE update."""

    def make_record(record_id):
        # All records share the same clause settings and differ only by id.
        return {
            "id": [record_id],
            "isActive": ["true"],
            "clauseId": ["2e6c5f1b-ed49-40ab-8cbb-350aded25070"],
            "similarityFunction": ["COSINE"],
            "tokenizer": ["DEFAULT"],
            "fieldName": ["surname"],
            "threshold": ["0.75"],
        }

    record_ids = (
        "d8b7351d-24ce-49aa-8655-5b5809ab6bb8",
        "d8b7351d-24ce-49aa-8655-5b5809ab6bc9",
        "d8b7351d-24ce-49aa-8655-5b5809ab6bd8",
    )
    records_body = [make_record(record_id) for record_id in record_ids]

    # Expected payload, built from the ids rather than from records_body.
    expected_updates = [
        {"action": "CREATE", "recordId": record_id, "record": make_record(record_id)}
        for record_id in record_ids
    ]

    captured = {}

    def update_callback(request, snoop):
        # Keep the request body so it can be inspected after the call.
        snoop["payload"] = request.body
        return 200, {}, "{}"

    responses.add(responses.GET, project_url, json=project_config)
    responses.add_callback(
        responses.POST,
        "http://localhost:9100/api/versioned/v1/projects/1/binningModel/records",
        callback=partial(update_callback, snoop=captured),
    )

    client = Client(UsernamePasswordAuth("username", "password"))
    mastering = client.projects.by_resource_id("1").as_mastering()
    binning_model = mastering.binning_model()

    updates = []
    for record in records_body:
        updates.append(
            {"action": "CREATE", "recordId": record["id"][0], "record": record}
        )

    binning_model.update_records(updates)

    # Decode each item of the captured body and compare with the expectation.
    actual = list(map(json.loads, captured["payload"]))
    assert expected_updates == actual