def test_binning_model_records():
    """Check that ``binning_model.records()`` yields the records served by the records endpoint."""
    expected_records = [
        {
            "id": ["d8b7351d-24ce-49aa-8655-5b5809ab6bb8"],
            "isActive": ["true"],
            "clauseId": ["2e6c5f1b-ed49-40ab-8cbb-350aded25070"],
            "similarityFunction": ["COSINE"],
            "tokenizer": ["DEFAULT"],
            "fieldName": ["surname"],
            "threshold": ["0.75"],
        }
    ]
    # The registered response body is one JSON document per line.
    payload = "\n".join(map(json.dumps, expected_records))
    endpoint = f"http://localhost:9100/api/versioned/v1/projects/1/binningModel/records"

    responses.add(responses.GET, project_url, json=project_config)
    responses.add(responses.GET, endpoint, body=payload)

    api = Client(UsernamePasswordAuth("username", "password"))
    mastering_project = api.projects.by_resource_id("1").as_mastering()
    fetched = list(mastering_project.binning_model().records())

    assert fetched == expected_records
def client():
    """Build a ``Client`` that authenticates with placeholder username/password credentials."""
    # The imports stay function-local, exactly as in the original definition.
    from tamr_unify_client import Client
    from tamr_unify_client.auth import UsernamePasswordAuth

    return Client(UsernamePasswordAuth("username", "password"))
def get_connect_session(connect_info: Client) -> requests.Session:
    """Returns an authenticated session using Tamr credentials from configuration.

    Before the session is handed back, the df_connect health endpoint is
    requested once; the session is only returned when that request succeeds.

    Args:
        connect_info: An instance of a Client object; its ``tamr_username`` and
            ``tamr_password`` attributes supply the credentials

    Returns:
        An authenticated session whose ``Content-type`` and ``Accept`` headers
        are both set to ``application/json``

    Raises:
        RuntimeError: if a connection to df_connect cannot be established or the
            health endpoint answers with an HTTP error status. The original
            ``requests`` exception is attached as ``__cause__``.
    """
    auth = UsernamePasswordAuth(connect_info.tamr_username, connect_info.tamr_password)
    session = requests.Session()
    session.auth = auth
    session.headers.update(
        {"Content-type": "application/json", "Accept": "application/json"}
    )

    # test that df_connect is running properly
    url = _get_url(connect_info, "/api/service/health")
    try:
        response = session.get(url)
        response.raise_for_status()
    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as err:
        # The session is never handed to the caller on this path, so release its
        # connection pool here instead of leaking it.
        session.close()
        # Chain the low-level error so the traceback shows why the check failed.
        raise RuntimeError(
            f"Tamr auxiliary service df-df_connect is either not running or not healthy at {url}!"
            f" Did you install it? Df-connect does not come with default Tamr installation."
            f" Check its status and your configuration."
        ) from err
    return session
def test_continuous_mastering():
    """Run each mastering step in order and require every returned operation to succeed."""
    api = Client(UsernamePasswordAuth("username", "password"))
    mastering = api.projects.by_resource_id("1").as_mastering()

    # Every step is invoked with poll_interval_seconds=0 (presumably so the test
    # does not wait between polls).
    assert mastering.unified_dataset().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.pairs().refresh(poll_interval_seconds=0).succeeded()

    matching_model = mastering.pair_matching_model()
    assert matching_model.train(poll_interval_seconds=0).succeeded()
    assert matching_model.predict(poll_interval_seconds=0).succeeded()

    assert mastering.record_clusters().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.published_clusters().refresh(poll_interval_seconds=0).succeeded()
def test_dataset_profile(self):
    """Fetch a dataset profile and compare each exposed attribute with ``self.profile_stale``."""
    client = Client(UsernamePasswordAuth("username", "password"))
    dataset_id = "3"
    dataset_url = f"{client.protocol}://{client.host}:{client.port}/api/versioned/v1/datasets/{dataset_id}"
    profile_url = f"{dataset_url}/profile"

    responses.add(responses.GET, dataset_url, json={})
    responses.add(responses.GET, profile_url, json=self.profile_stale)

    profile = client.datasets.by_resource_id(dataset_id).profile()

    # (JSON key in the served payload, attribute name on the profile object)
    checked_fields = (
        ("datasetName", "dataset_name"),
        ("relativeDatasetId", "relative_dataset_id"),
        ("isUpToDate", "is_up_to_date"),
        ("profiledDataVersion", "profiled_data_version"),
        ("profiledAt", "profiled_at"),
        ("simpleMetrics", "simple_metrics"),
        ("attributeProfiles", "attribute_profiles"),
    )
    for json_key, attribute in checked_fields:
        self.assertEqual(self.profile_stale[json_key], getattr(profile, attribute))
def post(self):
    """Returns list of projects

    The request must carry an ``Authorization`` header whose second
    whitespace-separated token is base64 of ``username:password``; those
    values are compared with ``my_app_username`` / ``my_app_password``.
    The Tamr connection details are then read from the form fields
    ``Host``, ``Username``, ``Password``, ``Protocol`` and ``Port``.

    Returns:
        ``({'projects': projects}, 200)`` on success, or a 401 JSON response
        when the credentials are missing, malformed or wrong.
    """
    # First parse the header for credentials:
    try:
        encoded_credentials = request.headers['Authorization'].split()[1].encode('utf-8')
        decoded_credentials = b64decode(encoded_credentials).decode('utf-8')
        # Split on the FIRST colon only: a password may itself contain ':',
        # and an unbounded split would make the unpacking fail for it.
        username, password = decoded_credentials.split(':', 1)
        if username != my_app_username or password != my_app_password:
            raise ValueError()
    # this is too broad except that if anything above breaks it means authorization isn't correct
    except Exception:
        return app.response_class(
            response=json.dumps("credentials are required to access this resource."),
            status=401,
            content_type='application/json',
        )

    host = request.form['Host']
    user = request.form['Username']
    password = request.form['Password']
    auth = UsernamePasswordAuth(user, password)
    protocol = request.form['Protocol']
    port = request.form['Port']

    tamr = Client(auth, host=host, protocol=protocol, port=port)
    projects = get_all_project_names(tamr)
    return {'projects': projects}, 200
def test_request_absolute_endpoint():
    """Request an endpoint given as an absolute path (leading slash) through the client."""
    absolute_path = "/api/service/health"
    responses.add(responses.GET, f"http://localhost:9100{absolute_path}", json={})

    api = Client(UsernamePasswordAuth("username", "password"))

    # Only the host-rooted URL above is registered; per the original note, a
    # client that mishandles absolute paths raises ConnectionRefused here.
    api.get(absolute_path)
def test_project_by_name(self):
    """Look a project up by name and compare its data with the first entry of ``self.project_json``."""
    responses.add(responses.GET, self.project_list_url, json=self.project_json)

    api = Client(UsernamePasswordAuth("username", "password"))
    found = api.projects.by_name(self.project_name)

    expected_data = self.project_json[0]
    assert found._data == expected_data
def test_dataset_collection_repr():
    """Check the shape of ``repr(client.datasets)``: qualified class name, api_path, closing paren."""
    api = Client(UsernamePasswordAuth("username", "password"))
    qualified_name = "tamr_unify_client.models.dataset.collection.DatasetCollection"

    text = repr(api.datasets)

    assert text.startswith(qualified_name + "(")
    assert "api_path='datasets'" in text
    assert text.endswith(")")
def test_username_auth_repr():
    """Check that the auth repr shows the username but never the password."""
    credentials = UsernamePasswordAuth("myusername", "SECRET")
    qualified_name = "tamr_unify_client.auth.username_password.UsernamePasswordAuth"

    text = repr(credentials)

    assert text.startswith(qualified_name + "(")
    assert "myusername" in text
    # The password must not appear in the representation.
    assert "SECRET" not in text
    assert text.endswith(")")
def test_http_error():
    """Ensure that the client surfaces HTTP errors as exceptions.

    A 401 response is registered for the project URL; looking the project up
    must raise ``HTTPError`` whose message names the status and the URL.
    """
    endpoint = "http://localhost:9100/api/versioned/v1/projects/1"
    responses.add(responses.GET, endpoint, status=401)
    auth = UsernamePasswordAuth("nonexistent-username", "invalid-password")
    unify = Client(auth)
    with raises(HTTPError) as excinfo:
        unify.projects.by_resource_id("1")
    # Assuming `raises` is pytest.raises: `excinfo` is the ExceptionInfo
    # wrapper, and the HTTPError that was actually raised lives in `.value`.
    # Checking the exception itself avoids depending on the wrapper's own
    # string form.
    assert f"401 Client Error: Unauthorized for url: {endpoint}" in str(excinfo.value)
def test_client_create():
    """Create a client from the ``my_instance_name`` config entry and check its connection fields."""
    instance_config = CONFIG["my_instance_name"]
    created = utils.client.create(**instance_config)

    assert created.host == instance_config["host"]
    # The client's port is compared against the integer form of the config value.
    assert created.port == int(instance_config["port"])
    assert created.protocol == instance_config["protocol"]
    assert created.base_path == "/api/versioned/v1/"
    assert created.auth == UsernamePasswordAuth(
        "admin", os.environ["TAMR_TOOLBOX_PASSWORD"]
    )
def test_dataset_status():
    """Fetch a dataset's status and compare its raw data with ``status_json``."""
    dataset_id = "1"
    dataset_url = f"http://localhost:9100/api/versioned/v1/datasets/{dataset_id}"

    responses.add(responses.GET, dataset_url, json={})
    responses.add(responses.GET, f"{dataset_url}/status", json=status_json)

    api = Client(UsernamePasswordAuth("username", "password"))
    fetched_status = api.datasets.by_resource_id(dataset_id).status()

    assert fetched_status._data == status_json
def test_dataset_records():
    """Check that ``dataset.records()`` parses a two-line JSON body into two dicts."""
    dataset_id = "1"
    dataset_url = f"http://localhost:9100/api/versioned/v1/datasets/{dataset_id}"
    # Two JSON documents separated by a newline.
    raw_body = '{"attribute1": 1}\n{"attribute1": 2}'

    responses.add(responses.GET, dataset_url, json={})
    responses.add(responses.GET, f"{dataset_url}/records", body=raw_body)

    api = Client(UsernamePasswordAuth("username", "password"))
    parsed = list(api.datasets.by_resource_id(dataset_id).records())

    assert parsed == [{"attribute1": 1}, {"attribute1": 2}]
def test_dataset_profile():
    """Fetch a dataset profile and compare its raw data with ``profile_json1``."""
    dataset_id = "3"
    dataset_url = f"http://localhost:9100/api/versioned/v1/datasets/{dataset_id}"
    profile_url = f"{dataset_url}/profile"

    responses.add(responses.GET, dataset_url, json={})
    responses.add(responses.GET, profile_url, json=profile_json1)
    # A refresh endpoint is registered as well, answering 204 with an empty list.
    responses.add(responses.POST, f"{profile_url}:refresh", json=[], status=204)

    api = Client(UsernamePasswordAuth("username", "password"))
    fetched_profile = api.datasets.by_resource_id(dataset_id).profile()

    assert fetched_profile._data == profile_json1
def test_continuous_mastering():
    """Run each mastering step in order, then check the pair-count estimates of the project."""
    api = Client(UsernamePasswordAuth("username", "password"))
    mastering = api.projects.by_resource_id("1").as_mastering()

    # Every step is invoked with poll_interval_seconds=0 (presumably so the test
    # does not wait between polls).
    assert mastering.unified_dataset().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.pairs().refresh(poll_interval_seconds=0).succeeded()

    matching_model = mastering.pair_matching_model()
    assert matching_model.train(poll_interval_seconds=0).succeeded()
    assert matching_model.predict(poll_interval_seconds=0).succeeded()

    assert mastering.record_clusters().refresh(poll_interval_seconds=0).succeeded()
    assert mastering.published_clusters().refresh(poll_interval_seconds=0).succeeded()

    estimate_url = (
        f"http://localhost:9100/api/versioned/v1/projects/1/estimatedPairCounts"
    )
    clause_counts = {
        "clause1": {"candidatePairCount": "50", "generatedPairCount": "25"},
        "clause2": {"candidatePairCount": "50", "generatedPairCount": "25"},
        "clause3": {"candidatePairCount": "100", "generatedPairCount": "50"},
    }
    estimate_json = {
        "isUpToDate": "true",
        "totalEstimate": {"candidatePairCount": "200", "generatedPairCount": "100"},
        "clauseEstimates": clause_counts,
    }
    responses.add(responses.GET, estimate_url, json=estimate_json)

    # estimate_pairs() is called afresh for each check, as in the original.
    assert mastering.estimate_pairs().is_up_to_date

    total_candidates = mastering.estimate_pairs().total_estimate["candidatePairCount"]
    assert total_candidates == "200"

    first_clause = mastering.estimate_pairs().clause_estimates["clause1"]
    assert first_clause["generatedPairCount"] == "25"
def test_project_add_source_dataset():
    """Add a dataset to a project, then read the project's input datasets back and compare."""
    responses.add(responses.GET, datasets_url, json=dataset_json)
    responses.add(responses.GET, projects_url, json=project_json)
    responses.add(
        responses.POST, input_datasets_url, json=post_input_datasets_json, status=204
    )
    responses.add(responses.GET, input_datasets_url, json=get_input_datasets_json)

    api = Client(UsernamePasswordAuth("username", "password"))
    source = api.datasets.by_external_id(dataset_external_id)
    target_project = api.projects.by_external_id(project_external_id)
    target_project.add_source_dataset(source)

    # Read the input datasets back through the project's own client.
    input_datasets_path = target_project.api_path + "/inputDatasets"
    listed = target_project.client.get(input_datasets_path).successful().json()

    assert listed == dataset_json
def test_record_clusters_with_data():
    """Check that ``record_clusters_with_data()`` resolves to the dataset served by the datasets listing."""
    project_id = "1"
    project_url = f"http://localhost:9100/api/versioned/v1/projects/{project_id}"
    unified_dataset_url = (
        f"http://localhost:9100/api/versioned/v1/projects/{project_id}/unifiedDataset"
    )
    datasets_url = f"http://localhost:9100/api/versioned/v1/datasets"

    project_config = {
        "name": "Project 1",
        "description": "Mastering Project",
        "type": "DEDUP",
        "unifiedDatasetName": "Project 1 - Unified Dataset",
        "externalId": "Project1",
        "resourceId": "1",
    }
    unified_dataset_json = {
        "id": "unify://unified-data/v1/datasets/8",
        "name": "Project_1_unified_dataset",
        "version": "10",
        "relativeId": "datasets/8",
        "externalId": "Project_1_unified_dataset",
    }
    clusters_with_data_json = {
        "externalId": "1",
        "id": "unify://unified-data/v1/datasets/36",
        "name": "Project_1_unified_dataset_dedup_clusters_with_data",
        "relativeId": "datasets/36",
        "version": "251",
    }

    api = Client(UsernamePasswordAuth("username", "password"))

    responses.add(responses.GET, project_url, json=project_config)
    responses.add(responses.GET, unified_dataset_url, json=unified_dataset_json)
    # The datasets listing contains exactly the clusters-with-data dataset.
    responses.add(responses.GET, datasets_url, json=[clusters_with_data_json])

    mastering = api.projects.by_resource_id(project_id).as_mastering()
    resolved = mastering.record_clusters_with_data()

    assert resolved.name == clusters_with_data_json["name"]
def test_client_repr():
    """Check the client repr with default arguments and with explicit optional arguments."""
    credentials = UsernamePasswordAuth("username", "password")

    default_text = repr(Client(credentials))
    assert default_text.startswith("tamr_unify_client.client.Client(")
    assert "http" in default_text
    assert default_text.endswith(")")
    # The password must not appear in the representation.
    assert "password" not in default_text

    custom_text = repr(
        Client(credentials, protocol="http", port=1234, base_path="foo/bar")
    )
    for fragment in ("'http'", "1234", "foo/bar"):
        assert fragment in custom_text
def test_continuous_categorization():
    """Run each categorization step in order and require every returned operation to succeed."""
    api = Client(UsernamePasswordAuth("username", "password"))
    categorization = api.projects.by_resource_id("3").as_categorization()

    # Every step is invoked with poll_interval_seconds=0 (presumably so the test
    # does not wait between polls).
    assert categorization.unified_dataset().refresh(poll_interval_seconds=0).succeeded()

    classifier = categorization.model()
    assert classifier.train(poll_interval_seconds=0).succeeded()
    assert classifier.predict(poll_interval_seconds=0).succeeded()
def test_profile_refresh(self):
    """Refresh an existing dataset profile and require the returned operation to succeed."""
    client = Client(UsernamePasswordAuth("username", "password"))
    dataset_id = "3"
    dataset_url = f"{client.protocol}://{client.host}:{client.port}/api/versioned/v1/datasets/{dataset_id}"
    profile_url = f"{dataset_url}/profile"

    responses.add(responses.GET, dataset_url, json={})
    responses.add(responses.GET, profile_url, json=self.profile_stale)
    responses.add(
        responses.POST, f"{profile_url}:refresh", json=self.operation_succeeded
    )

    stale_profile = client.datasets.by_resource_id(dataset_id).profile()
    refresh_op = stale_profile.refresh()

    self.assertTrue(refresh_op.succeeded())
def test_dataset_status_repr():
    """Check that the ``DatasetStatus`` repr contains its class name and key field values."""
    api = Client(UsernamePasswordAuth("username", "password"))
    status_payload = {
        "relativeId": "path/to/thing/1",
        "datasetName": "testdsname",
        "relativeDatasetId": "path/to/data/1",
        "isStreamable": True,
    }
    qualified_name = "tamr_unify_client.models.dataset_status.DatasetStatus"

    text = repr(DatasetStatus.from_json(api, status_payload))

    assert text.startswith(qualified_name + "(")
    for fragment in ("testdsname", "True", "path/to/thing"):
        assert fragment in text
    assert text.endswith(")")
def test_profile_create(self):
    """Create a profile for a dataset that has none and require the operation to succeed."""
    client = Client(UsernamePasswordAuth("username", "password"))
    dataset_id = "3"
    dataset_url = f"{client.protocol}://{client.host}:{client.port}/api/versioned/v1/datasets/{dataset_id}"
    profile_url = f"{dataset_url}/profile"

    responses.add(responses.GET, dataset_url, json={})
    # Reading the profile answers 404: while the profile is being created,
    # nothing should try to access the (non-existent) profile.
    responses.add(responses.GET, profile_url, status=404)
    responses.add(
        responses.POST, f"{profile_url}:refresh", json=self.operation_succeeded
    )

    target_dataset = client.datasets.by_resource_id(dataset_id)
    create_op = target_dataset.create_profile()

    self.assertTrue(create_op.succeeded())
def test_client_repr():
    """Check the client repr with default arguments and with explicit optional arguments."""
    credentials = UsernamePasswordAuth("username", "password")
    qualified_name = "tamr_unify_client.client.Client"

    default_text = repr(Client(credentials))
    assert default_text.startswith(qualified_name + "(")
    assert "http" in default_text
    # The password must not appear in the representation.
    assert "password" not in default_text
    assert default_text.endswith(")")

    # further testing when Client has optional arguments
    custom_text = repr(
        Client(credentials, protocol="http", port=1234, base_path="foo/bar")
    )
    for fragment in ("'http'", "1234", "foo/bar"):
        assert fragment in custom_text
def test_request_session_cookie():
    """Check that a caller-supplied session is used as-is and that its cookie is sent."""
    responses.add(
        responses.GET, "http://localhost:9100/api/versioned/v1/test", json={}
    )

    shared_session = requests.Session()
    shared_session.cookies.set_cookie(
        requests.cookies.create_cookie(name="test_cookie", value="the-cookie-works")
    )

    api = Client(UsernamePasswordAuth("username", "password"), session=shared_session)
    # The client must keep the very same session object it was given.
    assert api.session is shared_session

    api.get("test")

    assert len(responses.calls) == 1
    sent_request = responses.calls[0].request
    assert sent_request.url.endswith("test")
    assert sent_request.headers.get("Cookie") is not None
    assert "test_cookie=" in sent_request.headers.get("Cookie")
def create(
    *,
    username: str,
    password: str,
    host: str,
    port: Optional[Union[str, int]] = 9100,
    protocol: str = "http",
    store_auth_cookie: bool = False,
    enforce_healthy: bool = False,
) -> Client:
    """Build a Tamr client from individual configuration values.

    Args:
        username: The username to access Tamr as
        password: The password for that user
        host: The ip address of Tamr
        port: The port of the Tamr UI. Pass `None` to specify an address with no port;
            any other value is converted with ``int``
        protocol: https or http
        store_auth_cookie: Passed through to ``Client``; if true the Tamr
            authentication cookie may be stored and reused
        enforce_healthy: If true, ``health_check`` is run on the new client and
            must report healthy

    Returns:
        Tamr client

    Raises:
        SystemError: if ``enforce_healthy`` is true and the health check fails
    """
    has_port = port is not None
    if has_port:
        full_address = f"{protocol}://{host}:{port}"
    else:
        full_address = f"{protocol}://{host}"
    LOGGER.info(f"Creating client as user {username} at {full_address}.")

    credentials = UsernamePasswordAuth(username=username, password=password)
    client = Client(
        auth=credentials,
        host=host,
        port=int(port) if has_port else None,
        protocol=protocol,
        store_auth_cookie=store_auth_cookie,
    )

    # The health check only runs when the caller asked for it.
    if enforce_healthy and not health_check(client):
        LOGGER.error(f"Tamr is not healthy. Check logs and Tamr.")
        raise SystemError("Tamr is not healthy. Check logs and Tamr.")
    return client
def create(
    *,
    username: str,
    password: str,
    host: str,
    port: str = "9100",
    protocol: str = "http",
    enforce_healthy: bool = False,
) -> Client:
    """Build a Tamr client from individual configuration values.

    Args:
        username: The username to access Tamr as
        password: The password for that user
        host: The ip address of Tamr
        port: The port of the Tamr UI; converted with ``int`` before use
        protocol: https or http
        enforce_healthy: If true, an unhealthy result from ``health_check``
            raises instead of returning the client

    Returns:
        Tamr client

    Raises:
        SystemError: if ``enforce_healthy`` is true and the health check fails
    """
    LOGGER.info(
        f"Creating client as user {username} at {protocol}://{host}:{port}.")

    credentials = UsernamePasswordAuth(username=username, password=password)
    client = Client(auth=credentials, host=host, port=int(port), protocol=protocol)

    # The health check always runs; its result only matters when enforcement is on.
    is_healthy = health_check(client)
    if enforce_healthy and not is_healthy:
        LOGGER.error(f"Tamr is not healthy. Check logs and Tamr.")
        raise SystemError("Tamr is not healthy. Check logs and Tamr.")
    return client
from functools import partial import json import responses from tamr_unify_client import Client from tamr_unify_client.auth import UsernamePasswordAuth from tamr_unify_client.project.resource import ProjectSpec auth = UsernamePasswordAuth("username", "password") tamr = Client(auth) creation_spec = { "name": "Project 1", "description": "Mastering Project", "type": "DEDUP", "unifiedDatasetName": "Project 1 - Unified Dataset", "externalId": "Project1", } project_json = { **creation_spec, "id": "unify://unified-data/v1/projects/1", "created": { "username": "******", "time": "2018-09-10T16:06:20.636Z", "version": "project 1 created version", }, "lastModified": { "username": "******", "time": "2018-09-10T16:06:20.851Z",
def setUp(self):
    """Store a ``Client`` built from placeholder credentials on ``self.tamr``."""
    self.tamr = Client(UsernamePasswordAuth("username", "password"))
def test_binning_model_update_records():
    """Post CREATE updates for three binning records and compare the captured request body."""
    record_ids = (
        "d8b7351d-24ce-49aa-8655-5b5809ab6bb8",
        "d8b7351d-24ce-49aa-8655-5b5809ab6bc9",
        "d8b7351d-24ce-49aa-8655-5b5809ab6bd8",
    )

    def make_record(record_id):
        # The three records differ only in their id.
        return {
            "id": [record_id],
            "isActive": ["true"],
            "clauseId": ["2e6c5f1b-ed49-40ab-8cbb-350aded25070"],
            "similarityFunction": ["COSINE"],
            "tokenizer": ["DEFAULT"],
            "fieldName": ["surname"],
            "threshold": ["0.75"],
        }

    records_body = [make_record(record_id) for record_id in record_ids]
    # Built from fresh dicts so the expectation does not alias the submitted records.
    expected_updates = [
        {"action": "CREATE", "recordId": record_id, "record": make_record(record_id)}
        for record_id in record_ids
    ]

    captured = {}

    def update_callback(request, snoop):
        # Keep the request body so it can be inspected after the call returns.
        snoop["payload"] = request.body
        return 200, {}, "{}"

    update_records_url = (
        f"http://localhost:9100/api/versioned/v1/projects/1/binningModel/records"
    )

    responses.add(responses.GET, project_url, json=project_config)
    responses.add_callback(
        responses.POST,
        update_records_url,
        callback=partial(update_callback, snoop=captured),
    )

    api = Client(UsernamePasswordAuth("username", "password"))
    binning_model = api.projects.by_resource_id("1").as_mastering().binning_model()

    updates = [
        {"action": "CREATE", "recordId": record["id"][0], "record": record}
        for record in records_body
    ]
    binning_model.update_records(updates)

    # Each item of the captured payload is decoded as its own JSON document.
    actual = [json.loads(item) for item in captured["payload"]]
    assert expected_updates == actual