def test_create():
    """Creating a record-typed attribute preserves name, nullability, and type."""
    session = fake.session()
    dataset = fake.dataset()
    sub_attrs = tuple(
        tc.SubAttribute(
            name=str(idx),
            is_nullable=True,
            type=tc.attribute.type.Array(tc.attribute.type.STRING),
        )
        for idx in range(4)
    )
    created = tc.attribute.create(
        session,
        dataset,
        name="attr",
        is_nullable=False,
        type=tc.attribute.type.Record(attributes=sub_attrs),
    )
    assert created.name == "attr"
    assert not created.is_nullable
    assert isinstance(created.type, tc.attribute.type.Record)
    assert created.type.attributes == sub_attrs
def test_by_name():
    """Looking a project up by name yields its name and description."""
    session = fake.session()
    instance = fake.instance()
    proj = tc.project.by_name(session, instance, "proj")
    assert proj.name == "proj"
    assert proj.description == "Mastering Project"
def test_replace_all_errors():
    """A failing replace_all call surfaces the underlying HTTPError."""
    session = fake.session()
    proj = fake.mastering_project()
    tx = fake.transforms()
    with pytest.raises(HTTPError):
        tc.transformations.replace_all(session, proj, tx)
def test_operation_check_success():
    """check() returns silently for an operation that succeeded."""
    session = fake.session()
    op_url = tc.URL(path="operations/1")
    succeeded = tc.operation._from_json(
        op_url, utils.load_json("operation_succeeded.json")
    )
    tc.operation.check(session, succeeded)
def test_manual_labels():
    """Fetching manual labels for a categorization project completes without error."""
    session = fake.session()
    instance = fake.instance()
    proj = fake.categorization_project()
    tc.categorization.project.manual_labels(
        session=session, instance=instance, project=proj
    )
def test_by_resource_id_mastering():
    """Resource id "1" resolves to a MasteringProject with the expected fields."""
    session = fake.session()
    instance = fake.instance()
    proj = tc.project.by_resource_id(session, instance, "1")
    assert isinstance(proj, tc.MasteringProject)
    assert proj.name == "proj"
    assert proj.description == "Mastering Project"
def test_by_resource_id():
    """A backup can be fetched by its resource id without raising."""
    session = fake.session()
    instance = fake.instance()
    tc.backup.by_resource_id(
        session=session, instance=instance, resource_id="2020-08-17_21-32-10-961"
    )
def test_create_no_primary_key():
    """Creating a dataset from a dataframe with no inferable primary key raises NotFound."""
    session = fake.session()
    instance = fake.instance()
    frame = pd.DataFrame(_records_with_keys_json_2)
    with pytest.raises(tc.primary_key.NotFound):
        tc.dataframe.create(session, instance, frame, name="df_dataset")
def test_replace_all():
    """replace_all accepts transformations after in-memory edits to both scopes."""
    session = fake.session()
    proj = fake.mastering_project()
    tx = fake.transforms()
    tx.unified_scope.append("//extra TX")
    tx.input_scope.pop(1)
    tc.transformations.replace_all(session, proj, tx)
def test_by_name():
    """Looking a dataset up by name yields its name, description, and key attributes."""
    session = fake.session()
    instance = fake.instance()
    ds = tc.dataset.by_name(session, instance, "dataset 1 name")
    assert ds.name == "dataset 1 name"
    assert ds.description == "dataset 1 description"
    assert ds.key_attribute_names == ("tamr_id",)
def test_from_resource_id():
    """Fetching a dataset by resource id yields its name, description, and key attributes."""
    session = fake.session()
    instance = fake.instance()
    ds = tc.dataset.from_resource_id(session, instance, "1")
    assert ds.name == "dataset 1 name"
    assert ds.description == "dataset 1 description"
    assert ds.key_attribute_names == ("tamr_id",)
def test_upsert_infer_primary_key():
    """upsert without an explicit primary key infers it and returns the response."""
    session = fake.session()
    dataset = fake.dataset()
    frame = pd.DataFrame(_records_json)
    assert tc.dataframe.upsert(session, dataset, frame) == _response_json
def test_update():
    """Updating an attribute's description is reflected on the returned attribute."""
    session = fake.session()
    original = fake.attribute()
    updated = tc.attribute.update(
        session, original, description="Synthetic row number updated"
    )
    assert updated.description == "Synthetic row number updated"
def test_from_project():
    """A project's unified dataset exposes name, description, and key attributes."""
    session = fake.session()
    proj = fake.mastering_project()
    unified = tc.dataset.unified.from_project(session, proj)
    assert unified.name == "dataset 1 name"
    assert unified.description == "dataset 1 description"
    assert unified.key_attribute_names == ("tamr_id",)
def test_by_resource_id_categorization():
    """Resource id "2" resolves to a CategorizationProject with the expected fields."""
    session = fake.session()
    instance = fake.instance()
    proj = tc.project.by_resource_id(session, instance, "2")
    assert isinstance(proj, tc.CategorizationProject)
    assert proj.name == "Party Categorization"
    assert proj.description == "Categorizes organization at the Party/Domestic level"
def test_delete_primary_key_not_found():
    """Deleting records with a nonexistent primary key name raises NotFound."""
    session = fake.session()
    dataset = fake.dataset()
    with pytest.raises(tc.primary_key.NotFound):
        tc.record.delete(
            session, dataset, _records_json, primary_key_name="wrong_primary_key"
        )
def test_delete():
    """Deleting records by a valid primary key returns the expected response."""
    session = fake.session()
    dataset = fake.dataset()
    resp = tc.record.delete(
        session, dataset, _records_json, primary_key_name="primary_key"
    )
    assert resp == _response_json
def test_from_project_dataset_not_found():
    """from_project raises NotFound when the unified-dataset endpoint returns 404.

    Bug fix: the original passed three arguments
    (``from_project(s, instance, project)``) while test_from_project in this
    file calls the same function with two (``session, project``). The extra
    ``instance`` argument would make the call fail with a TypeError instead of
    exercising the 404 -> NotFound path, so it is removed along with the
    now-unused ``fake.instance()`` fixture.
    """
    session = fake.session()
    proj = fake.mastering_project()
    url = tc.URL(path="projects/1/unifiedDataset")
    # Stub the unified-dataset endpoint to return 404 so NotFound is raised.
    responses.add(responses.GET, str(url), status=404)
    with pytest.raises(tc.dataset.unified.NotFound):
        tc.dataset.unified.from_project(session, proj)
def test_operation_failed_success():
    """check() on a failed operation raises Failed with the URL and state in the message."""
    session = fake.session()
    op_url = tc.URL(path="operations/1")
    failed = tc.operation._from_json(op_url, utils.load_json("operation_failed.json"))
    with pytest.raises(tc.operation.Failed) as exc_info:
        tc.operation.check(session, failed)
    message = str(exc_info.value)
    assert str(op_url) in message
    assert failed.status is not None and str(failed.status["state"]) in message
def test_upsert_primary_key_not_found():
    """upsert with a nonexistent primary key name raises NotFound."""
    session = fake.session()
    dataset = fake.dataset()
    frame = pd.DataFrame(_records_json)
    with pytest.raises(tc.primary_key.NotFound):
        tc.dataframe.upsert(
            session, dataset, frame, primary_key_name="wrong_primary_key"
        )
def test_update():
    """_update applies per-record create commands and returns the response."""
    session = fake.session()
    dataset = fake.dataset()
    commands = [
        tc.record._create_command(rec, primary_key_name="primary_key")
        for rec in _records_json
    ]
    assert tc.record._update(session, dataset, commands) == _response_json
def test_upsert_index_column_name_collision():
    """upsert raises Ambiguous when a column and the index share the primary key name."""
    session = fake.session()
    dataset = fake.dataset()
    frame = pd.DataFrame(_records_json_2)
    frame.index.name = "primary_key"
    # Duplicate the index as a column so both carry the name "primary_key".
    frame.insert(0, frame.index.name, frame.index)
    with pytest.raises(tc.primary_key.Ambiguous):
        tc.dataframe.upsert(session, dataset, frame, primary_key_name="primary_key")
def test_create_handle_record_failure():
    """A record-level failure during dataframe create raises CreationFailure."""
    session = fake.session()
    instance = fake.instance()
    frame = pd.DataFrame(_records_with_keys_json_2)
    with pytest.raises(tc.dataframe.CreationFailure):
        tc.dataframe.create(
            session, instance, frame, name="df_dataset", primary_key_name="primary_key"
        )
def test_create_dataset_already_exists():
    """Creating a dataset whose name is already taken raises AlreadyExists."""
    session = fake.session()
    instance = fake.instance()
    with pytest.raises(tc.dataset.AlreadyExists):
        tc.dataset.create(
            session,
            instance,
            name="new dataset",
            key_attribute_names=("primary_key",),
            description="a new dataset",
        )
def test_get_all_filter():
    """get_all with a description filter returns only the matching project."""
    session = fake.session()
    instance = fake.instance()
    matches = tc.project.get_all(
        session, instance, filter="description==Categorization Project"
    )
    assert len(matches) == 1
    only = matches[0]
    assert isinstance(only, tc.CategorizationProject)
    assert only.name == "project 2"
    assert only.description == "Categorization Project"
def test_create_project_already_exists():
    """Creating a project whose name is already taken raises AlreadyExists."""
    session = fake.session()
    instance = fake.instance()
    with pytest.raises(tc.project.AlreadyExists):
        tc.project._create(
            session,
            instance,
            name="New Mastering Project",
            project_type="DEDUP",
            description="A Mastering Project",
        )
def test_update_cluster_results_async():
    """Kicking off cluster-result materialization returns a pending Spark operation."""
    session = fake.session()
    proj = fake.mastering_project()
    op = tc.mastering._update_cluster_results_async(session, proj)
    assert op.type == "SPARK"
    assert op.description == "Materialize views to Elastic"
    assert op.status == {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }
def test_estimate_pairs_async():
    """Kicking off pair estimation returns a pending Spark operation."""
    session = fake.session()
    proj = fake.mastering_project()
    op = tc.mastering._estimate_pairs_async(session, proj)
    assert op.type == "SPARK"
    assert op.description == "operation 1 description"
    assert op.status == {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }
def test_apply_changes_async():
    """Applying changes to a unified dataset returns a pending Spark operation."""
    session = fake.session()
    unified = fake.unified_dataset()
    op = tc.dataset.unified._apply_changes_async(session, unified)
    assert op.type == "SPARK"
    assert op.description == "operation 1 description"
    assert op.status == {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }
def test_update_async():
    """Kicking off a golden-records update returns a pending Spark operation."""
    session = fake.session()
    proj = fake.golden_records_project()
    op = tc.golden_records._update_async(session, proj)
    assert op.type == "SPARK"
    assert op.description == "Updating Golden Records"
    assert op.status == {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }