Beispiel #1
0
def test_create():
    """Create a non-nullable Record attribute and verify the returned fields.

    The record type is assembled from four nullable sub-attributes named
    "0" through "3", each typed as an array of strings.
    """
    session = fake.session()
    target_dataset = fake.dataset()

    sub_attributes = tuple(
        tc.SubAttribute(
            name=str(index),
            is_nullable=True,
            type=tc.attribute.type.Array(tc.attribute.type.STRING),
        )
        for index in range(4)
    )
    record_type = tc.attribute.type.Record(attributes=sub_attributes)

    created = tc.attribute.create(
        session,
        target_dataset,
        name="attr",
        is_nullable=False,
        type=record_type,
    )

    # The returned attribute should mirror exactly what was requested.
    assert created.name == "attr"
    assert not created.is_nullable
    assert isinstance(created.type, tc.attribute.type.Record)
    assert created.type.attributes == sub_attributes
Beispiel #2
0
def test_by_name():
    """Look up the project named "proj" and check its name and description."""
    session = fake.session()
    tamr_instance = fake.instance()

    found = tc.project.by_name(session, tamr_instance, "proj")

    assert found.name == "proj"
    assert found.description == "Mastering Project"
Beispiel #3
0
def test_replace_all_errors():
    """Expect ``replace_all`` to raise ``HTTPError`` for the faked request."""
    session = fake.session()
    mastering_project = fake.mastering_project()
    transformations = fake.transforms()

    with pytest.raises(HTTPError):
        tc.transformations.replace_all(
            session, mastering_project, transformations
        )
Beispiel #4
0
def test_operation_check_success():
    """Run ``tc.operation.check`` on an operation loaded from ``operation_succeeded.json``.

    There are no explicit assertions: the test passes when ``check`` does
    not raise.
    """
    session = fake.session()
    operation_url = tc.URL(path="operations/1")
    payload = utils.load_json("operation_succeeded.json")
    operation = tc.operation._from_json(operation_url, payload)

    tc.operation.check(session, operation)
Beispiel #5
0
def test_manual_labels():
    """Call ``manual_labels`` for a fake categorization project.

    There are no explicit assertions: the test passes when the call does
    not raise.
    """
    session = fake.session()
    tamr_instance = fake.instance()
    categorization_project = fake.categorization_project()

    tc.categorization.project.manual_labels(
        session=session,
        instance=tamr_instance,
        project=categorization_project,
    )
Beispiel #6
0
def test_by_resource_id_mastering():
    """Fetch project "1" by resource ID and expect a ``MasteringProject``."""
    session = fake.session()
    tamr_instance = fake.instance()

    fetched = tc.project.by_resource_id(session, tamr_instance, "1")

    assert isinstance(fetched, tc.MasteringProject)
    assert fetched.name == "proj"
    assert fetched.description == "Mastering Project"
Beispiel #7
0
def test_by_resource_id():
    """Call ``tc.backup.by_resource_id`` with a fixed backup resource ID.

    There are no explicit assertions: the test passes when the call does
    not raise.
    """
    session = fake.session()
    tamr_instance = fake.instance()
    backup_id = "2020-08-17_21-32-10-961"

    tc.backup.by_resource_id(
        session=session,
        instance=tamr_instance,
        resource_id=backup_id,
    )
Beispiel #8
0
def test_create_no_primary_key():
    """Expect ``primary_key.NotFound`` when ``create`` gets no primary key name."""
    session = fake.session()
    tamr_instance = fake.instance()

    frame = pd.DataFrame(_records_with_keys_json_2)

    with pytest.raises(tc.primary_key.NotFound):
        tc.dataframe.create(session, tamr_instance, frame, name="df_dataset")
Beispiel #9
0
def test_replace_all():
    """Edit the fake transformations and send them through ``replace_all``.

    There are no explicit assertions: the test passes when the call does
    not raise.
    """
    session = fake.session()
    mastering_project = fake.mastering_project()
    transformations = fake.transforms()

    # Add one unified-scope entry and drop the second input-scope entry
    # before submitting the modified set.
    transformations.unified_scope.append("//extra TX")
    transformations.input_scope.pop(1)

    tc.transformations.replace_all(session, mastering_project, transformations)
Beispiel #10
0
def test_by_name():
    """Look up "dataset 1 name" and check its name, description and key attributes."""
    session = fake.session()
    tamr_instance = fake.instance()

    found = tc.dataset.by_name(session, tamr_instance, "dataset 1 name")

    assert found.name == "dataset 1 name"
    assert found.description == "dataset 1 description"
    assert found.key_attribute_names == ("tamr_id",)
Beispiel #11
0
def test_from_resource_id():
    """Fetch dataset "1" by resource ID and verify its basic fields."""
    session = fake.session()
    tamr_instance = fake.instance()

    fetched = tc.dataset.from_resource_id(session, tamr_instance, "1")

    assert fetched.name == "dataset 1 name"
    assert fetched.description == "dataset 1 description"
    assert fetched.key_attribute_names == ("tamr_id",)
Beispiel #12
0
def test_upsert_infer_primary_key():
    """Upsert a dataframe without naming a primary key and compare the response."""
    session = fake.session()
    target_dataset = fake.dataset()

    frame = pd.DataFrame(_records_json)

    result = tc.dataframe.upsert(session, target_dataset, frame)

    assert result == _response_json
Beispiel #13
0
def test_update():
    """Update an attribute's description and check the returned value."""
    session = fake.session()
    original = fake.attribute()

    changed = tc.attribute.update(
        session,
        original,
        description="Synthetic row number updated",
    )

    assert changed.description == "Synthetic row number updated"
Beispiel #14
0
def test_from_project():
    """Fetch the unified dataset of a mastering project and verify its fields."""
    session = fake.session()
    mastering_project = fake.mastering_project()

    unified = tc.dataset.unified.from_project(session, mastering_project)

    assert unified.name == "dataset 1 name"
    assert unified.description == "dataset 1 description"
    assert unified.key_attribute_names == ("tamr_id",)
Beispiel #15
0
def test_by_resource_id_categorization():
    """Fetch project "2" by resource ID and expect a ``CategorizationProject``."""
    session = fake.session()
    tamr_instance = fake.instance()

    fetched = tc.project.by_resource_id(session, tamr_instance, "2")

    assert isinstance(fetched, tc.CategorizationProject)
    assert fetched.name == "Party Categorization"
    assert (
        fetched.description
        == "Categorizes organization at the Party/Domestic level"
    )
Beispiel #16
0
def test_delete_primary_key_not_found():
    """Expect ``primary_key.NotFound`` when deleting with "wrong_primary_key"."""
    session = fake.session()
    target_dataset = fake.dataset()

    with pytest.raises(tc.primary_key.NotFound):
        tc.record.delete(
            session,
            target_dataset,
            _records_json,
            primary_key_name="wrong_primary_key",
        )
Beispiel #17
0
def test_delete():
    """Delete records keyed by "primary_key" and compare the response."""
    session = fake.session()
    target_dataset = fake.dataset()

    result = tc.record.delete(
        session,
        target_dataset,
        _records_json,
        primary_key_name="primary_key",
    )

    assert result == _response_json
Beispiel #18
0
def test_from_project_dataset_not_found():
    """Expect ``unified.NotFound`` when the unified-dataset URL answers 404."""
    session = fake.session()
    tamr_instance = fake.instance()
    mastering_project = fake.mastering_project()

    # Register a 404 response for GET on the unified dataset URL.
    unified_url = tc.URL(path="projects/1/unifiedDataset")
    responses.add(responses.GET, str(unified_url), status=404)

    # NOTE(review): test_from_project in this file calls
    # from_project(s, project) without an instance argument -- confirm which
    # signature the library currently exposes.
    with pytest.raises(tc.dataset.unified.NotFound):
        tc.dataset.unified.from_project(
            session, tamr_instance, mastering_project
        )
Beispiel #19
0
def test_operation_failed_success():
    """Expect ``operation.Failed`` for an operation loaded from ``operation_failed.json``.

    The error message must mention both the operation URL and the
    operation's state.
    """
    session = fake.session()
    operation_url = tc.URL(path="operations/1")
    payload = utils.load_json("operation_failed.json")
    failed_op = tc.operation._from_json(operation_url, payload)

    with pytest.raises(tc.operation.Failed) as raised:
        tc.operation.check(session, failed_op)

    message = str(raised.value)
    assert str(operation_url) in message
    # The status must be present before its state can be looked up.
    assert failed_op.status is not None
    assert str(failed_op.status["state"]) in message
Beispiel #20
0
def test_upsert_primary_key_not_found():
    """Expect ``primary_key.NotFound`` when upserting with "wrong_primary_key"."""
    session = fake.session()
    target_dataset = fake.dataset()

    frame = pd.DataFrame(_records_json)

    with pytest.raises(tc.primary_key.NotFound):
        tc.dataframe.upsert(
            session,
            target_dataset,
            frame,
            primary_key_name="wrong_primary_key",
        )
Beispiel #21
0
def test_update():
    """Build one command per record and send them through ``tc.record._update``."""
    session = fake.session()
    target_dataset = fake.dataset()

    commands = [
        tc.record._create_command(rec, primary_key_name="primary_key")
        for rec in _records_json
    ]

    result = tc.record._update(session, target_dataset, commands)

    assert result == _response_json
Beispiel #22
0
def test_upsert_index_column_name_collision():
    """Expect ``primary_key.Ambiguous`` when index and column share the key name."""
    session = fake.session()
    target_dataset = fake.dataset()

    frame = pd.DataFrame(_records_json_2)
    frame.index.name = "primary_key"

    # Insert a first column that duplicates the index under the index's own
    # name, so "primary_key" refers to both an index and a column.
    frame.insert(0, frame.index.name, frame.index)

    with pytest.raises(tc.primary_key.Ambiguous):
        tc.dataframe.upsert(
            session, target_dataset, frame, primary_key_name="primary_key"
        )
Beispiel #23
0
def test_create_handle_record_failure():
    """Expect ``dataframe.CreationFailure`` from ``tc.dataframe.create``."""
    session = fake.session()
    tamr_instance = fake.instance()

    frame = pd.DataFrame(_records_with_keys_json_2)

    with pytest.raises(tc.dataframe.CreationFailure):
        tc.dataframe.create(
            session,
            tamr_instance,
            frame,
            name="df_dataset",
            primary_key_name="primary_key",
        )
Beispiel #24
0
def test_create_dataset_already_exists():
    """Expect ``dataset.AlreadyExists`` when creating "new dataset"."""
    session = fake.session()
    tamr_instance = fake.instance()
    key_names = ("primary_key",)

    with pytest.raises(tc.dataset.AlreadyExists):
        tc.dataset.create(
            session,
            tamr_instance,
            name="new dataset",
            key_attribute_names=key_names,
            description="a new dataset",
        )
Beispiel #25
0
def test_get_all_filter():
    """Filter projects by description and expect one categorization project."""
    session = fake.session()
    tamr_instance = fake.instance()

    matches = tc.project.get_all(
        session,
        tamr_instance,
        filter="description==Categorization Project",
    )
    assert len(matches) == 1

    only_match = matches[0]
    assert isinstance(only_match, tc.CategorizationProject)
    assert only_match.name == "project 2"
    assert only_match.description == "Categorization Project"
Beispiel #26
0
def test_create_project_already_exists():
    """Expect ``project.AlreadyExists`` when creating "New Mastering Project"."""
    session = fake.session()
    tamr_instance = fake.instance()

    with pytest.raises(tc.project.AlreadyExists):
        tc.project._create(
            session,
            tamr_instance,
            name="New Mastering Project",
            project_type="DEDUP",
            description="A Mastering Project",
        )
Beispiel #27
0
def test_update_cluster_results_async():
    """Start a cluster-results update and verify the returned pending operation."""
    session = fake.session()
    mastering_project = fake.mastering_project()
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }

    operation = tc.mastering._update_cluster_results_async(
        session, mastering_project
    )

    assert operation.type == "SPARK"
    assert operation.description == "Materialize views to Elastic"
    assert operation.status == expected_status
Beispiel #28
0
def test_estimate_pairs_async():
    """Start a pair estimate and verify the returned pending operation."""
    session = fake.session()
    mastering_project = fake.mastering_project()
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }

    operation = tc.mastering._estimate_pairs_async(session, mastering_project)

    assert operation.type == "SPARK"
    assert operation.description == "operation 1 description"
    assert operation.status == expected_status
Beispiel #29
0
def test_apply_changes_async():
    """Apply changes to a unified dataset and verify the returned pending operation."""
    session = fake.session()
    unified = fake.unified_dataset()
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }

    operation = tc.dataset.unified._apply_changes_async(session, unified)

    assert operation.type == "SPARK"
    assert operation.description == "operation 1 description"
    assert operation.status == expected_status
def test_update_async():
    """Start a golden-records update and verify the returned pending operation."""
    session = fake.session()
    golden_project = fake.golden_records_project()
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }

    operation = tc.golden_records._update_async(session, golden_project)

    assert operation.type == "SPARK"
    assert operation.description == "Updating Golden Records"
    assert operation.status == expected_status