コード例 #1
0
def test_get_tier_confidence_unstreamable():
    """Expect RuntimeError after an upsert, then verify leaf confidences once jobs rerun.

    The first record of the input dataset is upserted back into it before the
    default ``get_tier_confidence`` call, which must raise ``RuntimeError``
    (presumably because the upsert leaves the project's data unstreamable --
    inferred from the test name; confirm against ``get_tier_confidence``).
    The categorization jobs are then rerun and the leaf-tier values checked.
    """
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    input_dataset = tamr.datasets.by_resource_id(INPUT_DATASET_ID)

    # Upsert the first existing record back into the dataset, keyed on its
    # first key attribute.
    first_record = dataframe.from_dataset(input_dataset).head(1).to_dict(orient="records")
    key_name = input_dataset.key_attribute_names[0]
    input_dataset.upsert_records(first_record, primary_key_name=key_name)

    categorization_project = tamr.projects.by_resource_id(CATEGORIZATION_PROJECT_ID)

    with pytest.raises(RuntimeError):
        metrics.get_tier_confidence(categorization_project)

    # Rerun the categorization jobs to put the project back in its original state.
    operations = categorization.jobs.run(categorization_project)
    assert all(operation.succeeded() for operation in operations)

    leaf_confidences = metrics.get_tier_confidence(categorization_project, tier=-1)

    expected_values = {
        "Dairy|Cheese": 0.77,
        "Dairy|Milk": 0.92,
        "Meat|Beef": 0.81,
        "Meat|Chicken|bone-in": 0.53,
    }
    expected_none = ("Meat|Chicken|boneless", "Vegetables")

    assert len(leaf_confidences) == 6
    for category, confidence in expected_values.items():
        assert math.isclose(leaf_confidences[category], confidence, rel_tol=0.01)
    for category in expected_none:
        assert leaf_confidences[category] is None
コード例 #2
0
def test_get_tier_confidence_tier3():
    """Check the tier-3 confidences: two categories, one numeric and one ``None``."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project = tamr.projects.by_resource_id(CATEGORIZATION_PROJECT_ID)

    confidences = metrics.get_tier_confidence(categorization_project, tier=3)

    assert len(confidences) == 2
    bone_in = confidences["Meat|Chicken|bone-in"]
    assert math.isclose(bone_in, 0.53, rel_tol=0.01)
    boneless = confidences["Meat|Chicken|boneless"]
    assert boneless is None
コード例 #3
0
def test_get_tier_confidence_tier1():
    """Check the tier-1 confidences: three categories, two numeric and one ``None``."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project = tamr.projects.by_resource_id(CATEGORIZATION_PROJECT_ID)

    confidences = metrics.get_tier_confidence(categorization_project, tier=1)

    assert len(confidences) == 3
    # Categories that are expected to carry a numeric confidence, in check order.
    for category, confidence in (("Dairy", 0.81), ("Meat", 0.64)):
        assert math.isclose(confidences[category], confidence, rel_tol=0.01)
    assert confidences["Vegetables"] is None
コード例 #4
0
def test_get_tier_confidence_leaf():
    """Check the leaf-tier (``tier=-1``) confidences for all six categories."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project = tamr.projects.by_resource_id(CATEGORIZATION_PROJECT_ID)

    confidences = metrics.get_tier_confidence(categorization_project, tier=-1)

    expected_values = {
        "Dairy|Cheese": 0.77,
        "Dairy|Milk": 0.92,
        "Meat|Beef": 0.81,
        "Meat|Chicken|bone-in": 0.53,
    }
    expected_none = ("Meat|Chicken|boneless", "Vegetables")

    assert len(confidences) == 6
    for category, confidence in expected_values.items():
        assert math.isclose(confidences[category], confidence, rel_tol=0.01)
    for category in expected_none:
        assert confidences[category] is None
コード例 #5
0
def test_get_tier_confidence_refresh():
    """Check tier-1 confidences when a dataset refresh is allowed after an upsert.

    The first record of the input dataset is upserted back into it, then the
    metric is requested with ``allow_dataset_refresh=True``.
    """
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    input_dataset = tamr.datasets.by_resource_id(INPUT_DATASET_ID)

    # Upsert the first existing record back into the dataset, keyed on its
    # first key attribute.
    first_record = dataframe.from_dataset(input_dataset).head(1).to_dict(orient="records")
    key_name = input_dataset.key_attribute_names[0]
    input_dataset.upsert_records(first_record, primary_key_name=key_name)

    categorization_project = tamr.projects.by_resource_id(CATEGORIZATION_PROJECT_ID)
    confidences = metrics.get_tier_confidence(
        categorization_project,
        tier=1,
        allow_dataset_refresh=True,
    )

    assert len(confidences) == 3
    for category, confidence in (("Dairy", 0.81), ("Meat", 0.64)):
        assert math.isclose(confidences[category], confidence, rel_tol=0.01)
    assert confidences["Vegetables"] is None
コード例 #6
0
def test_get_tier_confidence_invalid_tier_less_than_negative_one():
    """Expect ``ValueError`` when the requested tier is -2."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project = tamr.projects.by_resource_id(CATEGORIZATION_PROJECT_ID)
    invalid_tier = -2

    with pytest.raises(ValueError):
        metrics.get_tier_confidence(categorization_project, tier=invalid_tier)
コード例 #7
0
def test_get_tier_confidence_invalid_tier_float():
    """Expect ``TypeError`` when the requested tier is the float 1.5."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project = tamr.projects.by_resource_id(CATEGORIZATION_PROJECT_ID)
    non_integer_tier = 1.5

    with pytest.raises(TypeError):
        metrics.get_tier_confidence(categorization_project, tier=non_integer_tier)
コード例 #8
0
def test_get_tier_confidence_invalid_project_type():
    """Expect ``TypeError`` when the project is looked up by the mastering project ID."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    mastering_project = tamr.projects.by_resource_id(MASTERING_PROJECT_ID)

    with pytest.raises(TypeError):
        metrics.get_tier_confidence(mastering_project)
コード例 #9
0
"""Snippet for retrieving confidence metrics from a Tamr Categorization project"""
import tamr_toolbox as tbox
from tamr_toolbox.project.categorization.metrics import get_tier_confidence

# Create the Tamr client used by the calls below
tamr = tbox.utils.client.create(
    username="******",
    password="******",
    host="localhost",
)

# Look up the Tamr categorization project by its resource ID
my_project = tamr.projects.by_resource_id("my_project_id")

# With no extra arguments: average confidence at leaf nodes, dataset refresh not allowed
leaf_node_confidence_dict = get_tier_confidence(my_project)

# Pass allow_dataset_refresh=True to let the dataset refresh when it is not streamable
# NOTE THIS WILL KICK OFF A <MATERIALIZE VIEWS> JOB
leaf_node_confidence_dict2 = get_tier_confidence(
    my_project,
    allow_dataset_refresh=True,
)

# A specific tier can be requested instead; tier numbering starts at 1
tier1_confidence_dict = get_tier_confidence(my_project, tier=1)