Beispiel #1
0
def test_load_labels_data():
    """Check that ``MatrixBuilder.load_labels_data`` returns the expected labels.

    The test creates the fixture schemas in a temporary Postgres database,
    builds the entity-date table for two as-of dates, loads the labels for
    that table (using the module-level ``label_name`` / ``label_type``) and
    compares the result with a hand-written expected frame indexed by
    ``(entity_id, as_of_date)``.
    """
    # as-of dates for which the entity-date table is built
    dates = [
        datetime.datetime(2016, 1, 1, 0, 0),
        datetime.datetime(2016, 2, 1, 0, 0)
    ]

    # create an engine and populate the fixture tables
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        create_schemas(engine, features_tables, labels, states)
        with get_matrix_storage_engine() as matrix_storage_engine:
            builder = MatrixBuilder(
                db_config=db_config,
                matrix_storage_engine=matrix_storage_engine,
                engine=engine,
            )

            # make the entity-date table that load_labels_data reads from
            entity_date_table_name = builder.make_entity_date_table(
                as_of_times=dates,
                label_type='binary',
                label_name='booking',
                state='state_one AND state_two',
                matrix_type='train',
                matrix_uuid='my_uuid',
                label_timespan='1 month')

            result = builder.load_labels_data(
                label_name=label_name,
                label_type=label_type,
                label_timespan='1 month',
                matrix_uuid='my_uuid',
                entity_date_table_name=entity_date_table_name,
            )

            # expected labels, keyed the same way as the loaded result
            expected = pd.DataFrame.from_dict({
                'entity_id': [2, 3, 4, 4],
                'as_of_date': [dates[1], dates[1], dates[0], dates[1]],
                'booking': [0, 0, 1, 0],
            }).set_index(['entity_id', 'as_of_date'])

            # element-wise comparison; pandas raises if the two frames are
            # not identically labelled, so a shape mismatch also fails here
            assert (result == expected).all().all(), \
                'loaded labels differ from the expected labels'
Beispiel #2
0
def test_load_labels_data_include_missing_labels_as_false():
    """Check ``load_labels_data`` when missing labels are included as False.

    Same setup as the plain ``load_labels_data`` test, except that the
    builder is created with ``include_missing_labels_in_train_as=False`` and
    an extra as-of date (2016-06-01) is requested. The expected frame
    therefore contains one additional row, with label 0, for that date.
    """
    # as-of dates for which the entity-date table is built
    dates = [
        datetime.datetime(2016, 1, 1, 0, 0),
        datetime.datetime(2016, 2, 1, 0, 0),
        datetime.datetime(2016, 6, 1, 0, 0),
    ]

    # same as the other load_labels_data test, except we include an extra date, 2016-06-01
    # this date does have entity 0 included via the states table, but no labels

    # create an engine and populate the fixture tables
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        create_schemas(engine, features_tables, labels, states)
        with get_matrix_storage_engine() as matrix_storage_engine:
            builder = MatrixBuilder(
                db_config=db_config,
                matrix_storage_engine=matrix_storage_engine,
                experiment_hash=experiment_hash,
                engine=engine,
                include_missing_labels_in_train_as=False,
            )

            # make the entity-date table that load_labels_data reads from
            entity_date_table_name = builder.make_entity_date_table(
                as_of_times=dates,
                label_type="binary",
                label_name="booking",
                state="active",
                matrix_type="train",
                matrix_uuid="my_uuid",
                label_timespan="1 month",
            )

            result = builder.load_labels_data(
                label_name=label_name,
                label_type=label_type,
                label_timespan="1 month",
                matrix_uuid="my_uuid",
                entity_date_table_name=entity_date_table_name,
            )

            # expected labels, keyed the same way as the loaded result
            expected = pd.DataFrame.from_dict(
                {
                    "entity_id": [0, 2, 3, 4, 4],
                    "as_of_date": [dates[2], dates[1], dates[1], dates[0], dates[1]],
                    "booking": [0, 0, 0, 1, 0],
                }
            ).set_index(["entity_id", "as_of_date"])
            # the first row would not be here if we had not configured the Builder
            # to include missing labels as false

            # element-wise comparison; pandas raises if the two frames are
            # not identically labelled, so a shape mismatch also fails here
            assert (
                result == expected
            ).all().all(), "loaded labels differ from the expected labels"
Beispiel #3
0
def test_load_labels_data():
    """Check that ``MatrixBuilder.load_labels_data`` returns the expected labels.

    The test creates the fixture schemas in a temporary Postgres database,
    builds the entity-date table for two as-of dates, loads the labels for
    that table (using the module-level ``label_name`` / ``label_type``) and
    compares the result with a hand-written expected frame indexed by
    ``(entity_id, as_of_date)``.
    """
    # as-of dates for which the entity-date table is built
    dates = [datetime.datetime(2016, 1, 1, 0, 0), datetime.datetime(2016, 2, 1, 0, 0)]

    # create an engine and populate the fixture tables
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        create_schemas(engine, features_tables, labels, states)
        with get_matrix_storage_engine() as matrix_storage_engine:
            builder = MatrixBuilder(
                db_config=db_config,
                matrix_storage_engine=matrix_storage_engine,
                experiment_hash=experiment_hash,
                engine=engine,
            )

            # make the entity-date table that load_labels_data reads from
            entity_date_table_name = builder.make_entity_date_table(
                as_of_times=dates,
                label_type="binary",
                label_name="booking",
                state="active",
                matrix_type="train",
                matrix_uuid="my_uuid",
                label_timespan="1 month",
            )

            result = builder.load_labels_data(
                label_name=label_name,
                label_type=label_type,
                label_timespan="1 month",
                matrix_uuid="my_uuid",
                entity_date_table_name=entity_date_table_name,
            )

            # expected labels, keyed the same way as the loaded result
            expected = pd.DataFrame.from_dict(
                {
                    "entity_id": [2, 3, 4, 4],
                    "as_of_date": [dates[1], dates[1], dates[0], dates[1]],
                    "booking": [0, 0, 1, 0],
                }
            ).set_index(["entity_id", "as_of_date"])

            # element-wise comparison; pandas raises if the two frames are
            # not identically labelled, so a shape mismatch also fails here
            assert (
                result == expected
            ).all().all(), "loaded labels differ from the expected labels"