Esempi in Python per FeatureSetCalculator.FeatureSetCalculator, esempi in Python per featuretools.computational_backends.feature_set_calculator.FeatureSetCalculator.FeatureSetCalculator

Esempio n. 1

0

Mostra file

def test_make_identity(es):
    """Check an IdentityFeature over the log ``datetime`` column for instance 0."""
    identity = IdentityFeature(es["log"].ww["datetime"])
    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([identity]))
    result = to_pandas(calc.run(np.array([0])))

    expected = datetime(2011, 4, 9, 10, 30, 0)
    assert result[identity.get_name()][0] == expected

Esempio n. 2

0

Mostra file

def test_dependent_percentile(es):
    """Compare the Percentile of log ``value`` against pandas' ``rank(pct=True)``.

    A second Percentile stacked on ``p - 1`` is calculated in the same feature
    set; only the first one is compared, for instance ids 10 through 16.
    """
    value = ft.Feature(es['log']['value'])
    pct = ft.Feature(value, primitive=Percentile)
    pct_of_shifted = ft.Feature(pct - 1, primitive=Percentile)

    calc = FeatureSetCalculator(es, FeatureSet([pct, pct_of_shifted]))
    result = calc.run(np.array(range(10, 17)))

    ranked = es['log'].df[value.get_name()].rank(pct=True)
    expected = ranked.loc[range(10, 17)]
    actual = result[pct.get_name()]
    for want, got in zip(expected.values, actual.values):
        # Two missing values count as a match; otherwise require equality.
        both_missing = pd.isnull(want) and pd.isnull(got)
        assert both_missing or want == got

Esempio n. 3

0

Mostra file

File: test_direct_features.py Progetto: zwcdp/featuretools

def test_direct_from_identity(es):
    """Check a DirectFeature of sessions ``device_type`` on log ids 0 and 5."""
    direct = DirectFeature(base_feature=es['sessions']['device_type'],
                           child_entity=es['log'])

    calc = FeatureSetCalculator(es,
                                feature_set=FeatureSet([direct]),
                                time_last=None)
    result = calc.run(np.array([0, 5]))
    assert result[direct.get_name()].tolist() == [0, 1]

Esempio n. 4

0

Mostra file

File: test_feature_set_calculator.py Progetto: yangchenghuang/featuretools

def test_make_dfeat(es):
    """Check a DirectFeature of customers ``age`` on sessions for instance 0."""
    age_on_session = DirectFeature(es['customers']['age'],
                                   child_entity=es['sessions'])

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([age_on_session]))
    result = to_pandas(calc.run(np.array([0])))

    assert result[age_on_session.get_name()][0] == 33

Esempio n. 5

0

Mostra file

File: test_feature_set_calculator.py Progetto: xyutech/featuretools

def test_make_agg_feat_using_prev_n_events(es):
    """Check Min of log ``value`` per session over the last 1 and 3 observations.

    The two features are calculated for session 0 at two different
    ``time_last`` values and compared with hard-coded expectations.
    """
    def min_over_last(n_obs):
        # Min aggregation restricted to the previous ``n_obs`` log observations.
        return ft.Feature(es['log']['value'],
                          parent_entity=es['sessions'],
                          use_previous=Timedelta(n_obs,
                                                 'observations',
                                                 entity=es['log']),
                          primitive=Min)

    min_last_1 = min_over_last(1)
    min_last_3 = min_over_last(3)

    assert min_last_1.get_name() != min_last_3.get_name(), \
        'Features should have different names based on use_previous'

    features = FeatureSet([min_last_1, min_last_3])

    # time_last is included by default
    cases = (
        (datetime(2011, 4, 9, 10, 30, 6), 5, 0),
        (datetime(2011, 4, 9, 10, 30, 30), 20, 10),
    )
    for cutoff, want_1, want_3 in cases:
        calc = FeatureSetCalculator(es,
                                    time_last=cutoff,
                                    feature_set=features)
        result = calc.run(np.array([0]))

        got_1 = result[min_last_1.get_name()][0]
        got_3 = result[min_last_3.get_name()][0]
        assert got_1 == want_1
        assert got_3 == want_3

Esempio n. 6

0

Mostra file

def test_make_dfeat(es):
    """Check a DirectFeature of customers ``age`` on the sessions dataframe."""
    age = ft.Feature(es["customers"].ww["age"])
    age_on_session = DirectFeature(age, child_dataframe_name="sessions")

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([age_on_session]))
    result = to_pandas(calc.run(np.array([0])))

    assert result[age_on_session.get_name()][0] == 33

Esempio n. 7

0

Mostra file

def test_make_3_stacked_agg_feats(df):
    """
    Tests stacking 3 agg features.

    The test specifically uses non numeric indices to test how ancestor columns are handled
    as dataframes are merged together

    The input ``df`` is added as dataframe ``e0`` and normalized three times
    (``e0`` -> ``e1`` -> ``e2`` -> ``e3``); a Sum of ``val`` is then stacked
    through every level and calculated for instance ``"z"`` of ``e3``.
    """
    if isinstance(df, dd.DataFrame):
        pytest.xfail("normalize_dataframe fails with dask DataFrame")
    es = ft.EntitySet()
    ltypes = {
        "e1": Categorical,
        "e2": Categorical,
        "e3": Categorical,
        "val": Double,
    }
    es.add_dataframe(dataframe=df,
                     index="id",
                     dataframe_name="e0",
                     logical_types=ltypes)

    es.normalize_dataframe(
        base_dataframe_name="e0",
        new_dataframe_name="e1",
        index="e1",
        additional_columns=["e2", "e3"],
    )

    es.normalize_dataframe(
        base_dataframe_name="e1",
        new_dataframe_name="e2",
        index="e2",
        additional_columns=["e3"],
    )

    es.normalize_dataframe(base_dataframe_name="e2",
                           new_dataframe_name="e3",
                           index="e3")

    sum_1 = ft.Feature(es["e0"].ww["val"],
                       parent_dataframe_name="e1",
                       primitive=Sum)
    sum_2 = ft.Feature(sum_1, parent_dataframe_name="e2", primitive=Sum)
    sum_3 = ft.Feature(sum_2, parent_dataframe_name="e3", primitive=Sum)

    feature_set = FeatureSet([sum_3])
    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=feature_set)
    # Use a separate name so the input fixture ``df`` is not overwritten.
    result = calculator.run(np.array(["z"]))
    v = result[sum_3.get_name()][0]
    assert v == 5

Esempio n. 8

0

Mostra file

def test_returns_order_of_instance_ids(pd_es):
    """Check that the result index follows the order of the requested ids."""
    age = ft.Feature(pd_es['customers']['age'])
    calc = FeatureSetCalculator(pd_es,
                                time_last=None,
                                feature_set=FeatureSet([age]))

    requested = [0, 1, 2]
    # Precondition: the customers ``id`` column is not already in this order.
    stored = list(pd_es['customers'].df['id'])
    assert stored != requested

    result = calc.run(np.array(requested))

    assert list(result.index) == requested

Esempio n. 9

0

Mostra file

File: test_feature_set_calculator.py Progetto: xyutech/featuretools

def test_make_agg_feat_of_identity_index_variable(es):
    """Check Count of log ``id`` aggregated to sessions for instance 0."""
    log_count = ft.Feature(es['log']['id'],
                           parent_entity=es['sessions'],
                           primitive=Count)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([log_count]))
    result = calc.run(np.array([0]))
    assert result[log_count.get_name()][0] == 5

Esempio n. 10

0

Mostra file

File: test_feature_set_calculator.py Progetto: stenpiren/featuretools

def test_make_agg_feat_of_agg_feat(es):
    """Check a Sum over customers stacked on a Count of log ``id`` per session."""
    per_session_count = ft.Feature(es['log']['id'],
                                   parent_entity=es['sessions'],
                                   primitive=Count)
    per_customer_sum = ft.Feature(per_session_count,
                                  parent_entity=es['customers'],
                                  primitive=Sum)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([per_customer_sum]))
    result = calc.run(np.array([0]))
    assert result[per_customer_sum.get_name()][0] == 10

Esempio n. 11

0

Mostra file

File: test_direct_features.py Progetto: mikewcasale/featuretools

def test_direct_from_variable(es):
    """Check a DirectFeature built from the sessions ``device_type`` variable."""
    # should be same behavior as test_direct_from_identity
    direct = DirectFeature(base_feature=es['sessions']['device_type'],
                           child_entity=es['log'])

    calc = FeatureSetCalculator(es,
                                feature_set=FeatureSet([direct]),
                                time_last=None)
    raw = calc.run(np.array([0, 5]))
    result = to_pandas(raw, index='id', sort_index=True)
    assert result[direct.get_name()].tolist() == [0, 1]

Esempio n. 12

0

Mostra file

File: test_feature_set_calculator.py Progetto: yszoke/PredictionSystem

def test_direct_squared(es):
    """Check that ``feature * feature`` equals the square of the identity feature.

    The identity of log ``value`` and its product with itself are calculated
    for instances 0, 1 and 2; in every result row the second column must be
    the square of the first. A dask result is computed before iterating.
    """
    feature = IdentityFeature(es['log']['value'])
    squared = feature * feature
    feature_set = FeatureSet([feature, squared])
    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=feature_set)
    df = calculator.run(np.array([0, 1, 2]))
    if isinstance(df, dd.DataFrame):
        df = df.compute()
    for _, row in df.iterrows():
        # Rows are label-indexed Series; use .iloc for positional access since
        # integer keys through plain [] are deprecated as positions in pandas.
        base_value = row.iloc[0]
        assert (base_value * base_value) == row.iloc[1]

Esempio n. 13

0

Mostra file

File: test_feature_set_calculator.py Progetto: xyutech/featuretools

def test_make_agg_feat_of_grandchild_entity(es):
    """Check Count of log ``id`` aggregated directly to customers for instance 0."""
    log_count = ft.Feature(es['log']['id'],
                           parent_entity=es['customers'],
                           primitive=Count)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([log_count]))
    result = calc.run(np.array([0]))
    assert result[log_count.get_name()][0] == 10

Esempio n. 14

0

Mostra file

def test_agg_percentile(es):
    """Check Sum per session of the Percentile of log ``value``.

    The expected values are rebuilt with pandas: rank the log values with
    ``rank(pct=True)``, then sum the ranks per ``session_id`` for sessions
    0 and 1.
    """
    v = ft.Feature(es['log']['value'])
    p = ft.Feature(v, primitive=Percentile)
    agg = ft.Feature(p, parent_entity=es['sessions'], primitive=Sum)
    feature_set = FeatureSet([agg])
    calculator = FeatureSetCalculator(es, feature_set)
    df = calculator.run(np.array([0, 1]))
    # Work on an explicit copy: assigning a column onto a column-subset of the
    # entity's dataframe is chained assignment (SettingWithCopyWarning).
    log_vals = es['log'].df[[v.get_name(), 'session_id']].copy()
    log_vals['percentile'] = log_vals[v.get_name()].rank(pct=True)
    true_p = log_vals.groupby('session_id')['percentile'].sum()[[0, 1]]
    for t, a in zip(true_p.values, df[agg.get_name()].values):
        # Two missing values count as a match; otherwise require equality.
        assert (pd.isnull(t) and pd.isnull(a)) or t == a

Esempio n. 15

0

Mostra file

File: test_direct_features.py Progetto: yszoke/PredictionSystem

def test_direct_from_identity(es):
    """Check a DirectFeature of sessions ``device_type`` on log ids 0 and 5.

    A dask result is computed, indexed by ``id`` and sorted before comparing.
    """
    direct = DirectFeature(base_feature=es['sessions']['device_type'],
                           child_entity=es['log'])

    calc = FeatureSetCalculator(es,
                                feature_set=FeatureSet([direct]),
                                time_last=None)
    result = calc.run(np.array([0, 5]))
    if isinstance(result, dd.DataFrame):
        computed = result.compute()
        result = computed.set_index('id').sort_index()
    assert result[direct.get_name()].tolist() == [0, 1]

Esempio n. 16

0

Mostra file

File: calculate_feature_matrix.py Progetto: zwcdp/featuretools

        def calc_results(time_last,
                         ids,
                         precalculated_features=None,
                         training_window=None):
            """Run a FeatureSetCalculator over ``ids`` and return its output.

            ``entityset`` and ``feature_set`` are not parameters; they are
            resolved from an outer scope.
            """
            options = {
                'training_window': training_window,
                'precalculated_features': precalculated_features,
            }
            return FeatureSetCalculator(entityset,
                                        feature_set,
                                        time_last,
                                        **options).run(ids)

Esempio n. 17

0

Mostra file

File: test_feature_set_calculator.py Progetto: stenpiren/featuretools

def test_full_entity_trans_of_agg(es):
    """Check CumSum stacked on a Sum of log ``value`` per customer, instance 1."""
    customer_sum = ft.Feature(es['log']['value'],
                              parent_entity=es['customers'],
                              primitive=Sum)
    running_total = ft.Feature(customer_sum, primitive=CumSum)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([running_total]))
    result = calc.run(np.array([1]))

    assert result[running_total.get_name()][1] == 82

Esempio n. 18

0

Mostra file

File: test_feature_set_calculator.py Progetto: stenpiren/featuretools

def test_make_agg_feat_using_prev_time(es):
    """Check Count of log ``id`` per session with a 10 second ``use_previous``.

    Session 0 is calculated at two ``time_last`` values and compared with
    hard-coded counts.
    """
    recent_count = ft.Feature(es['log']['id'],
                              parent_entity=es['sessions'],
                              use_previous=Timedelta(10, 's'),
                              primitive=Count)
    features = FeatureSet([recent_count])

    cases = (
        (datetime(2011, 4, 9, 10, 30, 10), 2),
        (datetime(2011, 4, 9, 10, 30, 30), 1),
    )
    for cutoff, want in cases:
        calc = FeatureSetCalculator(es,
                                    time_last=cutoff,
                                    feature_set=features)
        result = calc.run(np.array([0]))
        assert result[recent_count.get_name()][0] == want

Esempio n. 19

0

Mostra file

File: test_feature_set_calculator.py Progetto: john-rice/featuretools

def test_make_agg_feat_of_identity_column(es):
    """Check Sum of log ``value`` aggregated to sessions for instance 0."""
    value_sum = ft.Feature(es['log'].ww['value'],
                           parent_dataframe_name='sessions',
                           primitive=Sum)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([value_sum]))
    result = to_pandas(calc.run(np.array([0])))

    assert result[value_sum.get_name()][0] == 50

Esempio n. 20

0

Mostra file

File: test_transform_features.py Progetto: wwdxfa/featuretools

def test_two_kinds_of_dependents(es):
    """Check Percentile and Absolute on a shared ``where``-filtered Sum.

    A third feature (a Sum of per-session filtered sums) is calculated in the
    same feature set but not asserted on.
    """
    value = ft.Feature(es['log']['value'])
    product = ft.Feature(es['log']['product_id'])

    customer_sum = ft.Feature(value,
                              parent_entity=es['customers'],
                              where=product == 'coke zero',
                              primitive=Sum)
    pct = ft.Feature(customer_sum, primitive=Percentile)
    absolute = ft.Feature(customer_sum, primitive=Absolute)

    session_sum = ft.Feature(value,
                             parent_entity=es['sessions'],
                             where=product == 'coke zero',
                             primitive=Sum)
    sum_of_session_sums = ft.Feature(session_sum,
                                     parent_entity=es['customers'],
                                     primitive=Sum)

    features = FeatureSet([pct, absolute, sum_of_session_sums])
    result = FeatureSetCalculator(es, features).run(np.array([0, 1]))

    assert result[pct.get_name()].tolist() == [2. / 3, 1.0]
    assert result[absolute.get_name()].tolist() == [15, 26]

Esempio n. 21

0

Mostra file

def test_make_agg_feat_using_prev_n_events(es):
    """Check Min of log ``value`` per session over the last 1 and 3 observations.

    The test is marked xfail unless every entity is backed by a pandas
    DataFrame. The two features are calculated for session 0 at two
    ``time_last`` values and compared with hard-coded expectations.
    """
    if not all(isinstance(entity.df, pd.DataFrame) for entity in es.entities):
        pytest.xfail('Distributed entitysets do not support use_previous')
    agg_feat_1 = ft.Feature(es['log']['value'],
                            parent_entity=es['sessions'],
                            use_previous=Timedelta(1, 'observations'),
                            primitive=Min)

    agg_feat_2 = ft.Feature(es['log']['value'],
                            parent_entity=es['sessions'],
                            use_previous=Timedelta(3, 'observations'),
                            primitive=Min)

    assert agg_feat_1.get_name() != agg_feat_2.get_name(), \
        'Features should have different names based on use_previous'

    feature_set = FeatureSet([agg_feat_1, agg_feat_2])

    # time_last is included by default
    cases = (
        (datetime(2011, 4, 9, 10, 30, 6), 5, 0),
        (datetime(2011, 4, 9, 10, 30, 30), 20, 10),
    )
    for time_last, expected_1, expected_2 in cases:
        calculator = FeatureSetCalculator(es,
                                          time_last=time_last,
                                          feature_set=feature_set)
        df = calculator.run(np.array([0]))

        v1 = df[agg_feat_1.get_name()][0]
        v2 = df[agg_feat_2.get_name()][0]
        assert v1 == expected_1
        assert v2 == expected_2

Esempio n. 22

0

Mostra file

File: test_feature_set_calculator.py Progetto: yszoke/PredictionSystem

def test_make_identity(es):
    """Check an IdentityFeature over the log ``datetime`` variable for instance 0.

    A dask result is computed before the value is read.
    """
    identity = IdentityFeature(es['log']['datetime'])

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([identity]))
    result = calc.run(np.array([0]))
    if isinstance(result, dd.DataFrame):
        result = result.compute()

    expected = datetime(2011, 4, 9, 10, 30, 0)
    assert result[identity.get_name()][0] == expected

Esempio n. 23

0

Mostra file

File: test_feature_set_calculator.py Progetto: yszoke/PredictionSystem

def test_make_agg_feat_of_grandchild_entity(es):
    """Check Count of log ``id`` aggregated directly to customers for instance 0.

    A dask result is computed and re-indexed by ``id`` with an int64 index so
    the label lookup ``[0]`` below works.
    """
    agg_feat = ft.Feature(es['log']['id'], parent_entity=es['customers'], primitive=Count)

    feature_set = FeatureSet([agg_feat])
    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=feature_set)
    df = calculator.run(np.array([0]))
    if isinstance(df, dd.DataFrame):
        df = df.compute().set_index('id')
        # pd.Int64Index was removed in pandas 2.0; astype gives an int64
        # index on both old and new pandas versions.
        df.index = df.index.astype('int64')
    v = df[agg_feat.get_name()][0]
    assert (v == 10)

Esempio n. 24

0

Mostra file

File: test_feature_set_calculator.py Progetto: yszoke/PredictionSystem

def test_full_entity_error_dask(dask_es):
    """Check that running CumSum on a Sum with ``dask_es`` raises ValueError."""
    customer_sum = ft.Feature(dask_es['log']['value'],
                              parent_entity=dask_es['customers'],
                              primitive=Sum)
    running_total = ft.Feature(customer_sum, primitive=CumSum)

    calc = FeatureSetCalculator(dask_es,
                                time_last=None,
                                feature_set=FeatureSet([running_total]))

    expected_message = "Cannot use primitives that require full entity with Dask"
    with pytest.raises(ValueError, match=expected_message):
        calc.run(np.array([1]))

Esempio n. 25

0

Mostra file

File: test_feature_set_calculator.py Progetto: xyutech/featuretools

def test_with_features_built_from_es_metadata(es):
    """Check a Count feature defined on ``es.metadata`` but calculated on ``es``."""
    meta = es.metadata
    log_count = ft.Feature(meta['log']['id'],
                           parent_entity=meta['customers'],
                           primitive=Count)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([log_count]))
    result = calc.run(np.array([0]))
    assert result[log_count.get_name()][0] == 10

Esempio n. 26

0

Mostra file

def test_direct_percentile(es):
    """Check a Percentile of customers ``age`` brought down onto sessions.

    The expected values are rebuilt with pandas: rank the customer ages with
    ``rank(pct=True)`` and look up label 0 twice, to compare against the
    values calculated for sessions 0 and 1.
    """
    v = ft.Feature(es['customers']['age'])
    p = ft.Feature(v, primitive=Percentile)
    d = ft.Feature(p, es['sessions'])
    feature_set = FeatureSet([d])
    calculator = FeatureSetCalculator(es, feature_set)
    df = calculator.run(np.array([0, 1]))

    # Work on an explicit copy: assigning a column onto a column-subset of the
    # entity's dataframe is chained assignment (SettingWithCopyWarning).
    cust_vals = es['customers'].df[[v.get_name()]].copy()
    cust_vals['percentile'] = cust_vals[v.get_name()].rank(pct=True)
    true_p = cust_vals['percentile'].loc[[0, 0]]
    for t, a in zip(true_p.values, df[d.get_name()].values):
        # Two missing values count as a match; otherwise require equality.
        assert (pd.isnull(t) and pd.isnull(a)) or t == a

Esempio n. 27

0

Mostra file

File: test_feature_set_calculator.py Progetto: yszoke/PredictionSystem

def test_agg_empty_child(es):
    """Check that Count of log per customer is 0 at a 2011-04-08 ``time_last``."""
    log_count = ft.Feature(es['log']['id'],
                           parent_entity=es['customers'],
                           primitive=Count)
    features = FeatureSet([log_count])

    # time last before the customer had any events, so child frame is empty
    cutoff = datetime(2011, 4, 8)
    calc = FeatureSetCalculator(es, time_last=cutoff, feature_set=features)
    result = calc.run(np.array([0]))
    if isinstance(result, dd.DataFrame):
        result = result.compute()

    first_count = result["COUNT(log)"].iloc[0]
    assert first_count == 0

Esempio n. 28

0

Mostra file

File: test_feature_set_calculator.py Progetto: john-rice/featuretools

def test_make_agg_feat_of_grandchild_dataframe(es):
    """Check Count of log ``id`` aggregated directly to customers for instance 0."""
    log_count = ft.Feature(es['log'].ww['id'],
                           parent_dataframe_name='customers',
                           primitive=Count)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([log_count]))
    result = to_pandas(calc.run(np.array([0])), index='id')

    first_value = result[log_count.get_name()].values[0]
    assert first_value == 10

Esempio n. 29

0

Mostra file

def test_agg_empty_child(es):
    """Check that Count of log per customer is 0 at a 2011-04-08 ``time_last``."""
    log_count = ft.Feature(es['log']['id'],
                           parent_entity=es['customers'],
                           primitive=Count)
    features = FeatureSet([log_count])

    # time last before the customer had any events, so child frame is empty
    cutoff = datetime(2011, 4, 8)
    calc = FeatureSetCalculator(es, time_last=cutoff, feature_set=features)
    result = to_pandas(calc.run(np.array([0])), index='id')

    first_count = result["COUNT(log)"].iloc[0]
    assert first_count == 0

Esempio n. 30

0

Mostra file

File: test_feature_set_calculator.py Progetto: yszoke/PredictionSystem

def test_make_agg_feat_of_identity_variable(es):
    """Check Sum of log ``value`` aggregated to sessions for instance 0.

    A dask result is computed before the value is read.
    """
    value_sum = ft.Feature(es['log']['value'],
                           parent_entity=es['sessions'],
                           primitive=Sum)

    calc = FeatureSetCalculator(es,
                                time_last=None,
                                feature_set=FeatureSet([value_sum]))
    result = calc.run(np.array([0]))
    if isinstance(result, dd.DataFrame):
        result = result.compute()

    assert result[value_sum.get_name()][0] == 50