Python FeatureSetCalculator.run Exemples, featuretools.computational_backends.feature_set_calculator.FeatureSetCalculator.run Python Exemples

Exemple #1

0

Afficher le fichier

def test_topn(es):
    """Check NMostCommon(n=2) of log product ids aggregated onto customers.

    Runs the calculator for instance ids 0, 1 and 2 and compares every row
    of the result with the expected pair of products.
    """
    topn = ft.Feature(es['log']['product_id'],
                      parent_entity=es['customers'],
                      primitive=NMostCommon(n=2))
    feature_set = FeatureSet([topn])

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=feature_set)
    df = calculator.run(np.array([0, 1, 2]))

    true_results = pd.DataFrame(
        [['toothpaste', 'coke zero'],
         ['coke zero', 'Haribo sugar-free gummy bears'],
         ['taco clock', np.nan]])
    # Asserting a bare list only checks that the list is non-empty, so the
    # previous form could never fail; all() verifies every column name.
    assert all(name in df.columns for name in topn.get_feature_names())

    for i in range(df.shape[0]):
        expected = true_results.loc[i]
        actual = df.loc[i]
        if i == 0:
            # coke zero and toothpaste have the same number of occurrences,
            # so only the set of values is compared for this row
            assert set(expected.values) == set(actual.values)
        else:
            for i1, i2 in zip(expected, actual):
                assert (pd.isnull(i1) and pd.isnull(i2)) or (i1 == i2)

Exemple #2

0

Afficher le fichier

def test_two_relationships_to_single_entity(games_es):
    """Exercise two relationships that both link games to teams.

    A Mean of the home score and a Mean of the away score are aggregated
    onto ``teams`` (one per relationship), each is brought back onto
    ``games`` as a DirectFeature through the matching relationship, and the
    values for the first three games are checked.
    """
    es = games_es
    home_rel, away_rel = es.relationships
    games = es['games']
    teams = es['teams']

    home_score_mean = ft.AggregationFeature(
        games['home_team_score'],
        teams,
        relationship_path=RelationshipPath([(False, home_rel)]),
        primitive=ft.primitives.Mean)
    away_score_mean = ft.AggregationFeature(
        games['away_team_score'],
        teams,
        relationship_path=RelationshipPath([(False, away_rel)]),
        primitive=ft.primitives.Mean)

    home_team_mean = ft.DirectFeature(home_score_mean,
                                      games,
                                      relationship=home_rel)
    away_team_mean = ft.DirectFeature(away_score_mean,
                                      games,
                                      relationship=away_rel)

    calculator = FeatureSetCalculator(
        es,
        time_last=datetime(2011, 8, 28),
        feature_set=FeatureSet([home_team_mean, away_team_mean]))
    result = calculator.run(np.array(range(3)))

    home_column = result[home_team_mean.get_name()]
    away_column = result[away_team_mean.get_name()]
    assert (home_column == [1.5, 1.5, 2.5]).all()
    assert (away_column == [1, 0.5, 2]).all()

Exemple #3

0

Afficher le fichier

def test_make_dfeat_of_agg_feat_through_parent(es):
    """
    Entity layout exercised by this test:

        R       C = Customers, the entity being predicted on
       / \\     R = Regions, a parent of customers
      S   C     S = Stores, a child of regions
          |
         etc.

    A Count of stores is aggregated onto regions and then exposed on
    customers through a DirectFeature.
    """
    stores_per_region = ft.Feature(IdentityFeature(es['stores']['id']),
                                   parent_entity=es[u'régions'],
                                   primitive=Count)
    num_stores_feat = DirectFeature(stores_per_region,
                                    child_entity=es['customers'])

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([num_stores_feat]))
    result = calculator.run(np.array([0]))

    column = result[num_stores_feat.get_name()]
    assert column[0] == 3

Exemple #4

0

Afficher le fichier

def test_make_deep_agg_feat_of_dfeat_of_agg_feat(es):
    """
    Entity layout exercised by this test (higher implies parent):

          C     C = Customers, the entity being predicted on
          |     S = Sessions, a child of Customers
      P   S     L = Log, a child of both Sessions and Products
       \\ /     P = Products, a parent of Log that is not a descendant of Customers
        L

    A Count of log rows is aggregated onto products, pulled down onto log as
    a DirectFeature, and finally averaged onto customers with Mean.
    """
    logs_per_product = ft.Feature(es['log']['id'],
                                  parent_entity=es['products'],
                                  primitive=Count)
    product_purchases_feat = DirectFeature(logs_per_product,
                                           child_entity=es['log'])
    purchase_popularity = ft.Feature(product_purchases_feat,
                                     parent_entity=es['customers'],
                                     primitive=Mean)

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([purchase_popularity]))
    result = calculator.run(np.array([0]))

    observed = result[purchase_popularity.get_name()][0]
    assert observed == 38.0 / 10.0

Exemple #5

0

Afficher le fichier

def test_make_agg_feat_where_count_or_device_type_feat(es):
    """
    Feature under test:
    number of sessions for each customer where the session has more than
    one log OR the session's device_type equals 1.
    """
    logs_per_session = ft.Feature(es['log']['id'],
                                  parent_entity=es['sessions'],
                                  primitive=Count)

    has_many_logs = logs_per_session > 1
    is_device_one = IdentityFeature(es['sessions']['device_type']) == 1
    where_clause = has_many_logs.OR(is_device_one)

    feat = ft.Feature(es['sessions']['id'],
                      parent_entity=es['customers'],
                      where=where_clause,
                      primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([feat]))
    result = calculator.run(np.array([0]))

    counts = result[feat.get_name()]
    assert counts[0] == 3

Exemple #6

0

Afficher le fichier

def test_make_dfeat_of_agg_feat_on_self(es):
    """
    Entity layout exercised by this test:

        R       R = Regions, a parent of customers
        |
        C       C = Customers, the entity being predicted on
        |
       etc.

    A Count of customers is aggregated onto regions and then brought back
    onto customers through a DirectFeature.
    """
    customers_per_region = ft.Feature(es['customers']['id'],
                                      parent_entity=es[u'régions'],
                                      primitive=Count)
    num_customers_feat = DirectFeature(customers_per_region,
                                       child_entity=es['customers'])

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([num_customers_feat]))
    result = calculator.run(np.array([0]))

    column = result[num_customers_feat.get_name()]
    assert column[0] == 3

Exemple #7

0

Afficher le fichier

def test_make_compare_feat(es):
    """
    Feature under test:
    whether a session's log count is greater than the mean log count over
    the sessions of that session's customer.
    """
    logs_per_session = ft.Feature(es['log']['id'],
                                  parent_entity=es['sessions'],
                                  primitive=Count)
    customer_mean_logs = ft.Feature(logs_per_session,
                                    parent_entity=es['customers'],
                                    primitive=Mean)
    # Bring the per-customer mean back down to the session level so it can
    # be compared row by row with the per-session count.
    mean_on_sessions = DirectFeature(customer_mean_logs,
                                     child_entity=es['sessions'])

    feat = logs_per_session > mean_on_sessions

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([feat]))
    result = calculator.run(np.array([0, 1, 2]))

    flags = result[feat.get_name()]
    first, second, third = flags[0:3]
    assert first
    assert second
    assert not third

Exemple #8

0

Afficher le fichier

def test_make_agg_feat_where_count_feat(es):
    """
    Feature under test:
    number of sessions for each customer where the session has more than
    one log.
    """
    logs_per_session = ft.Feature(es['log']['id'],
                                  parent_entity=es['sessions'],
                                  primitive=Count)
    feat = ft.Feature(es['sessions']['id'],
                      parent_entity=es['customers'],
                      where=logs_per_session > 1,
                      primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([feat]))
    result = calculator.run(np.array([0, 1]))

    counts = result[feat.get_name()]
    first, second = counts[0:2]
    assert first == 2
    assert second == 2

Exemple #9

0

Afficher le fichier

Fichier : calculate_feature_matrix.py Projet : vikibytes/featuretools

        def calc_results(time_last,
                         ids,
                         precalculated_features=None,
                         training_window=None):
            """Run a FeatureSetCalculator for ``ids`` and return its matrix.

            ``entityset``, ``feature_set``, ``progress_bar``, ``group`` and
            ``progress_callback`` are taken from the enclosing scope. When a
            progress bar exists, a callback is passed to the calculator that
            advances the bar and forwards the update to ``progress_callback``
            if one was supplied.
            """
            if progress_bar is None:
                update_progress_callback = None
            else:

                def update_progress_callback(done):
                    # Remember where the bar was so the helper can derive the
                    # increment after the bar has been advanced.
                    before = progress_bar.n
                    # presumably `done` is the fraction of this group that
                    # has been completed; it is scaled by the group's rows
                    progress_bar.update(done * group.shape[0])
                    if progress_callback is None:
                        return
                    delta, percent, elapsed = update_progress_callback_parameters(
                        progress_bar, before)
                    progress_callback(delta, percent, elapsed)

            calculator = FeatureSetCalculator(
                entityset,
                feature_set,
                time_last,
                training_window=training_window,
                precalculated_features=precalculated_features)
            return calculator.run(ids,
                                  progress_callback=update_progress_callback)

Exemple #10

0

Afficher le fichier

Fichier : test_transform_features.py Projet : john-rice/featuretools

def test_diff(pd_es):
    """Check Diff of log values grouped by session and grouped by customer.

    The two grouped Diff features are computed for log ids 0..14 and
    compared position by position with the expected values, treating NaN
    as equal to NaN.
    """
    value = ft.Feature(pd_es['log'].ww['value'])
    customer_id_feat = ft.Feature(pd_es['sessions'].ww['customer_id'], 'log')
    diff1 = ft.Feature(value,
                       groupby=ft.Feature(pd_es['log'].ww['session_id']),
                       primitive=Diff)
    diff2 = ft.Feature(value, groupby=customer_id_feat, primitive=Diff)

    calculator = FeatureSetCalculator(pd_es,
                                      feature_set=FeatureSet([diff1, diff2]))
    df = calculator.run(np.array(range(15)))

    val1 = df[diff1.get_name()].tolist()
    val2 = df[diff2.get_name()].tolist()
    correct_vals1 = [
        np.nan, 5, 5, 5, 5, np.nan, 1, 1, 1, np.nan, np.nan, 5, np.nan, 7, 7
    ]
    correct_vals2 = [np.nan, 5, 5, 5, 5, -20, 1, 1, 1, -3, np.nan, 5, -5, 7, 7]

    def check(observed, expected_values, position):
        # NaN never compares equal to itself, so it needs its own branch.
        if np.isnan(observed):
            assert (np.isnan(expected_values[position]))
        else:
            assert observed == expected_values[position]

    for position, first in enumerate(val1):
        check(first, correct_vals1, position)
        check(val2[position], correct_vals2, position)

Exemple #11

0

Afficher le fichier

def test_make_compare_feat(es):
    """
    Feature under test:
    whether a session's log count is greater than the mean log count over
    the sessions of that session's customer.
    """
    logs_per_session = ft.Feature(es["log"].ww["id"],
                                  parent_dataframe_name="sessions",
                                  primitive=Count)
    customer_mean_logs = ft.Feature(logs_per_session,
                                    parent_dataframe_name="customers",
                                    primitive=Mean)
    # Bring the per-customer mean back down to the session level so it can
    # be compared row by row with the per-session count.
    mean_on_sessions = DirectFeature(customer_mean_logs,
                                     child_dataframe_name="sessions")

    feat = logs_per_session > mean_on_sessions

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([feat]))
    result = calculator.run(np.array([0, 1, 2]))
    result = to_pandas(result, index="id", sort_index=True)

    flags = result[feat.get_name()]
    first, second, third = flags[0:3]
    assert first
    assert second
    assert not third

Exemple #12

0

Afficher le fichier

Fichier : test_feature_set_calculator.py Projet : wjsi/featuretools

def test_make_agg_feat_multiple_dtypes(es):
    """Aggregate a numeric Count and a string Mode under the same where clause.

    Both features aggregate log rows onto sessions, restricted to rows whose
    product_id equals 'coke zero'. The test is marked as an expected failure
    when any entity is backed by a dask DataFrame.
    """
    uses_dask = any(
        isinstance(entity.df, dd.DataFrame) for entity in es.entities)
    if uses_dask:
        pytest.xfail(
            'Currently no dask compatible agg prims that use multiple dtypes')

    is_coke_zero = IdentityFeature(es['log']['product_id']) == 'coke zero'

    count_feat = ft.Feature(es['log']['id'],
                            parent_entity=es['sessions'],
                            where=is_coke_zero,
                            primitive=Count)
    mode_feat = ft.Feature(es['log']['product_id'],
                           parent_entity=es['sessions'],
                           where=is_coke_zero,
                           primitive=Mode)

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([count_feat, mode_feat]))
    result = calculator.run(np.array([0]))
    if isinstance(result, dd.DataFrame):
        result = result.compute()

    count_value = result[count_feat.get_name()][0]
    mode_value = result[mode_feat.get_name()][0]
    assert count_value == 3
    assert mode_value == 'coke zero'

Exemple #13

0

Afficher le fichier

def test_make_agg_feat_multiple_dtypes(es):
    """Aggregate a numeric Count and a string Mode under the same where clause.

    Both features aggregate log rows onto sessions, restricted to rows whose
    product_id equals "coke zero". The test is marked as an expected failure
    unless the entityset's dataframe type is pandas.
    """
    if es.dataframe_type != Library.PANDAS.value:
        pytest.xfail(
            "Currently no Dask or Spark compatible agg prims that use multiple dtypes"
        )

    is_coke_zero = IdentityFeature(es["log"].ww["product_id"]) == "coke zero"
    shared_kwargs = dict(parent_dataframe_name="sessions", where=is_coke_zero)

    count_feat = ft.Feature(es["log"].ww["id"],
                            primitive=Count,
                            **shared_kwargs)
    mode_feat = ft.Feature(es["log"].ww["product_id"],
                           primitive=Mode,
                           **shared_kwargs)

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([count_feat, mode_feat]))
    result = calculator.run(np.array([0]))

    count_value = result[count_feat.get_name()][0]
    mode_value = result[mode_feat.get_name()][0]
    assert count_value == 3
    assert mode_value == "coke zero"

Exemple #14

0

Afficher le fichier

def test_make_dfeat_of_agg_feat_on_self(es):
    """
    Dataframe layout exercised by this test:

        R       R = Regions, a parent of customers
        |
        C       C = Customers, the dataframe being predicted on
        |
       etc.

    A Count of customers is aggregated onto regions and then brought back
    onto customers through a DirectFeature.
    """
    customers_per_region = ft.Feature(es["customers"].ww["id"],
                                      parent_dataframe_name="régions",
                                      primitive=Count)
    num_customers_feat = DirectFeature(customers_per_region,
                                       child_dataframe_name="customers")

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([num_customers_feat]))
    result = to_pandas(calculator.run(np.array([0])), index="id")

    column = result[num_customers_feat.get_name()]
    assert column.values[0] == 3

Exemple #15

0

Afficher le fichier

def test_make_dfeat_of_agg_feat_through_parent(es):
    """
    Dataframe layout exercised by this test:

        R       C = Customers, the dataframe being predicted on
       / \\     R = Regions, a parent of customers
      S   C     S = Stores, a child of regions
          |
         etc.

    A Count of stores is aggregated onto regions and then exposed on
    customers through a DirectFeature.
    """
    stores_per_region = ft.Feature(IdentityFeature(es["stores"].ww["id"]),
                                   parent_dataframe_name="régions",
                                   primitive=Count)
    num_stores_feat = DirectFeature(stores_per_region,
                                    child_dataframe_name="customers")

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([num_stores_feat]))
    result = to_pandas(calculator.run(np.array([0])), index="id")

    column = result[num_stores_feat.get_name()]
    assert column.values[0] == 3

Exemple #16

0

Afficher le fichier

def test_make_agg_feat_where_count_or_device_type_feat(es):
    """
    Feature under test:
    number of sessions for each customer where the session has more than
    one log OR the session's device_type equals 1.
    """
    logs_per_session = ft.Feature(es["log"].ww["id"],
                                  parent_dataframe_name="sessions",
                                  primitive=Count)

    has_many_logs = logs_per_session > 1
    device_type = IdentityFeature(es["sessions"].ww["device_type"])
    is_device_one = device_type == 1
    where_clause = has_many_logs.OR(is_device_one)

    feat = ft.Feature(es["sessions"].ww["id"],
                      parent_dataframe_name="customers",
                      where=where_clause,
                      primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([feat]))
    result = calculator.run(np.array([0]))
    result = to_pandas(result, index="id", int_index=True)

    counts = result[feat.get_name()]
    assert counts.values[0] == 3

Exemple #17

0

Afficher le fichier

Fichier : test_feature_set_calculator.py Projet : wjsi/featuretools

def test_make_agg_feat_where_count_and_device_type_feat(es):
    """
    Feature we're creating is:
    Number of sessions for each customer where the
    number of logs in the session equals 1 AND the
    session's device_type equals 1
    """
    log_count_feat = ft.Feature(es['log']['id'],
                                parent_entity=es['sessions'],
                                primitive=Count)

    compare_count = log_count_feat == 1
    compare_device_type = IdentityFeature(es['sessions']['device_type']) == 1
    and_feat = ft.Feature([compare_count, compare_device_type], primitive=And)
    feat = ft.Feature(es['sessions']['id'],
                      parent_entity=es['customers'],
                      where=and_feat,
                      primitive=Count)

    feature_set = FeatureSet([feat])
    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=feature_set)
    df = calculator.run(np.array([0]))
    if isinstance(df, dd.DataFrame):
        df = df.compute().set_index('id')
        # pd.Int64Index was removed in pandas 2.0; pd.Index with an explicit
        # int64 dtype builds the same integer index on old and new versions.
        df.index = pd.Index(df.index, dtype='int64')
    name = feat.get_name()
    instances = df[name]
    assert instances[0] == 1

Exemple #18

0

Afficher le fichier

def test_make_agg_feat_where_count_feat(es):
    """
    Feature under test:
    number of sessions for each customer where the session has more than
    one log.
    """
    logs_per_session = ft.Feature(es["log"].ww["id"],
                                  parent_dataframe_name="sessions",
                                  primitive=Count)
    feat = ft.Feature(es["sessions"].ww["id"],
                      parent_dataframe_name="customers",
                      where=logs_per_session > 1,
                      primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([feat]))
    result = calculator.run(np.array([0, 1]))
    result = to_pandas(result, index="id", sort_index=True)

    counts = result[feat.get_name()]
    first, second = counts[0:2]
    assert first == 2
    assert second == 2

Exemple #19

0

Afficher le fichier

Fichier : test_feature_set_calculator.py Projet : xyutech/featuretools

def test_calls_progress_callback(es):
    """Check that progress updates reported by ``run`` add up to 1.

    One feature of each kind (identity, direct, aggregation, an aggregation
    using TimeSinceLast, transform, groupby transform) is calculated, first
    with no cutoff and then with a time_last of 1950.
    """
    identity = ft.Feature(es['customers']['age'])
    direct = ft.Feature(es['cohorts']['cohort_name'], es['customers'])
    agg = ft.Feature(es["sessions"]["id"],
                     parent_entity=es['customers'],
                     primitive=Count)
    # this feature is handled differently than simple features
    agg_apply = ft.Feature(es["log"]["datetime"],
                           parent_entity=es['customers'],
                           primitive=TimeSinceLast)
    trans = ft.Feature(agg, primitive=CumSum)
    groupby_trans = ft.Feature(agg,
                               primitive=CumSum,
                               groupby=es["customers"]["cohort"])
    all_features = [identity, direct, agg, agg_apply, trans, groupby_trans]

    class MockProgressCallback:
        """Callable that accumulates every update it receives."""

        def __init__(self):
            self.total = 0

        def __call__(self, update):
            self.total += update

    instance_ids = [0, 1, 2]

    def total_progress(time_last):
        # Fresh feature set, calculator and callback for every run so the
        # accumulated total belongs to that run only.
        calculator = FeatureSetCalculator(es,
                                          time_last=time_last,
                                          feature_set=FeatureSet(all_features))
        callback = MockProgressCallback()
        calculator.run(np.array(instance_ids), callback)
        return callback.total

    assert np.isclose(total_progress(None), 1)

    # testing again with a time_last with no data
    assert np.isclose(total_progress(pd.Timestamp("1950")), 1)

Exemple #20

0

Afficher le fichier

def test_make_trans_feat(es):
    """Apply the Hour primitive to log datetimes and check instance 0."""
    hour_feat = ft.Feature(es['log']['datetime'], primitive=Hour)

    calculator = FeatureSetCalculator(es,
                                      feature_set=FeatureSet([hour_feat]))
    result = calculator.run(np.array([0]))

    column = result[hour_feat.get_name()]
    assert column[0] == 10

Exemple #21

0

Afficher le fichier

def test_percentile_with_cutoff(es):
    """Check Percentile of log values when a time_last cutoff is supplied."""
    log_value = ft.Feature(es['log']['value'])
    percentile = ft.Feature(log_value, primitive=Percentile)

    cutoff = pd.Timestamp('2011/04/09 10:30:13')
    calculator = FeatureSetCalculator(es, FeatureSet([percentile]), cutoff)
    result = calculator.run(np.array([2]))

    observed = result[percentile.get_name()].tolist()
    assert observed[0] == 1.0

Exemple #22

0

Afficher le fichier

def test_diff_single_value(es):
    """Check Diff of store square footage, grouped by region, for store 4."""
    stores = es['stores']
    diff = ft.Feature(stores['num_square_feet'],
                      groupby=stores[u'région_id'],
                      primitive=Diff)

    calculator = FeatureSetCalculator(es, feature_set=FeatureSet([diff]))
    result = calculator.run(np.array([4]))

    column = result[diff.get_name()]
    assert column[4] == 6000.0

Exemple #23

0

Afficher le fichier

Fichier : test_direct_features.py Projet : yuv4r4j/featuretools

def test_direct_from_identity(es):
    """Expose the sessions device_type on log rows via a DirectFeature."""
    direct = DirectFeature(base_feature=es['sessions']['device_type'],
                           child_entity=es['log'])

    calculator = FeatureSetCalculator(es,
                                      feature_set=FeatureSet([direct]),
                                      time_last=None)
    result = calculator.run([0, 5])

    observed = result[direct.get_name()].tolist()
    assert observed == [0, 1]

Exemple #24

0

Afficher le fichier

def test_diff_single_value_is_nan(es):
    """Check that the grouped Diff for store 5 is a single missing value."""
    stores = es['stores']
    diff = ft.Feature(stores['num_square_feet'],
                      groupby=stores[u'région_id'],
                      primitive=Diff)

    calculator = FeatureSetCalculator(es, feature_set=FeatureSet([diff]))
    result = calculator.run(np.array([5]))

    row_count = result.shape[0]
    assert row_count == 1
    # Nothing must survive dropna(): the only value has to be missing.
    non_null = result[diff.get_name()].dropna()
    assert non_null.shape[0] == 0

Exemple #25

0

Afficher le fichier

def test_precalculated_features(pd_es):
    """Check that precalculated feature values are used instead of recomputed.

    The aggregations use a primitive whose function always raises, so the
    first run can only succeed if the calculator reads the supplied
    precalculated matrix. A second run without precalculated data is
    expected to raise.
    """
    error_msg = (
        "This primitive should never be used because the features are precalculated"
    )

    class ErrorPrim(AggregationPrimitive):
        """Aggregation primitive whose function always raises."""

        name = "error_prim"
        input_types = [ColumnSchema(semantic_tags={"numeric"})]
        return_type = ColumnSchema(semantic_tags={"numeric"})

        def get_function(self, agg_type="pandas"):
            def error(s):
                raise RuntimeError(error_msg)

            return error

    log_value = ft.Feature(pd_es["log"].ww["value"])
    session_agg = ft.Feature(log_value,
                             parent_dataframe_name="sessions",
                             primitive=ErrorPrim)
    customer_agg = ft.Feature(session_agg,
                              parent_dataframe_name="customers",
                              primitive=ErrorPrim)
    direct = ft.Feature(customer_agg, dataframe_name="sessions")

    # Tell the FeatureSet which feature is precalculated, keyed by the
    # relationship path of the direct feature.
    feature_trie = Trie(default=set, path_constructor=RelationshipPath)
    feature_trie.get_node(direct.relationship_path).value.add(
        customer_agg.unique_name())
    feature_set = FeatureSet([direct], approximate_feature_trie=feature_trie)

    # Stand-in for the precalculated feature matrix, stored under the same
    # relationship path.
    values = [0, 1, 2]
    matrix_trie = Trie(path_constructor=RelationshipPath)
    matrix_trie.get_node(direct.relationship_path).value = pd.DataFrame(
        {customer_agg.get_name(): values})

    calculator = FeatureSetCalculator(pd_es,
                                      feature_set=feature_set,
                                      precalculated_features=matrix_trie)

    instance_ids = [0, 2, 3, 5]
    fm = calculator.run(np.array(instance_ids))

    expected = [values[0], values[0], values[1], values[2]]
    assert list(fm[direct.get_name()]) == expected

    # Without the precalculated data the raising primitive must be reached.
    with pytest.raises(RuntimeError, match=error_msg):
        uncached = FeatureSetCalculator(pd_es,
                                        feature_set=FeatureSet([direct]))
        uncached.run(instance_ids)

Exemple #26

0

Afficher le fichier

def test_direct_squared(es):
    """Check that ``feature * feature`` equals the square of the log value.

    The identity feature and its product with itself are calculated for
    log ids 0, 1 and 2, and compared row by row.
    """
    feature = IdentityFeature(es['log']['value'])
    squared = feature * feature
    feature_set = FeatureSet([feature, squared])
    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=feature_set)
    df = to_pandas(calculator.run(np.array([0, 1, 2])))
    # Select columns by feature name: integer keys on a string-labelled row
    # (row[0], row[1]) rely on a deprecated positional fallback and on the
    # order of the columns in the result.
    values = df[feature.get_name()]
    squares = df[squared.get_name()]
    for value, square in zip(values, squares):
        assert (value * value) == square

Exemple #27

0

Afficher le fichier

Fichier : test_feature_set_calculator.py Projet : stenpiren/featuretools

def test_make_agg_feat_of_grandchild_entity(es):
    """Count log rows per customer and check the value for instance 0."""
    log_count = ft.Feature(es['log']['id'],
                           parent_entity=es['customers'],
                           primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([log_count]))
    result = calculator.run(np.array([0]))

    column = result[log_count.get_name()]
    assert column[0] == 10

Exemple #28

0

Afficher le fichier

Fichier : test_feature_set_calculator.py Projet : xyutech/featuretools

def test_make_identity(es):
    """Read the log datetime through an IdentityFeature for instance 0."""
    identity = IdentityFeature(es['log']['datetime'])

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([identity]))
    result = calculator.run(np.array([0]))

    column = result[identity.get_name()]
    expected = datetime(2011, 4, 9, 10, 30, 0)
    assert column[0] == expected

Exemple #29

0

Afficher le fichier

Fichier : test_feature_set_calculator.py Projet : xyutech/featuretools

def test_make_dfeat(es):
    """Expose the customer age on sessions via a DirectFeature."""
    age_on_sessions = DirectFeature(es['customers']['age'],
                                    child_entity=es['sessions'])

    calculator = FeatureSetCalculator(
        es,
        time_last=None,
        feature_set=FeatureSet([age_on_sessions]))
    result = calculator.run(np.array([0]))

    column = result[age_on_sessions.get_name()]
    assert column[0] == 33

Exemple #30

0

Afficher le fichier

Fichier : test_feature_set_calculator.py Projet : stenpiren/featuretools

def test_make_agg_feat_of_identity_index_variable(es):
    """Count log rows per session and check the value for instance 0."""
    log_count = ft.Feature(es['log']['id'],
                           parent_entity=es['sessions'],
                           primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([log_count]))
    result = calculator.run(np.array([0]))

    column = result[log_count.get_name()]
    assert column[0] == 5