Ejemplo n.º 1
0
def test_make_deep_agg_feat_of_dfeat_of_agg_feat(entityset, backend):
    r"""
    Check a Mean over a DirectFeature of a Count, computed for instance id 0.

    The graph looks like this (higher implies parent):

          C     C = Customers, the entity we're trying to predict on
          |     S = Sessions, a child of Customers
      P   S     L = Log, a child of both Sessions and Products
       \ /      P = Products, a parent of Log which is not a descendant of customers
        L

    We're trying to calculate a DFeat from L to P on an agg_feat of P on L, and
    then aggregate it with another agg_feat of C on L.
    """
    # NOTE: the docstring is a raw string because the diagram contains a
    # backslash, which would otherwise be an invalid escape sequence.

    # agg_feat of P on L: number of log rows per product.
    log_count_feat = Count(entityset['log']['id'],
                           parent_entity=entityset['products'])

    # DFeat from L to P: expose the per-product count on each log row.
    product_purchases_feat = DirectFeature(log_count_feat,
                                           child_entity=entityset['log'])

    # agg_feat of C on L: average that count over each customer's log rows.
    purchase_popularity = Mean(product_purchases_feat,
                               parent_entity=entityset['customers'])

    pandas_backend = backend([purchase_popularity])
    df = pandas_backend.calculate_all_features(instance_ids=[0],
                                               time_last=None)
    v = df[purchase_popularity.get_name()][0]
    assert (v == 38.0 / 10.0)
Ejemplo n.º 2
0
def test_mean_nan(es):
    """Exercise Mean's ``skipna`` option and the feature names it produces.

    The three variants (default, ``skipna=False``, ``skipna=True``) are run
    against a series without NaN, a series with some NaN and a series made
    only of NaN. The second half checks ``get_name()`` for each variant.
    """
    mean_default = Mean().get_function()
    mean_keep_nans = Mean(skipna=False).get_function()
    mean_skip_nans = Mean(skipna=True).get_function()
    all_variants = (mean_default, mean_keep_nans, mean_skip_nans)

    # No missing values: every variant returns the plain mean.
    constant = pd.Series([5, 5, 5, 5, 5])
    for mean_func in all_variants:
        assert mean_func(constant) == 5

    # Some missing values: only skipna=False yields NaN.
    partly_missing = pd.Series([5, np.nan, np.nan, np.nan, np.nan, 10])
    assert mean_default(partly_missing) == 7.5
    assert isnan(mean_keep_nans(partly_missing))
    assert mean_skip_nans(partly_missing) == 7.5

    # Only missing values: every variant yields NaN.
    all_missing = pd.Series([np.nan, np.nan, np.nan, np.nan])
    for mean_func in all_variants:
        assert isnan(mean_func(all_missing))

    # test naming: skipna=True is named like the default, skipna=False
    # shows up in the generated name.
    naming_cases = (
        (Mean, "MEAN(log.value)"),
        (Mean(skipna=True), "MEAN(log.value)"),
        (Mean(skipna=False), "MEAN(log.value, skipna=False)"),
    )
    for primitive, expected_name in naming_cases:
        feature = ft.Feature(es["log"]["value"],
                             parent_entity=es["customers"],
                             primitive=primitive)
        assert feature.get_name() == expected_name
def test_make_deep_agg_feat_of_dfeat_of_agg_feat(entityset, backend):
    r"""
    Check a Mean over a DirectFeature of a Count, computed for instance id 0.

    The graph looks like this (higher implies parent):

          C     C = Customers, the entity we're trying to predict on
          |     S = Sessions, a child of Customers
      P   S     L = Log, a child of both Sessions and Products
       \ /      P = Products, a parent of Log which is not a descendant of customers
        L

    We're trying to calculate a DFeat from L to P on an agg_feat of P on L, and
    then aggregate it with another agg_feat of C on L.
    """
    # NOTE: the docstring is a raw string because the diagram contains a
    # backslash, which would otherwise be an invalid escape sequence.

    # agg_feat of P on L: number of log rows per product.
    log_count_feat = Count(entityset['log']['id'],
                           parent_entity=entityset['products'])

    # DFeat from L to P: expose the per-product count on each log row.
    product_purchases_feat = DirectFeature(log_count_feat,
                                           child_entity=entityset['log'])

    # agg_feat of C on L: average that count over each customer's log rows.
    purchase_popularity = Mean(product_purchases_feat,
                               parent_entity=entityset['customers'])

    pandas_backend = backend([purchase_popularity])
    df = pandas_backend.calculate_all_features(instance_ids=[0],
                                               time_last=None)
    v = df[purchase_popularity.get_name()][0]
    assert (v == 38.0 / 10.0)
Ejemplo n.º 4
0
def test_check_input_types(es):
    """Mean over a Count feature passes ``_check_input_types``, with and without ``where``."""
    session_count = Count(es["sessions"]["id"], es["customers"])

    # Plain aggregation of the count.
    plain_mean = Mean(session_count, es[u"régions"])
    assert plain_mean._check_input_types()

    # Same aggregation, restricted by a comparison built from the count.
    more_than_three = session_count > 3
    filtered_mean = Mean(session_count, es[u"régions"], where=more_than_three)
    assert filtered_mean._check_input_types()
Ejemplo n.º 5
0
def test_mean_nan():
    """Exercise Mean's ``ignore_nans`` option on three kinds of input.

    The default, ``ignore_nans=False`` and ``ignore_nans=True`` functions are
    each run against an array without NaN, an array with some NaN and an
    array made only of NaN.
    """
    mean_func_nans_default = Mean().get_function()
    mean_func_nans_false = Mean(ignore_nans=False).get_function()
    mean_func_nans_true = Mean(ignore_nans=True).get_function()

    # No missing values: every variant returns the plain mean.
    array = np.array([5, 5, 5, 5, 5])
    assert mean_func_nans_default(array) == 5
    assert mean_func_nans_false(array) == 5
    assert mean_func_nans_true(array) == 5

    # Some missing values: only ignore_nans=True produces a number.
    array = np.array([5, np.nan, np.nan, np.nan, np.nan, 10])
    assert isnan(mean_func_nans_default(array))
    assert isnan(mean_func_nans_false(array))
    assert mean_func_nans_true(array) == 7.5

    # Only missing values: every variant yields NaN. The default variant is
    # checked against array_nans too, not the partly-NaN array used above.
    array_nans = np.array([np.nan, np.nan, np.nan, np.nan])
    assert isnan(mean_func_nans_default(array_nans))
    assert isnan(mean_func_nans_false(array_nans))
    assert isnan(mean_func_nans_true(array_nans))
Ejemplo n.º 6
0
def test_deep_agg_feat_chain(entityset, backend):
    """
    Aggregation stacked on an aggregation:
        region.Mean(customer.Count(Log))
    evaluated for the 'United States' instance.
    """
    # Inner aggregation: log rows counted per customer.
    logs_per_customer = Count(entityset['log']['id'],
                              parent_entity=entityset['customers'])

    # Outer aggregation: mean of those counts per region.
    mean_logs_per_region = Mean(logs_per_customer,
                                parent_entity=entityset[u'régions'])

    calculator = backend([mean_logs_per_region])
    feature_matrix = calculator.calculate_all_features(
        instance_ids=['United States'],
        time_last=None)

    column = feature_matrix[mean_logs_per_region.get_name()]
    assert (column[0] == 17 / 3.)
Ejemplo n.º 7
0
def test_make_compare_feat(entityset, backend):
    """
    Feature we're creating is:
    Whether the number of logs in a session is greater than the mean
    number of logs per session of that session's customer.

    The feature is calculated for instance ids 0, 1 and 2; the first two
    are expected to be truthy and the third falsy.
    """
    # Count.max_stack_depth is class-level state shared with every other
    # test, so remember what was set directly on Count and put it back
    # afterwards instead of leaking the override.
    unset = object()
    previous_depth = vars(Count).get('max_stack_depth', unset)
    Count.max_stack_depth = 2
    try:
        # Number of log rows per session.
        log_count_feat = Count(entityset['log']['id'],
                               parent_entity=entityset['sessions'])

        # Mean of that count over each customer's sessions...
        mean_agg_feat = Mean(log_count_feat,
                             parent_entity=entityset['customers'])

        # ...brought back down onto the sessions entity.
        mean_feat = DirectFeature(mean_agg_feat,
                                  child_entity=entityset['sessions'])

        feat = log_count_feat > mean_feat

        pandas_backend = backend([feat])
        df = pandas_backend.calculate_all_features(instance_ids=[0, 1, 2],
                                                   time_last=None)
        name = feat.get_name()
        instances = df[name]
        v0, v1, v2 = instances[0:3]
        assert v0
        assert v1
        assert not v2
    finally:
        if previous_depth is unset:
            # The attribute was not defined on Count itself before the test.
            del Count.max_stack_depth
        else:
            Count.max_stack_depth = previous_depth
def test_deep_agg_feat_chain(entityset, backend):
    """
    Aggregation stacked on an aggregation:
        region.Mean(customer.Count(Log))
    evaluated for the 'United States' instance.
    """
    # Inner aggregation: log rows counted per customer.
    logs_per_customer = Count(entityset['log']['id'],
                              parent_entity=entityset['customers'])

    # Outer aggregation: mean of those counts per region.
    mean_logs_per_region = Mean(logs_per_customer,
                                parent_entity=entityset['regions'])

    calculator = backend([mean_logs_per_region])
    feature_matrix = calculator.calculate_all_features(
        instance_ids=['United States'],
        time_last=None)

    column = feature_matrix[mean_logs_per_region.get_name()]
    assert (column[0] == 17 / 3.)
Ejemplo n.º 9
0
def test_check_input_types(es):
    """Mean over a Count feature passes ``_check_input_types``, with and without ``where``."""
    session_count = Count(es["sessions"]["id"], es["customers"])

    # Plain aggregation of the count.
    plain_mean = Mean(session_count, es[u"régions"])
    assert plain_mean._check_input_types()

    # Same aggregation, restricted by a comparison built from the count.
    more_than_three = session_count > 3
    filtered_mean = Mean(session_count, es[u"régions"], where=more_than_three)
    assert filtered_mean._check_input_types()