def test_copy_features_does_not_copy_entityset(es):
    """Check that copying features shares the entityset instead of duplicating it.

    Four ``Sum`` aggregations of ``es['log']['value']`` onto ``es['sessions']``
    are built (plain, with ``where``, with ``use_previous``, and with both),
    each one is copied, and the test verifies that:

    * the size of the local namespace reported by ``getsize`` after copying
      is below twice its size before copying, and
    * every copy, its ``where`` feature and its base features reference the
      very same entityset objects as the corresponding originals.

    Args:
        es: Entityset under test (presumably a pytest fixture); it is indexed
            with ``'log'`` and ``'sessions'``.
    """
    agg = Sum(es['log']['value'], es['sessions'])
    agg_where = Sum(es['log']['value'], es['sessions'],
                    where=IdentityFeature(es['log']['value']) == 2)
    agg_use_previous = Sum(es['log']['value'], es['sessions'],
                           use_previous='4 days')
    agg_use_previous_where = Sum(es['log']['value'], es['sessions'],
                                 where=IdentityFeature(es['log']['value']) == 2,
                                 use_previous='4 days')
    features = [agg, agg_where, agg_use_previous, agg_use_previous_where]

    # NOTE: getsize() is fed locals(), so the set of local variables that
    # exist at each call is part of what is measured. Do not introduce extra
    # locals above this point without revisiting the threshold.
    in_memory_size = getsize(locals())
    copied = [f.copy() for f in features]
    new_in_memory_size = getsize(locals())
    assert new_in_memory_size < 2 * in_memory_size

    for f, c in zip(features, copied):
        assert f.entityset
        assert c.entityset
        # Use ``is`` rather than comparing id() values: both operands are
        # kept alive for the comparison, so a recycled id can never make two
        # distinct objects look identical.
        assert f.entityset is c.entityset
        if f.where:
            assert c.where
            assert f.where.entityset is c.where.entityset
        # zip() stops at the shorter input, so make sure the copy did not
        # silently drop base features before comparing them pairwise.
        assert len(f.base_features) == len(c.base_features)
        for bf, bf_c in zip(f.base_features, c.base_features):
            assert bf.entityset is bf_c.entityset
            if bf.where:
                assert bf_c.where
                assert bf.where.entityset is bf_c.where.entityset
def test_copy_features_does_not_copy_entityset(es):
    """Verify that ``copy()`` on a feature reuses the original's entityset.

    The test creates four ``Sum`` features over ``es['log']['value']``
    aggregated to ``es['sessions']`` -- one plain, one with a ``where``
    condition, one with ``use_previous='4 days'`` and one with both -- and
    copies them. It then asserts that the ``getsize`` measurement of the
    local namespace did not double, and that originals and copies point at
    identical entityset objects (directly, through ``where``, and through
    each base feature).

    Args:
        es: Entityset under test (presumably a pytest fixture); it is indexed
            with ``'log'`` and ``'sessions'``.
    """
    agg = Sum(es['log']['value'], es['sessions'])
    agg_where = Sum(
        es['log']['value'], es['sessions'],
        where=IdentityFeature(es['log']['value']) == 2)
    agg_use_previous = Sum(
        es['log']['value'], es['sessions'],
        use_previous='4 days')
    agg_use_previous_where = Sum(
        es['log']['value'], es['sessions'],
        where=IdentityFeature(es['log']['value']) == 2,
        use_previous='4 days')
    features = [agg, agg_where, agg_use_previous, agg_use_previous_where]

    # NOTE: the measurement below is taken over locals(); adding or removing
    # local variables before these calls changes what is being measured.
    in_memory_size = getsize(locals())
    copied = [f.copy() for f in features]
    new_in_memory_size = getsize(locals())
    assert new_in_memory_size < 2 * in_memory_size

    for f, c in zip(features, copied):
        assert f.entityset
        assert c.entityset
        # Identity is checked with ``is``; comparing id() results is fragile
        # because ids may be reused once an object has been released.
        assert f.entityset is c.entityset
        if f.where:
            assert c.where
            assert f.where.entityset is c.where.entityset

        # Guard against zip() truncation hiding base features that went
        # missing in the copy.
        assert len(f.base_features) == len(c.base_features)
        for bf, bf_c in zip(f.base_features, c.base_features):
            assert bf.entityset is bf_c.entityset
            if bf.where:
                assert bf_c.where
                assert bf.where.entityset is bf_c.where.entityset
def test_pickle_features(es):
    """Round-trip DFS features through ``ft.save_features``/``ft.load_features``.

    Features are built for the ``sessions`` entity with the ``Last`` and
    ``Mean`` aggregation primitives, saved to a file next to this module and
    loaded back with ``es``. The test asserts that each loaded feature has
    the same ``hash()`` and an equal entityset as its original, and that the
    saved feature file is smaller than both the ``getsize`` measurement of
    the entityset and the pickled entityset file.

    Both files are removed when the test finishes, whether it passes or not.

    Args:
        es: Entityset passed to ``DeepFeatureSynthesis`` and to
            ``ft.load_features`` (presumably a pytest fixture).
    """
    dfs_obj = DeepFeatureSynthesis(target_entity_id='sessions',
                                   entityset=es,
                                   filters=[],
                                   agg_primitives=[Last, Mean],
                                   trans_primitives=[],
                                   max_features=20)
    features_no_pickle = dfs_obj.build_features()
    # Entityset as reached through the features themselves; it is used both
    # for the pickled reference file and for the in-memory size comparison.
    entityset = features_no_pickle[0].entityset

    dir_path = os.path.dirname(os.path.realpath(__file__))
    filepath = os.path.join(dir_path, 'test_feature')
    es_filepath = os.path.join(dir_path, 'test_entityset')

    try:
        # pickle entityset
        save_obj_pickle(entityset, es_filepath)

        ft.save_features(features_no_pickle, filepath)
        features_pickle = ft.load_features(filepath, es)

        # zip() stops at the shorter sequence, so check the counts first or
        # features missing after loading would go unnoticed.
        assert len(features_pickle) == len(features_no_pickle)
        for feat_1, feat_2 in zip(features_no_pickle, features_pickle):
            assert feat_1.hash() == feat_2.hash()
            assert feat_1.entityset == feat_2.entityset

        # file is smaller than entityset in memory
        assert os.path.getsize(filepath) < getsize(entityset)

        # file is smaller than entityset pickled
        assert os.path.getsize(filepath) < os.path.getsize(es_filepath)
    finally:
        # Clean up even when an assertion fails, so a failing run does not
        # leave stale artifacts beside the test module. A file may not exist
        # if the failure happened before it was written.
        for path in (filepath, es_filepath):
            if os.path.exists(path):
                os.remove(path)
def test_pickle_features_with_custom_primitive(es):
    """Round-trip features that use a primitive created by ``make_agg_primitive``.

    A ``NewMean`` aggregation primitive wrapping ``np.nanmean`` is defined
    inside the test and handed to ``DeepFeatureSynthesis`` together with
    ``Last`` and ``Mean``. After checking that at least one generated feature
    is a ``NewMean`` instance, the features are saved with
    ``ft.save_features`` and reloaded with ``ft.load_features``. Each loaded
    feature must have the same ``hash()`` and an equal entityset as its
    original, and the feature file must be smaller than both the ``getsize``
    measurement of ``es`` and the pickled entityset file.

    Both files are removed when the test finishes, whether it passes or not.

    Args:
        es: Entityset passed to ``DeepFeatureSynthesis`` (presumably a pytest
            fixture).
    """
    NewMean = make_agg_primitive(
        np.nanmean,
        name="NewMean",
        input_types=[Numeric],
        return_type=Numeric,
        description="Calculate means ignoring nan values")
    dfs_obj = DeepFeatureSynthesis(target_entity_id='sessions',
                                   entityset=es,
                                   agg_primitives=[Last, Mean, NewMean],
                                   trans_primitives=[],
                                   max_features=20)
    features_no_pickle = dfs_obj.build_features()
    # The custom primitive must actually be exercised by the round trip.
    assert any(isinstance(feat, NewMean) for feat in features_no_pickle)

    dir_path = os.path.dirname(os.path.realpath(__file__))
    filepath = os.path.join(dir_path, 'test_feature')
    es_filepath = os.path.join(dir_path, 'test_entityset')

    try:
        # pickle entityset
        save_obj_pickle(es, es_filepath)

        ft.save_features(features_no_pickle, filepath)
        features_pickle = ft.load_features(filepath)

        # zip() stops at the shorter sequence, so check the counts first or
        # features missing after loading would go unnoticed.
        assert len(features_pickle) == len(features_no_pickle)
        for feat_1, feat_2 in zip(features_no_pickle, features_pickle):
            assert feat_1.hash() == feat_2.hash()
            assert feat_1.entityset == feat_2.entityset

        # file is smaller than entityset in memory
        assert os.path.getsize(filepath) < getsize(es)

        # file is smaller than entityset pickled
        assert os.path.getsize(filepath) < os.path.getsize(es_filepath)
    finally:
        # Clean up even when an assertion fails, so a failing run does not
        # leave stale artifacts beside the test module. A file may not exist
        # if the failure happened before it was written.
        for path in (filepath, es_filepath):
            if os.path.exists(path):
                os.remove(path)
def test_pickle_features_with_custom_primitive(es):
    """Save and reload features built with a custom aggregation primitive.

    ``NewMean`` is created via ``make_agg_primitive`` around ``np.nanmean``
    and used alongside ``Last`` and ``Mean`` when synthesising features for
    the ``sessions`` entity. The test requires that some generated feature
    is a ``NewMean`` instance, then writes the features with
    ``ft.save_features``, reads them back with ``ft.load_features`` and
    asserts that hashes and entitysets match pairwise. It also asserts that
    the feature file is smaller than the ``getsize`` measurement of ``es``
    and than the pickled entityset file.

    The two files written beside this module are always deleted afterwards.

    Args:
        es: Entityset passed to ``DeepFeatureSynthesis`` (presumably a pytest
            fixture).
    """
    NewMean = make_agg_primitive(
        np.nanmean,
        name="NewMean",
        input_types=[Numeric],
        return_type=Numeric,
        description="Calculate means ignoring nan values",
    )
    dfs_obj = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last, Mean, NewMean],
        trans_primitives=[],
        max_features=20,
    )
    features_no_pickle = dfs_obj.build_features()
    # Without a NewMean feature the test would not cover the custom primitive.
    assert any(isinstance(feat, NewMean) for feat in features_no_pickle)

    dir_path = os.path.dirname(os.path.realpath(__file__))
    filepath = os.path.join(dir_path, 'test_feature')
    es_filepath = os.path.join(dir_path, 'test_entityset')

    try:
        # pickle entityset
        save_obj_pickle(es, es_filepath)

        ft.save_features(features_no_pickle, filepath)
        features_pickle = ft.load_features(filepath)

        # Pairwise comparison via zip() would hide a shorter loaded list,
        # so the lengths are compared explicitly first.
        assert len(features_pickle) == len(features_no_pickle)
        for feat_1, feat_2 in zip(features_no_pickle, features_pickle):
            assert feat_1.hash() == feat_2.hash()
            assert feat_1.entityset == feat_2.entityset

        # file is smaller than entityset in memory
        assert os.path.getsize(filepath) < getsize(es)

        # file is smaller than entityset pickled
        assert os.path.getsize(filepath) < os.path.getsize(es_filepath)
    finally:
        # Run cleanup on failure too; skip files that were never created so
        # the original error is not masked by a removal error.
        for path in (filepath, es_filepath):
            if os.path.exists(path):
                os.remove(path)
def test_pickle_features(es):
    """Save DFS features to disk, load them back and compare with the originals.

    Features for the ``sessions`` entity are generated with the ``Last`` and
    ``Mean`` aggregation primitives, written with ``ft.save_features`` and
    read back with ``ft.load_features``. Each loaded feature must have the
    same ``hash()`` and an equal entityset as its original. The feature file
    must also be smaller than the ``getsize`` measurement of ``es`` and than
    the pickled entityset file.

    The two files written beside this module are always deleted afterwards.

    Args:
        es: Entityset passed to ``DeepFeatureSynthesis`` (presumably a pytest
            fixture).
    """
    dfs_obj = DeepFeatureSynthesis(target_entity_id='sessions',
                                   entityset=es,
                                   agg_primitives=[Last, Mean],
                                   trans_primitives=[],
                                   max_features=20)
    features_no_pickle = dfs_obj.build_features()

    dir_path = os.path.dirname(os.path.realpath(__file__))
    filepath = os.path.join(dir_path, 'test_feature')
    es_filepath = os.path.join(dir_path, 'test_entityset')

    try:
        # pickle entityset
        save_obj_pickle(es, es_filepath)

        ft.save_features(features_no_pickle, filepath)
        features_pickle = ft.load_features(filepath)

        # zip() stops at the shorter sequence, so check the counts first or
        # features missing after loading would go unnoticed.
        assert len(features_pickle) == len(features_no_pickle)
        for feat_1, feat_2 in zip(features_no_pickle, features_pickle):
            assert feat_1.hash() == feat_2.hash()
            assert feat_1.entityset == feat_2.entityset

        # file is smaller than entityset in memory
        assert os.path.getsize(filepath) < getsize(es)

        # file is smaller than entityset pickled
        assert os.path.getsize(filepath) < os.path.getsize(es_filepath)
    finally:
        # Clean up even when an assertion fails, so a failing run does not
        # leave stale artifacts beside the test module. A file may not exist
        # if the failure happened before it was written.
        for path in (filepath, es_filepath):
            if os.path.exists(path):
                os.remove(path)