Ejemplo n.º 1
0
def latent_sim_features(base_path, log, examples, latent_path=None,
                        keys=KEYS, sizes=SIZES, redo=False):
    """Add the 'latent_sim_features' feature set to ``examples``, with caching.

    The features are cached in ``<base_path>features/latent_sim_features.fthr``.
    When that file exists and ``redo`` is false, it is loaded, restricted to
    the session ids present in ``examples`` and handed to ``copy_features``.
    Otherwise the features are built by ``create_features`` and the cache
    file is (re)written.

    Args:
        base_path: String prefix of the data folder. ``'features/'`` is
            appended by plain concatenation, so the prefix has to end with a
            path separator.
        log: Forwarded unchanged to ``create_features``.
        examples: Frame-like object with a ``session_id`` column; an
            ``impressions`` column is also needed when the cache is written.
        latent_path: Forwarded to ``create_features``; falls back to
            ``base_path`` when ``None``.
        keys: Forwarded to ``create_features``.
        sizes: Forwarded to ``create_features``.
        redo: When true, ignore an existing cache file and rebuild it.

    Returns:
        ``examples`` as returned by ``copy_features`` (cache hit) or by
        ``reduce_mem_usage`` (cache miss / rebuild).
    """
    feature_set = 'latent_sim_features'
    latent_path = base_path if latent_path is None else latent_path
    cache_file = Path(base_path + 'features/' + feature_set + '.fthr')

    if not cache_file.is_file() or redo:
        # No usable cache (or rebuild requested): compute and persist.
        examples, new_cols = create_features(
            log,
            examples,
            latent_path=latent_path,
            keys=keys,
            sizes=sizes,
        )
        examples = reduce_mem_usage(examples, cols=new_cols)
        # Only the row identifiers plus the new columns go into the cache.
        write_feather(examples[['session_id', 'impressions', *new_cols]],
                      cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the sessions we were given.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Ejemplo n.º 2
0
Archivo: time.py Proyecto: rn5l/rsc19
def time_features(base_path, log, examples,
                  preprocessed_path=PREPROCESSED_FOLDER, redo=False):
    """Add the 'time_features' feature set to ``examples``, with caching.

    The features are cached in ``<base_path>features/time_features.fthr``.
    When that file exists and ``redo`` is false, it is loaded, restricted to
    the session ids present in ``examples`` and handed to ``copy_features``.
    Otherwise the features are built by ``create_features`` and the cache
    file is (re)written.

    Args:
        base_path: String prefix of the data folder. ``'features/'`` is
            appended by plain concatenation, so the prefix has to end with a
            path separator.
        log: Forwarded unchanged to ``create_features``.
        examples: Frame-like object with a ``session_id`` column; an
            ``impressions`` column is also needed when the cache is written.
        preprocessed_path: Forwarded to ``create_features``.
        redo: When true, ignore an existing cache file and rebuild it.

    Returns:
        ``examples`` as returned by ``copy_features`` (cache hit) or by
        ``reduce_mem_usage`` (cache miss / rebuild).
    """
    feature_set = 'time_features'
    cache_file = Path(base_path + 'features/' + feature_set + '.fthr')

    if not cache_file.is_file() or redo:
        # No usable cache (or rebuild requested): compute and persist.
        examples, new_cols = create_features(
            log,
            examples,
            preprocessed_path=preprocessed_path,
        )
        examples = reduce_mem_usage(examples, cols=new_cols)
        # Only the row identifiers plus the new columns go into the cache.
        write_feather(examples[['session_id', 'impressions', *new_cols]],
                      cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the sessions we were given.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Ejemplo n.º 3
0
def pop_features(base_path, log, examples, hidden=False, min_pop=None,
                 train_only=False, redo=False):
    """Add the 'pop_features' feature set to ``examples``, with caching.

    The cache file name starts with ``pop_features`` and gets one suffix per
    active option (``_hidden``, ``_mp<min_pop>``, ``_trainonly``), so each
    option combination has its own file under ``<base_path>features/``.
    When that file exists and ``redo`` is false, it is loaded, restricted to
    the session ids present in ``examples`` and handed to ``copy_features``.
    Otherwise the features are built by ``create_features`` and the cache
    file is (re)written.

    Args:
        base_path: String prefix of the data folder. ``'features/'`` is
            appended by plain concatenation, so the prefix has to end with a
            path separator.
        log: Forwarded unchanged to ``create_features``.
        examples: Frame-like object with a ``session_id`` column; an
            ``impressions`` column is also needed when the cache is written.
        hidden: Forwarded to ``create_features``; adds ``_hidden`` to the
            cache name when truthy.
        min_pop: Forwarded to ``create_features``; adds ``_mp<min_pop>`` to
            the cache name when not ``None``.
        train_only: Forwarded to ``create_features``; adds ``_trainonly`` to
            the cache name when truthy.
        redo: When true, ignore an existing cache file and rebuild it.

    Returns:
        ``examples`` as returned by ``copy_features`` (cache hit) or by
        ``reduce_mem_usage`` (cache miss / rebuild).
    """
    # Assemble the option-dependent cache name piece by piece.
    name_parts = ['pop_features']
    if hidden:
        name_parts.append('_hidden')
    if min_pop is not None:
        name_parts.append('_mp' + str(min_pop))
    if train_only:
        name_parts.append('_trainonly')
    feature_set = ''.join(name_parts)

    cache_file = Path(base_path + 'features/' + feature_set + '.fthr')

    if not cache_file.is_file() or redo:
        # No usable cache (or rebuild requested): compute and persist.
        examples, new_cols = create_features(
            log,
            examples,
            hidden=hidden,
            min_pop=min_pop,
            train_only=train_only,
        )
        # NOTE(review): no `cols` argument is passed to reduce_mem_usage
        # here - confirm that is intended.
        examples = reduce_mem_usage(examples)
        # Only the row identifiers plus the new columns go into the cache.
        write_feather(examples[['session_id', 'impressions', *new_cols]],
                      cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the sessions we were given.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Ejemplo n.º 4
0
def meta_features(base_path,
                  meta_path,
                  log,
                  examples,
                  latent='d2v',
                  latent_size=16,
                  redo=False):
    """Add the 'meta_features' feature set to ``examples``, with caching.

    The cache file is ``<base_path>features/meta_features_all.fthr`` when
    ``latent`` is ``None`` and ``meta_features_<latent_size>.fthr`` otherwise.
    When that file exists and ``redo`` is false, it is loaded, restricted to
    the session ids present in ``examples`` and handed to ``copy_features``.
    Otherwise the features are built by ``create_features`` and the cache
    file is (re)written.

    NOTE(review): the cache name encodes ``latent_size`` but not the
    ``latent`` prefix itself, so two different prefixes with the same size
    resolve to the same cache file - confirm that is intended.

    Args:
        base_path: String prefix of the data folder. ``'features/'`` is
            appended by plain concatenation, so the prefix has to end with a
            path separator.
        meta_path: Forwarded unchanged to ``create_features``.
        log: Forwarded unchanged to ``create_features``.
        examples: Frame-like object with a ``session_id`` column; an
            ``impressions`` column is also needed when the cache is written.
        latent: Forwarded to ``create_features`` as ``latent_prefix``;
            ``None`` selects the ``_all`` cache name.
        latent_size: Forwarded to ``create_features``; part of the cache name
            when ``latent`` is not ``None``.
        redo: When true, ignore an existing cache file and rebuild it.

    Returns:
        ``examples`` as returned by ``copy_features`` (cache hit) or by
        ``reduce_mem_usage`` (cache miss / rebuild).
    """
    # Identity check: `== None` would go through a custom/element-wise __eq__.
    suffix = '_all' if latent is None else '_' + str(latent_size)
    name = 'meta_features' + suffix

    path = Path(base_path + 'features/' + name + '.fthr')
    if path.is_file() and not redo:
        # Cache hit: keep only the rows belonging to the given sessions.
        features = load_feather(path)
        features = features[features.session_id.isin(
            examples.session_id.unique())]
        examples = copy_features(examples, features)
    else:
        # No usable cache (or rebuild requested): compute and persist.
        examples, cols = create_features(meta_path,
                                         log,
                                         examples,
                                         latent_prefix=latent,
                                         latent_size=latent_size)
        # NOTE(review): no `cols` argument is passed to reduce_mem_usage
        # here - confirm that is intended.
        examples = reduce_mem_usage(examples)
        # Only the row identifiers plus the new columns go into the cache.
        write_feather(examples[['session_id', 'impressions'] + list(cols)],
                      path)
        print_col_list(cols)

    return examples
Ejemplo n.º 5
0
def crawl_features(base_path, crawl_path, log, examples, redo=False):
    """Add the 'crawl_features' feature set to ``examples``, with caching.

    The features are cached in ``<base_path>features/crawl_features.fthr``.
    When that file exists and ``redo`` is false, it is loaded, restricted to
    the session ids present in ``examples`` and handed to ``copy_features``.
    Otherwise the features are built by ``create_features`` and the cache
    file is (re)written.

    Args:
        base_path: String prefix of the data folder. ``'features/'`` is
            appended by plain concatenation, so the prefix has to end with a
            path separator.
        crawl_path: Forwarded unchanged to ``create_features``.
        log: Forwarded unchanged to ``create_features``.
        examples: Frame-like object with a ``session_id`` column; an
            ``impressions`` column is also needed when the cache is written.
        redo: When true, ignore an existing cache file and rebuild it.

    Returns:
        ``examples`` as returned by ``copy_features`` (cache hit) or by
        ``reduce_mem_usage`` (cache miss / rebuild).
    """
    feature_set = 'crawl_features'
    cache_file = Path(base_path + 'features/' + feature_set + '.fthr')

    if not cache_file.is_file() or redo:
        # No usable cache (or rebuild requested): compute and persist.
        examples, new_cols = create_features(crawl_path, log, examples)
        # NOTE(review): no `cols` argument is passed to reduce_mem_usage
        # here - confirm that is intended.
        examples = reduce_mem_usage(examples)
        # Only the row identifiers plus the new columns go into the cache.
        write_feather(examples[['session_id', 'impressions', *new_cols]],
                      cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the sessions we were given.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Ejemplo n.º 6
0
def session_features(base_path, log, examples, price_path=None,
                     crawl_path=CRAWL_FOLDER, poi_path=POI_FOLDER,
                     redo=False):
    """Add the 'session_features' feature set to ``examples``, with caching.

    The features are cached in ``<base_path>features/session_features.fthr``.
    When that file exists and ``redo`` is false, it is loaded, restricted to
    the session ids present in ``examples`` and handed to ``copy_features``.
    Otherwise the features are built by ``create_features`` and the cache
    file is (re)written.

    Args:
        base_path: String prefix of the data folder. ``'features/'`` is
            appended by plain concatenation, so the prefix has to end with a
            path separator.
        log: Forwarded unchanged to ``create_features``.
        examples: Frame-like object with a ``session_id`` column; an
            ``impressions`` column is also needed when the cache is written.
        price_path: Forwarded to ``create_features``; falls back to
            ``base_path`` when ``None``.
        crawl_path: Forwarded to ``create_features``.
        poi_path: Forwarded to ``create_features``.
        redo: When true, ignore an existing cache file and rebuild it.

    Returns:
        ``examples`` as returned by ``copy_features`` (cache hit) or by
        ``reduce_mem_usage`` (cache miss / rebuild).
    """
    feature_set = 'session_features'
    price_path = base_path if price_path is None else price_path
    cache_file = Path(base_path + 'features/' + feature_set + '.fthr')

    if not cache_file.is_file() or redo:
        # No usable cache (or rebuild requested): compute and persist.
        examples, new_cols = create_features(
            log,
            examples,
            price_path=price_path,
            crawl_path=crawl_path,
            poi_path=poi_path,
        )
        examples = reduce_mem_usage(examples, cols=new_cols)
        # Only the row identifiers plus the new columns go into the cache.
        write_feather(examples[['session_id', 'impressions', *new_cols]],
                      cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the sessions we were given.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Ejemplo n.º 7
0
def list_context_features(base_path, log, examples, shifts=SHIFTS,
                          redo=False):
    """Add the list-context feature set to ``examples``, with caching.

    The cache file is
    ``<base_path>features/list_context_features_<str(shifts)>.fthr``, so each
    distinct ``shifts`` value gets its own file. When that file exists and
    ``redo`` is false, it is loaded, restricted to the session ids present in
    ``examples`` and handed to ``copy_features``. Otherwise the features are
    built by ``create_features`` and the cache file is (re)written.

    Args:
        base_path: String prefix of the data folder. ``'features/'`` is
            appended by plain concatenation, so the prefix has to end with a
            path separator.
        log: Forwarded unchanged to ``create_features``.
        examples: Frame-like object with a ``session_id`` column; an
            ``impressions`` column is also needed when the cache is written.
        shifts: Forwarded to ``create_features``; its ``str()`` form is part
            of the cache file name.
        redo: When true, ignore an existing cache file and rebuild it.

    Returns:
        ``examples`` as returned by ``copy_features`` (cache hit) or by
        ``reduce_mem_usage`` (cache miss / rebuild).
    """
    feature_set = 'list_context_features_' + str(shifts)
    cache_file = Path(base_path + 'features/' + feature_set + '.fthr')

    if not cache_file.is_file() or redo:
        # No usable cache (or rebuild requested): compute and persist.
        examples, new_cols = create_features(log, examples, shifts=shifts)
        # NOTE(review): no `cols` argument is passed to reduce_mem_usage
        # here - confirm that is intended.
        examples = reduce_mem_usage(examples)
        # Only the row identifiers plus the new columns go into the cache.
        write_feather(examples[['session_id', 'impressions', *new_cols]],
                      cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the sessions we were given.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)