Ejemplo n.º 1
0
def main():
    """Run ``meta_features`` on the test log and test examples.

    Both frames are read with ``load_hdfs`` from ``TEST_FOLDER``. The step is
    invoked with ``latent=None`` and ``redo=False``; whatever it returns is
    discarded.
    """
    # Log first, then examples -- same load order as the other entry points.
    log, examples = (
        load_hdfs(TEST_FOLDER + file_name)
        for file_name in ('data_log.hd5', 'data_examples.hd5')
    )
    meta_features(TEST_FOLDER, META_FOLDER, log, examples, latent=None, redo=False)
Ejemplo n.º 2
0
def main():
    """Chain several feature steps on the test data, then run ``rank_features``.

    The examples frame is threaded through ``pop_features``,
    ``crawl_features``, ``geo_features`` and ``latent_sim_features`` in that
    order; each step receives the frame returned by the previous one.
    ``rank_features`` is finally called with ``redo=True`` and its result is
    discarded.
    """
    session_log = load_hdfs(TEST_FOLDER + 'data_log.hd5')
    frame = load_hdfs(TEST_FOLDER + 'data_examples.hd5')

    # Each assignment replaces the frame with the enriched one from that step.
    frame = pop_features(TEST_FOLDER, session_log, frame)
    frame = crawl_features(TEST_FOLDER, CRAWL_FOLDER, session_log, frame)
    frame = geo_features(TEST_FOLDER, CRAWL_FOLDER, session_log, frame)
    frame = latent_sim_features(TEST_FOLDER, session_log, frame, LATENT_FOLDER)

    rank_features(TEST_FOLDER, session_log, frame, redo=True)
Ejemplo n.º 3
0
def main():
    """Run ``latent_sim_features`` on the test log and test examples.

    The step is configured from the module-level ``LATENT_FOLDER``, ``KEYS``
    and ``SIZES`` values and is always invoked with ``redo=True``. Its return
    value is discarded.
    """
    session_log = load_hdfs(TEST_FOLDER + 'data_log.hd5')
    frame = load_hdfs(TEST_FOLDER + 'data_examples.hd5')

    # Keyword options collected in one place; passed through unchanged.
    options = {
        'latent_path': LATENT_FOLDER,
        'keys': KEYS,
        'sizes': SIZES,
        'redo': True,
    }
    latent_sim_features(TEST_FOLDER, session_log, frame, **options)
Ejemplo n.º 4
0
Archivo: price.py Proyecto: rn5l/rsc19
def main():
    """Run ``price_features`` on the test data using a cleaned log.

    The log is passed through ``clean_test`` after both frames are loaded.
    The step takes its settings from ``MIN_OCCURENCES``, ``HIDDEN`` and
    ``FILLNA_MEAN`` and is always invoked with ``redo=True``; its return value
    is discarded.
    """
    raw_log = load_hdfs(TEST_FOLDER + 'data_log.hd5')
    frame = load_hdfs(TEST_FOLDER + 'data_examples.hd5')
    cleaned_log = clean_test(raw_log)

    options = {
        'min_occurences': MIN_OCCURENCES,
        'hidden': HIDDEN,
        'fillna_mean': FILLNA_MEAN,
        'redo': True,
    }
    price_features(TEST_FOLDER, cleaned_log, frame, **options)
Ejemplo n.º 5
0
def main():
    """Create latent factors from the log for the item and click settings.

    Only the log is loaded (from ``DATA_FOLDER``); the examples frame is not
    read. ``create_latent_factors`` is invoked twice with the item settings
    (``SIZEA``) and twice with the click settings (``SIZEB``), with a
    ``gc.collect()`` after every invocation.
    """
    log = load_hdfs( DATA_FOLDER + 'data_log.hd5' )

    # NOTE(review): both passes of each loop use exactly the same arguments.
    # Possibly the second pass was meant to use a different size or method --
    # confirm before collapsing the repetition.
    for _ in range(2):
        create_latent_factors(
            log,
            size=SIZEA,
            actions=ACTIONS_ITEM,
            values=VALUE_ITEM,
            key=KEY_ITEM,
            method=METHOD,
        )
        gc.collect()

    for _ in range(2):
        create_latent_factors(
            log,
            size=SIZEB,
            actions=ACTIONS_CLICK,
            values=VALUE_CLICK,
            key=KEY_CLICK,
            method=METHOD,
        )
        gc.collect()
Ejemplo n.º 6
0
def main():
    """Run ``properties_features`` on the test log and examples with ``redo=True``.

    The result of the step is discarded.
    """
    session_log = load_hdfs(TEST_FOLDER + 'data_log.hd5')
    frame = load_hdfs(TEST_FOLDER + 'data_examples.hd5')
    properties_features(
        TEST_FOLDER,
        META_FOLDER,
        session_log,
        frame,
        redo=True,
    )
Ejemplo n.º 7
0
def main():
    """Run ``combine_features`` on the test log and examples with ``redo=True``.

    The result of the step is discarded.
    """
    session_log = load_hdfs(TEST_FOLDER + 'data_log.hd5')
    frame = load_hdfs(TEST_FOLDER + 'data_examples.hd5')
    combine_features(TEST_FOLDER, session_log, frame, redo=True)
Ejemplo n.º 8
0
def main():
    """Run ``position_features`` on the test data using a cleaned log.

    The log is passed through ``clean_test`` after both frames are loaded;
    the step is invoked with ``redo=True`` and its result is discarded.
    """
    raw_log = load_hdfs(TEST_FOLDER + 'data_log.hd5')
    frame = load_hdfs(TEST_FOLDER + 'data_examples.hd5')
    cleaned_log = clean_test(raw_log)
    position_features(TEST_FOLDER, cleaned_log, frame, redo=True)
Ejemplo n.º 9
0
def create_set(base_path=SET, key='dataset', conf=None, redo=False):
    """Return the feature set named ``key``, building and caching it if needed.

    The set is cached at ``base_path + 'sets/' + key + '.fthr'``. If that file
    exists and ``redo`` is false, it is read back with ``load_feather``.
    Otherwise the log and examples are loaded from ``base_path``, every
    feature step below is applied in order (each step receives the frame
    returned by the previous one), and the result is stored with
    ``write_feather`` before being returned.

    Args:
        base_path: Prefix that is string-concatenated with file names, so it
            is expected to end with a path separator.
        key: Name of the cached set (file stem under ``sets/``).
        conf: Mapping with the settings of the individual feature steps.
            It is only read when the set has to be built; in that case the
            following keys are required (a missing one raises ``KeyError``):
            ``path_pop``, ``pop_hidden``, ``min_pop``, ``train_only``,
            ``path_price``, ``min_occurences``, ``price_hidden``,
            ``fillna_mean``, ``path_session``, ``path_crawl``, ``path_meta``,
            ``meta_latent``, ``path_poi``, ``path_latent``.
            ``None`` (the default) is treated as an empty mapping.
        redo: If true, ignore an existing cache file and rebuild. The flag is
            also forwarded to every feature step.

    Returns:
        The examples frame with all feature columns (presumably a pandas
        DataFrame -- it is used via ``.columns`` and ``del frame[col]``).
    """
    # A dict literal as default would be a single object shared by all calls;
    # use a None sentinel and create a fresh mapping per call instead.
    if conf is None:
        conf = {}

    path = Path(base_path + 'sets/' + key + '.fthr')
    if path.is_file() and not redo:
        print('loaded')
        examples = load_feather(path)
        gc.collect()
        return examples

    print('create')
    log = load_hdfs(base_path + 'data_log.hd5')
    examples = load_hdfs(base_path + 'data_examples.hd5')

    # Drop helper columns that must not end up in the feature set. The column
    # names are collected once instead of rebuilding a set per lookup.
    present = set(examples.columns)
    for column in ('current_filters', 'session_id_pre'):
        if column in present:
            print(column)
            del examples[column]

    examples = pop_features(conf['path_pop'],
                            log,
                            examples,
                            hidden=conf['pop_hidden'],
                            min_pop=conf['min_pop'],
                            train_only=conf['train_only'],
                            redo=redo)
    examples = price_features(conf['path_price'],
                              log,
                              examples,
                              min_occurences=conf['min_occurences'],
                              hidden=conf['price_hidden'],
                              train_only=conf['train_only'],
                              fillna_mean=conf['fillna_mean'],
                              redo=redo)
    examples = session_features(conf['path_session'],
                                log,
                                examples,
                                crawl_path=conf['path_crawl'],
                                redo=redo)
    examples = crawl_features(base_path,
                              conf['path_crawl'],
                              log,
                              examples,
                              redo=redo)
    examples = geo_features(base_path,
                            conf['path_crawl'],
                            log,
                            examples,
                            redo=redo)
    examples = meta_features(base_path,
                             conf['path_meta'],
                             log,
                             examples,
                             latent=conf['meta_latent'],
                             redo=redo)
    examples = user_features(conf['path_session'],
                             log,
                             examples,
                             crawl_path=conf['path_crawl'],
                             poi_path=conf['path_poi'],
                             redo=redo)
    examples = position_features(base_path, log, examples, redo=redo)
    examples = properties_features(base_path,
                                   conf['path_meta'],
                                   log,
                                   examples,
                                   redo=redo)
    # Disabled step (kept for reference):
    # examples = latent_features(base_path, log, examples, latent_path=conf['path_latent'], redo=redo)
    examples = latent_sim_features(base_path,
                                   log,
                                   examples,
                                   latent_path=conf['path_latent'],
                                   redo=redo)
    examples = combine_features(base_path, log, examples, redo=redo)
    examples = rank_features(base_path, log, examples, redo=redo)
    examples = time_features(base_path, log, examples, redo=redo)
    examples = list_context_features(base_path, log, examples, redo=redo)
    examples = stars_features(base_path,
                              conf['path_meta'],
                              log,
                              examples,
                              redo=redo)
    # Disabled step (kept for reference):
    # examples = prediction_features(base_path, log, examples, redo=redo)

    write_feather(examples, path)

    # The log is no longer needed once the set is written; release it before
    # handing the (large) feature frame back to the caller.
    del log
    gc.collect()

    return examples
Ejemplo n.º 10
0
def main():
    """Create latent factors from the log for the item and the click settings.

    Only the log is loaded (from ``DATA_FOLDER``); the examples frame is not
    read. ``create_latent_factors`` runs once with the item actions/key and
    once with the click actions/key, both with ``size=SIZE``.
    """
    log = load_hdfs( DATA_FOLDER + 'data_log.hd5' )

    # Item configuration first, click configuration second.
    for actions, key in ((ACTIONS_ITEM, KEY_ITEM), (ACTIONS_CLICK, KEY_CLICK)):
        create_latent_factors(log, size=SIZE, actions=actions, key=key)
Ejemplo n.º 11
0
def main():
    """Create latent factors from the extended item metadata.

    Loads ``meta_extended.hd5`` from ``BASE_PATH + PREP`` and passes only the
    ``item_id`` and ``properties_code`` columns to ``create_latent_factors``.
    """
    meta_file = BASE_PATH + PREP + 'meta_extended.hd5'
    meta = load_hdfs(meta_file)

    # Restrict the frame to the two columns the factorisation is given.
    create_latent_factors(meta[['item_id', 'properties_code']])