Ejemplo n.º 1
0
def cache_data(hparams, filename, flag):
    """Convert ``filename`` into a cached TFRecord file unless one exists.

    The cache path is computed by ``util.convert_cached_name`` from the
    ``hparams.<flag>_file`` setting and ``hparams.batch_size``, and is stored
    back on ``hparams`` as ``<flag>_file_cache``. If no file exists at that
    path, the writer selected by ``hparams.data_format`` converts
    ``filename``; the sample count and the impression ids it returns are
    then written to the side files named by the ``util`` constants for
    ``flag``. If the cached file already exists, nothing is written.

    Args:
        hparams: Hyper-parameter object. ``data_format``, ``batch_size`` and
            the ``<flag>_file`` attribute are read; ``<flag>_file_cache``
            is set.
        filename: Path of the input file handed to the writer.
        flag: One of ``'train'``, ``'eval'``, ``'test'`` or ``'infer'``.

    Raises:
        ValueError: If ``hparams.data_format`` or ``flag`` is not supported.
    """
    if hparams.data_format == 'ffm':
        cache_obj = FfmCache()
    elif hparams.data_format == 'din':
        cache_obj = DinCache()
    elif hparams.data_format == 'cccfnet':
        cache_obj = CCCFNetCache()
    else:
        raise ValueError(
            "data format must be ffm, din, cccfnet, this format not defined {0}"
            .format(hparams.data_format))

    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it does not
    # fail when another process creates the directory between the check and
    # the call, and it also creates missing parent directories.
    os.makedirs(util.CACHE_DIR, exist_ok=True)

    if flag == 'train':
        # e.g. hparams.train_file_cache =
        #     ./cache/batch_size_128_train.userid.tfrecord
        hparams.train_file_cache = util.convert_cached_name(
            hparams.train_file, hparams.batch_size)
        cached_name = hparams.train_file_cache
        sample_num_path = util.TRAIN_NUM
        impression_id_path = util.TRAIN_IMPRESSION_ID
    elif flag == 'eval':
        hparams.eval_file_cache = util.convert_cached_name(
            hparams.eval_file, hparams.batch_size)
        cached_name = hparams.eval_file_cache
        sample_num_path = util.EVAL_NUM
        impression_id_path = util.EVAL_IMPRESSION_ID
    elif flag == 'test':
        hparams.test_file_cache = util.convert_cached_name(
            hparams.test_file, hparams.batch_size)
        cached_name = hparams.test_file_cache
        sample_num_path = util.TEST_NUM
        impression_id_path = util.TEST_IMPRESSION_ID
    elif flag == 'infer':
        hparams.infer_file_cache = util.convert_cached_name(
            hparams.infer_file, hparams.batch_size)
        cached_name = hparams.infer_file_cache
        sample_num_path = util.INFER_NUM
        impression_id_path = util.INFER_IMPRESSION_ID
    else:
        raise ValueError("flag must be train, eval, test, infer")
    print('cache filename:', filename)

    # If there is no record file yet, convert the input with cache_obj.
    if not os.path.isfile(cached_name):
        print('has not cached file, begin cached...')
        start_time = time.time()
        sample_num, impression_id_list = cache_obj.write_tfrecord(
            filename, cached_name, hparams)
        util.print_time("caced file used time", start_time)
        print("data sample num:{0}".format(sample_num))
        with open(sample_num_path, 'w') as f:
            f.write(str(sample_num) + '\n')
        with open(impression_id_path, 'w') as f:
            # One id per line, handed to the file object in a single call.
            f.writelines(
                str(impression_id) + '\n'
                for impression_id in impression_id_list)
Ejemplo n.º 2
0
def cache_data(hparams, filename, flag):
    """Build the cached TFRecord file for ``flag`` if it is not present yet.

    The cache path comes from ``util.convert_cached_name`` applied to the
    ``hparams.<flag>_file`` setting and ``hparams.batch_size``; it is stored
    on ``hparams`` as ``<flag>_file_cache``. If that file is missing, the
    writer matching ``hparams.data_format`` converts ``filename``, and the
    returned sample count and impression ids are saved to the side files
    named by the ``util`` constants for ``flag``. An existing cached file is
    left untouched and no side files are rewritten.

    Args:
        hparams: Hyper-parameter object. ``data_format``, ``batch_size`` and
            the ``<flag>_file`` attribute are read; ``<flag>_file_cache``
            is set.
        filename: Path of the input file handed to the writer.
        flag: One of ``'train'``, ``'eval'``, ``'test'`` or ``'infer'``.

    Raises:
        ValueError: If ``hparams.data_format`` or ``flag`` is not supported.
    """
    if hparams.data_format == 'ffm':
        cache_obj = FfmCache()
    elif hparams.data_format == 'din':
        cache_obj = DinCache()
    elif hparams.data_format == 'cccfnet':
        cache_obj = CCCFNetCache()
    else:
        raise ValueError(
            "data format must be ffm, din, cccfnet, this format not defined {0}".format(hparams.data_format))

    # A separate exists() check followed by mkdir() can fail if another
    # process creates the directory in between; makedirs(exist_ok=True)
    # tolerates that and also creates missing parent directories.
    os.makedirs(util.CACHE_DIR, exist_ok=True)

    if flag == 'train':
        hparams.train_file_cache = util.convert_cached_name(hparams.train_file, hparams.batch_size)
        cached_name = hparams.train_file_cache
        sample_num_path = util.TRAIN_NUM
        impression_id_path = util.TRAIN_IMPRESSION_ID
    elif flag == 'eval':
        hparams.eval_file_cache = util.convert_cached_name(hparams.eval_file, hparams.batch_size)
        cached_name = hparams.eval_file_cache
        sample_num_path = util.EVAL_NUM
        impression_id_path = util.EVAL_IMPRESSION_ID
    elif flag == 'test':
        hparams.test_file_cache = util.convert_cached_name(hparams.test_file, hparams.batch_size)
        cached_name = hparams.test_file_cache
        sample_num_path = util.TEST_NUM
        impression_id_path = util.TEST_IMPRESSION_ID
    elif flag == 'infer':
        hparams.infer_file_cache = util.convert_cached_name(hparams.infer_file, hparams.batch_size)
        cached_name = hparams.infer_file_cache
        sample_num_path = util.INFER_NUM
        impression_id_path = util.INFER_IMPRESSION_ID
    else:
        raise ValueError("flag must be train, eval, test, infer")
    print('cache filename:', filename)

    # Only convert when the cached file does not exist yet.
    if not os.path.isfile(cached_name):
        print('has not cached file, begin cached...')
        start_time = time.time()
        sample_num, impression_id_list = cache_obj.write_tfrecord(filename, cached_name, hparams)
        util.print_time("caced file used time", start_time)
        print("data sample num:{0}".format(sample_num))
        with open(sample_num_path, 'w') as f:
            f.write(str(sample_num) + '\n')
        with open(impression_id_path, 'w') as f:
            # One impression id per line, written in a single call.
            f.writelines(str(impression_id) + '\n' for impression_id in impression_id_list)
Ejemplo n.º 3
0
def cache_data(hparams, filename, flag):
    """Create the cached TFRecord file for ``flag`` when it is missing.

    Only the ``'ffm'`` data format is supported. The cache path is computed
    by ``util.convert_cached_name`` from the ``hparams.<flag>_file`` setting
    and ``hparams.batch_size`` and stored on ``hparams`` as
    ``<flag>_file_cache``. If no file exists at that path, ``filename`` is
    converted with ``FfmCache.write_tfrecord`` and the first element of its
    result (the sample count) is written to the side file named by the
    ``util`` constant for ``flag``. An existing cached file is left as is.

    Args:
        hparams: Hyper-parameter object. ``data_format``, ``batch_size`` and
            the ``<flag>_file`` attribute are read; ``<flag>_file_cache``
            is set.
        filename: Path of the input file handed to the writer.
        flag: One of ``'train'``, ``'eval'``, ``'test'`` or ``'infer'``.

    Raises:
        ValueError: If ``hparams.data_format`` is not ``'ffm'`` or ``flag``
            is not one of the supported values.
    """
    if hparams.data_format == 'ffm':
        cache_obj = FfmCache()
    else:
        raise ValueError(
            "data format must be ffm, this format not defined {0}".format(
                hparams.data_format))

    # makedirs(exist_ok=True) avoids the exists()/mkdir() race (another
    # process may create the directory between the two calls) and also
    # creates missing parent directories.
    os.makedirs(util.CACHE_DIR, exist_ok=True)

    if flag == 'train':
        hparams.train_file_cache = util.convert_cached_name(
            hparams.train_file, hparams.batch_size)
        cached_name = hparams.train_file_cache
        sample_num_path = util.TRAIN_NUM
    elif flag == 'eval':
        hparams.eval_file_cache = util.convert_cached_name(
            hparams.eval_file, hparams.batch_size)
        cached_name = hparams.eval_file_cache
        sample_num_path = util.EVAL_NUM
    elif flag == 'test':
        hparams.test_file_cache = util.convert_cached_name(
            hparams.test_file, hparams.batch_size)
        cached_name = hparams.test_file_cache
        sample_num_path = util.TEST_NUM
    elif flag == 'infer':
        hparams.infer_file_cache = util.convert_cached_name(
            hparams.infer_file, hparams.batch_size)
        cached_name = hparams.infer_file_cache
        sample_num_path = util.INFER_NUM
    else:
        raise ValueError("flag must be train, eval, test, infer")
    print('cache filename:', filename)

    # Only convert when the cached file does not exist yet.
    if not os.path.isfile(cached_name):
        print('has not cached file, begin to cache')
        start_time = time.time()
        # write_tfrecord returns a sequence; only its first element (the
        # sample count) is used here.
        sample_num = cache_obj.write_tfrecord(filename, cached_name,
                                              hparams)[0]
        util.print_time("cached as {} used time".format(cached_name),
                        start_time)
        print("data sample num:{0}".format(sample_num))
        with open(sample_num_path, 'w') as f:
            f.write(str(sample_num) + '\n')