import os
import time

# FfmCache, DinCache, CCCFNetCache and util are project modules providing the
# per-format TFRecord writers and the cache path constants/helpers.


def cache_data(hparams, filename, flag):
    """Convert a raw data file into a cached TFRecord file for the given split."""
    if hparams.data_format == 'ffm':
        cache_obj = FfmCache()
    elif hparams.data_format == 'din':
        cache_obj = DinCache()
    elif hparams.data_format == 'cccfnet':
        cache_obj = CCCFNetCache()
    else:
        raise ValueError(
            "data format must be ffm, din or cccfnet; this format is not defined: {0}"
            .format(hparams.data_format))

    if not os.path.exists(util.CACHE_DIR):
        os.mkdir(util.CACHE_DIR)

    if flag == 'train':
        # e.g. hparams.train_file_cache = ./cache/batch_size_128_train.userid.tfrecord
        hparams.train_file_cache = util.convert_cached_name(
            hparams.train_file, hparams.batch_size)
        cached_name = hparams.train_file_cache
        sample_num_path = util.TRAIN_NUM
        impression_id_path = util.TRAIN_IMPRESSION_ID
    elif flag == 'eval':
        hparams.eval_file_cache = util.convert_cached_name(
            hparams.eval_file, hparams.batch_size)
        cached_name = hparams.eval_file_cache
        sample_num_path = util.EVAL_NUM
        impression_id_path = util.EVAL_IMPRESSION_ID
    elif flag == 'test':
        hparams.test_file_cache = util.convert_cached_name(
            hparams.test_file, hparams.batch_size)
        cached_name = hparams.test_file_cache
        sample_num_path = util.TEST_NUM
        impression_id_path = util.TEST_IMPRESSION_ID
    elif flag == 'infer':
        hparams.infer_file_cache = util.convert_cached_name(
            hparams.infer_file, hparams.batch_size)
        cached_name = hparams.infer_file_cache
        sample_num_path = util.INFER_NUM
        impression_id_path = util.INFER_IMPRESSION_ID
    else:
        raise ValueError("flag must be train, eval, test or infer")

    print('cache filename:', filename)
    # If no TFRecord cache file exists yet, build it with cache_obj.
    if not os.path.isfile(cached_name):
        print('no cached file found, begin caching...')
        start_time = time.time()
        sample_num, impression_id_list = cache_obj.write_tfrecord(
            filename, cached_name, hparams)
        util.print_time("cached file used time", start_time)
        print("data sample num: {0}".format(sample_num))
        # Persist the sample count and impression ids so later stages can read them back.
        with open(sample_num_path, 'w') as f:
            f.write(str(sample_num) + '\n')
        with open(impression_id_path, 'w') as f:
            for impression_id in impression_id_list:
                f.write(str(impression_id) + '\n')
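# The sketch below illustrates the cache naming convention implied by the comment
# above (./cache/batch_size_128_train.userid.tfrecord). It is an assumption about
# what util.convert_cached_name produces, not the project's actual implementation,
# which lives in the util module and may differ.
def convert_cached_name_sketch(data_file, batch_size):
    # e.g. convert_cached_name_sketch('data/train.userid', 128)
    #   -> './cache/batch_size_128_train.userid.tfrecord'
    base_name = os.path.basename(data_file)
    return os.path.join(
        './cache', 'batch_size_{0}_{1}.tfrecord'.format(batch_size, base_name))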
# A simplified variant of cache_data that supports only the ffm data format
# and does not persist impression ids.
def cache_data(hparams, filename, flag):
    if hparams.data_format == 'ffm':
        cache_obj = FfmCache()
    else:
        raise ValueError(
            "data format must be ffm; this format is not defined: {0}".format(
                hparams.data_format))

    if not os.path.exists(util.CACHE_DIR):
        os.mkdir(util.CACHE_DIR)

    if flag == 'train':
        hparams.train_file_cache = util.convert_cached_name(
            hparams.train_file, hparams.batch_size)
        cached_name = hparams.train_file_cache
        sample_num_path = util.TRAIN_NUM
    elif flag == 'eval':
        hparams.eval_file_cache = util.convert_cached_name(
            hparams.eval_file, hparams.batch_size)
        cached_name = hparams.eval_file_cache
        sample_num_path = util.EVAL_NUM
    elif flag == 'test':
        hparams.test_file_cache = util.convert_cached_name(
            hparams.test_file, hparams.batch_size)
        cached_name = hparams.test_file_cache
        sample_num_path = util.TEST_NUM
    elif flag == 'infer':
        hparams.infer_file_cache = util.convert_cached_name(
            hparams.infer_file, hparams.batch_size)
        cached_name = hparams.infer_file_cache
        sample_num_path = util.INFER_NUM
    else:
        raise ValueError("flag must be train, eval, test or infer")

    print('cache filename:', filename)
    if not os.path.isfile(cached_name):
        print('no cached file found, begin caching...')
        start_time = time.time()
        # write_tfrecord returns a tuple whose first element is the sample count.
        sample_num = cache_obj.write_tfrecord(filename, cached_name, hparams)[0]
        util.print_time("cached as {}, time used".format(cached_name), start_time)
        print("data sample num: {0}".format(sample_num))
        with open(sample_num_path, 'w') as f:
            f.write(str(sample_num) + '\n')
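# A minimal usage sketch, assuming `hparams` is any attribute-style object that
# carries the fields cache_data reads (the project presumably uses an HParams-like
# object; types.SimpleNamespace stands in here). The file path and batch size are
# illustrative values, not taken from the source.
import types

hparams = types.SimpleNamespace(
    data_format='ffm',
    train_file='./data/train.ffm',
    batch_size=128,
)
cache_data(hparams, hparams.train_file, flag='train')
# After the call, hparams.train_file_cache points at the generated TFRecord file
# and the file at util.TRAIN_NUM stores the number of cached samples.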