def ch_test_get_batch(title_len=52, content_len=300, batch_size=128):
    """Build the ``ch`` evaluation matrix and pass it to ``eval_batch``.

    Loads ``../data/ch_eval_title.npy`` and ``../data/ch_eval_content.npy``,
    maps ``pad_X52`` over the titles and ``pad_X300`` over the contents in a
    worker pool, stacks the two results horizontally and calls
    ``eval_batch(X, ch_test_path, batch_size)``.

    Args:
        title_len: Not read by this function; the title transform is fixed
            to ``pad_X52``. Kept so existing callers keep working.
        content_len: Not read by this function; the content transform is
            fixed to ``pad_X300``. Kept so existing callers keep working.
        batch_size: Forwarded unchanged to ``eval_batch``.
    """
    eval_title = np.load('../data/ch_eval_title.npy')
    eval_content = np.load('../data/ch_eval_content.npy')
    pool = Pool()
    try:
        X_title = np.asarray(pool.map(pad_X52, eval_title))
        X_content = np.asarray(pool.map(pad_X300, eval_content))
    finally:
        # Shut the workers down even when a map call raises; without this
        # the pool is never closed on the error path.
        pool.close()
        pool.join()
    X = np.hstack([X_title, X_content])
    print('eval_sample_num=%d' % len(X))
    eval_batch(X, ch_test_path, batch_size)
def wd_test_get_batch(title_len=30, batch_size=128):
    """Build the ``wd`` evaluation matrix and pass it to ``eval_batch``.

    Loads ``../data/wd_eval_title.npy`` and ``../data/wd_eval_content.npy``,
    maps ``pad_X30`` over the titles and ``wd_pad_cut_docs`` over the
    contents using six worker processes, flattens the content result to
    ``30 * 10`` columns per row, stacks titles and contents horizontally and
    calls ``eval_batch(X, wd_test_path, batch_size)``.

    Args:
        title_len: Not read by this function; the title transform is fixed
            to ``pad_X30``. Kept so existing callers keep working.
        batch_size: Forwarded unchanged to ``eval_batch``.
    """
    print('loading word eval_title and eval_content.')
    eval_title = np.load('../data/wd_eval_title.npy')
    eval_content = np.load('../data/wd_eval_content.npy')
    pool = Pool(6)
    try:
        X_title = np.asarray(pool.map(pad_X30, eval_title))
        X_content = np.asarray(pool.map(wd_pad_cut_docs, eval_content))
    finally:
        # Shut the workers down even when a map call raises; without this
        # the pool is never closed on the error path.
        pool.close()
        pool.join()
    # presumably wd_pad_cut_docs yields a (10, 30) block per document --
    # TODO confirm; here each row is flattened to 30 * 10 values.
    X_content = X_content.reshape(-1, 30 * 10)
    X = np.hstack([X_title, X_content])
    print('eval_sample_num=%d' % len(X))
    eval_batch(X, wd_test_path, batch_size)
def ch_test_get_batch(title_len=52, batch_size=128):
    """Build the ``ch`` evaluation matrix and pass it to ``eval_batch``.

    Loads ``../data/ch_eval_title.npy`` and ``../data/ch_eval_content.npy``,
    maps ``pad_X52`` over the titles and ``ch_pad_cut_docs`` over the
    contents in a worker pool, flattens the content result to ``52 * 10``
    columns per row, stacks titles and contents horizontally and calls
    ``eval_batch(X, ch_test_path, batch_size)``.

    Args:
        title_len: Not read by this function; the title transform is fixed
            to ``pad_X52``. Kept so existing callers keep working.
        batch_size: Forwarded unchanged to ``eval_batch``.
    """
    print('loading char eval_title and eval_content.')
    eval_title = np.load('../data/ch_eval_title.npy')
    eval_content = np.load('../data/ch_eval_content.npy')
    pool = Pool()
    try:
        X_title = np.asarray(pool.map(pad_X52, eval_title))
        X_content = np.asarray(pool.map(ch_pad_cut_docs, eval_content))
    finally:
        # Shut the workers down even when a map call raises; without this
        # the pool is never closed on the error path.
        pool.close()
        pool.join()
    # presumably ch_pad_cut_docs yields a (10, 52) block per document --
    # TODO confirm; here each row is flattened to 52 * 10 values.
    X_content = X_content.reshape(-1, 52 * 10)
    X = np.hstack([X_title, X_content])
    print('eval_sample_num=%d' % len(X))
    eval_batch(X, ch_test_path, batch_size)
def wd_test_get_batch(title_len=30, batch_size=128):
    """Prepare the ``wd`` evaluation inputs and forward them to ``eval_batch``.

    Steps, in order:
      1. Load ``../data/wd_eval_title.npy`` and
         ``../data/wd_eval_content.npy``.
      2. Apply ``pad_X30`` to every title and ``wd_pad_cut_docs`` to every
         content entry using a pool of six workers.
      3. Reshape the content array to ``(-1, 30 * 10)``.
      4. Horizontally stack titles and contents and call
         ``eval_batch(X, wd_test_path, batch_size)``.

    Args:
        title_len: Accepted for interface compatibility; this function does
            not read it (``pad_X30`` is hard-wired).
        batch_size: Passed straight through to ``eval_batch``.
    """
    print('loading word eval_title and eval_content.')
    eval_title = np.load('../data/wd_eval_title.npy')
    eval_content = np.load('../data/wd_eval_content.npy')
    workers = Pool(6)
    try:
        X_title = np.asarray(workers.map(pad_X30, eval_title))
        X_content = np.asarray(workers.map(wd_pad_cut_docs, eval_content))
    finally:
        # close()/join() must run on the failure path too, or the six
        # worker processes are never released.
        workers.close()
        workers.join()
    # reshape() instead of assigning to .shape: same resulting layout, and
    # it is the form NumPy's documentation recommends.
    X_content = X_content.reshape(-1, 30 * 10)
    X = np.hstack([X_title, X_content])
    print('eval_sample_num=%d' % len(X))
    eval_batch(X, wd_test_path, batch_size)
def ch_test_get_batch(title_len=52, batch_size=128):
    """Prepare the ``ch`` evaluation inputs and forward them to ``eval_batch``.

    Steps, in order:
      1. Load ``../data/ch_eval_title.npy`` and
         ``../data/ch_eval_content.npy``.
      2. Apply ``pad_X52`` to every title and ``ch_pad_cut_docs`` to every
         content entry using a worker pool.
      3. Reshape the content array to ``(-1, 52 * 10)``.
      4. Horizontally stack titles and contents and call
         ``eval_batch(X, ch_test_path, batch_size)``.

    Args:
        title_len: Accepted for interface compatibility; this function does
            not read it (``pad_X52`` is hard-wired).
        batch_size: Passed straight through to ``eval_batch``.
    """
    print('loading char eval_title and eval_content.')
    eval_title = np.load('../data/ch_eval_title.npy')
    eval_content = np.load('../data/ch_eval_content.npy')
    workers = Pool()
    try:
        X_title = np.asarray(workers.map(pad_X52, eval_title))
        X_content = np.asarray(workers.map(ch_pad_cut_docs, eval_content))
    finally:
        # close()/join() must run on the failure path too, or the worker
        # processes are never released.
        workers.close()
        workers.join()
    # reshape() instead of assigning to .shape: same resulting layout, and
    # it is the form NumPy's documentation recommends.
    X_content = X_content.reshape(-1, 52 * 10)
    X = np.hstack([X_title, X_content])
    print('eval_sample_num=%d' % len(X))
    eval_batch(X, ch_test_path, batch_size)
def wd_test_get_batch(title_len=30, content_len=150, batch_size=128):
    """Build the evaluation matrix, pass it to ``eval_batch``, then free it.

    Loads the title and content arrays, maps ``pad_X30`` over the titles and
    ``pad_X150`` over the contents in a worker pool, stacks the results
    horizontally, calls ``eval_batch(X, wd_test_path, batch_size)`` and
    finally deletes ``X`` and triggers a garbage collection.

    Args:
        title_len: Not read by this function; the title transform is fixed
            to ``pad_X30``. Kept so existing callers keep working.
        content_len: Not read by this function; the content transform is
            fixed to ``pad_X150``. Kept so existing callers keep working.
        batch_size: Forwarded unchanged to ``eval_batch``.
    """
    print('loading eval data...')
    # NOTE(review): this ``wd_`` function reads the ``ch_eval_*`` files while
    # writing via ``wd_test_path`` -- confirm the paths are intentional.
    title = np.load('../data/ch_eval_title.npy')
    content = np.load('../data/ch_eval_content.npy')
    print(
        'transfer the eval data to fixed title_length 30 and content_length 150...'
    )
    pool = Pool()
    try:
        title = np.asarray(pool.map(pad_X30, title))
        content = np.asarray(pool.map(pad_X150, content))
    finally:
        # Shut the workers down even when a map call raises; without this
        # the pool is never closed on the error path.
        pool.close()
        pool.join()
    X = np.hstack([title, content])
    print('OK!transfered! titles and content are stacked horizontally')
    print('test sample_num=', len(X))
    eval_batch(X, wd_test_path, batch_size)
    print('release space, deleting X')
    del X
    gc.collect()
def wd_test_get_batch(title_len=20, content_len=100, batch_size=128):
    """Build the ``wd`` test matrix and pass it to ``eval_batch``.

    Loads ``../data_new/wd_test_title.npy`` and
    ``../data_new/wd_test_content.npy``, maps ``pad_X20`` over the titles and
    ``pad_X100`` over the contents in a worker pool, stacks the results
    horizontally and calls ``eval_batch(X, wd_test_path, batch_size)``.
    The first element and the shape of each array are printed before and
    after the pool step as a visual sanity check.

    Args:
        title_len: Not read by this function; the title transform is fixed
            to ``pad_X20``. Kept so existing callers keep working.
        content_len: Not read by this function; the content transform is
            fixed to ``pad_X100``. Kept so existing callers keep working.
        batch_size: Forwarded unchanged to ``eval_batch``.
    """
    eval_title = np.load('../data_new/wd_test_title.npy')
    eval_content = np.load('../data_new/wd_test_content.npy')
    print(" raw titles:", eval_title[0], 'title.shape:', eval_title.shape)
    print(" raw contents:", eval_content[0], 'contents.shape:',
          eval_content.shape)
    pool = Pool()
    try:
        X_title = np.asarray(pool.map(pad_X20, eval_title))
        X_content = np.asarray(pool.map(pad_X100, eval_content))
    finally:
        # Shut the workers down even when a map call raises; without this
        # the pool is never closed on the error path.
        pool.close()
        pool.join()
    print("padding 20 X_title:", X_title[0], "shape:", X_title.shape)
    print("padding 100 X_contents:", X_content[0], "shape:", X_content.shape)
    X = np.hstack([X_title, X_content])
    print("X.shape:", X.shape)
    print('eval_sample_num=%d' % len(X))
    eval_batch(X, wd_test_path, batch_size)
def thulac_test_get_batch(batch_size=config.BATCH_SIZE):
    """Load the stored thulac test array and pass it to ``eval_batch``.

    Reads ``test_data_thulac.npy`` from under ``thulac_test_path``, prints
    the number of rows, and calls ``eval_batch`` with the array, the
    ``batch/`` location under ``thulac_test_path``, and ``batch_size``.

    Args:
        batch_size: Forwarded unchanged to ``eval_batch``; defaults to
            ``config.BATCH_SIZE``.
    """
    samples = np.load(thulac_test_path + 'test_data_thulac.npy')
    print('eval_sample_num=%d' % len(samples))
    batch_dir = thulac_test_path + 'batch/'
    eval_batch(samples, batch_dir, batch_size)