def get_data_loader(args):
    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }
    dl = data_loader.Data_Loader(data_loader_options)
    return dl
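# Example usage (a sketch; `args` is assumed to be an argparse.Namespace carrying the
# same source_file / target_file / bucket_quant fields the training scripts below parse):
#
#   dl = get_data_loader(args)
#   buckets, source_vocab, target_vocab = dl.load_translation_data()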
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning Rate')
    parser.add_argument('--batch_size', type=int, default=1, help='Batch Size')
    parser.add_argument('--sample_every', type=int, default=500, help='Sample generator output every x steps')
    parser.add_argument('--summary_every', type=int, default=50, help='Write summaries every x steps')
    parser.add_argument('--save_model_every', type=int, default=1500, help='Save model every x steps')
    parser.add_argument('--sample_size', type=int, default=300, help='Sampled output size')
    parser.add_argument('--top_k', type=int, default=5, help='Sample from top k predictions')
    parser.add_argument('--max_epochs', type=int, default=1000, help='Max Epochs')
    parser.add_argument('--beta1', type=float, default=0.5, help='Momentum for Adam Update')
    parser.add_argument('--resume_model', type=str, default=None, help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--text_dir', type=str, default='Data/generator_training_data', help='Directory containing text files')
    parser.add_argument('--data_dir', type=str, default='Data', help='Data Directory')
    parser.add_argument('--seed', type=str, default='All', help='Seed for text generation')
    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )
    config = model_config.predictor_config

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.text_dir
    })
    text_samples, vocab = dl.load_generator_data(config['sample_size'])
    print text_samples.shape

    model_options = {
        'vocab_size': len(vocab),
        'residual_channels': config['residual_channels'],
        'dilations': config['dilations'],
        'filter_width': config['filter_width'],
    }

    generator_model = generator.ByteNet_Generator(model_options)
    generator_model.build_model()

    optim = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.beta1).minimize(generator_model.loss)

    generator_model.build_generator(reuse=True)
    merged_summary = tf.summary.merge_all()

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    # Clear summaries from a previous run before creating the writer.
    if os.path.exists('Data/tb_summaries/generator_model'):
        shutil.rmtree('Data/tb_summaries/generator_model')
    train_writer = tf.summary.FileWriter('Data/tb_summaries/generator_model', sess.graph)

    step = 0
    for epoch in range(args.max_epochs):
        batch_no = 0
        batch_size = args.batch_size
        while (batch_no + 1) * batch_size < text_samples.shape[0]:
            start = time.clock()
            text_batch = text_samples[batch_no * batch_size:(batch_no + 1) * batch_size, :]
            _, loss, prediction = sess.run(
                [optim, generator_model.loss, generator_model.arg_max_prediction],
                feed_dict={generator_model.t_sentence: text_batch})
            end = time.clock()

            print "-------------------------------------------------------"
            print "LOSS: {}\tEPOCH: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format(
                loss, epoch, batch_no, step, text_samples.shape[0] / args.batch_size)
            print "TIME FOR BATCH", end - start
            print "TIME FOR EPOCH (mins)", (end - start) * (text_samples.shape[0] / args.batch_size) / 60.0

            batch_no += 1
            step += 1

            if step % args.summary_every == 0:
                [summary] = sess.run([merged_summary],
                                     feed_dict={generator_model.t_sentence: text_batch})
                train_writer.add_summary(summary, step)
                print dl.inidices_to_string(prediction, vocab)
                print "********************************************************"

            if step % args.sample_every == 0:
                seed_sentence = np.array([dl.string_to_indices(args.seed, vocab)], dtype='int32')
                for col in range(args.sample_size):
                    [probs] = sess.run([generator_model.g_probs],
                                       feed_dict={generator_model.seed_sentence: seed_sentence})
                    curr_preds = []
                    for bi in range(probs.shape[0]):
                        pred_word = utils.sample_top(probs[bi][-1], top_k=args.top_k)
                        curr_preds.append(pred_word)
                    seed_sentence = np.insert(seed_sentence, seed_sentence.shape[1], curr_preds, axis=1)
                    print col, dl.inidices_to_string(seed_sentence[0], vocab)

                f = open('Data/generator_sample.txt', 'wb')
                f.write(dl.inidices_to_string(seed_sentence[0], vocab))
                f.close()

            if step % args.save_model_every == 0:
                save_path = saver.save(
                    sess, "Data/Models/generation_model/model_epoch_{}_{}.ckpt".format(epoch, step))
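# The sampling loops above pick the next symbol with utils.sample_top, i.e. sampling from
# the top-k entries of a probability vector. A minimal, illustrative sketch of such a
# top-k sampler (the actual utils.sample_top implementation lives in utils.py and may differ):
import numpy as np

def sample_top_k(probs, top_k=5):
    # Keep the k most probable indices, renormalize their probabilities, and sample one.
    idx = np.argsort(probs)[::-1][:top_k]
    renormalized = probs[idx] / np.sum(probs[idx])
    return np.random.choice(idx, p=renormalized)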
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning Rate')
    parser.add_argument('--batch_size', type=int, default=1, help='Batch Size')
    parser.add_argument('--max_epochs', type=int, default=1000, help='Max Epochs')
    parser.add_argument('--beta1', type=float, default=0.5, help='Momentum for Adam Update')
    parser.add_argument('--resume_model', type=str, default=None, help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--data_dir', type=str, default='Data', help='Data Directory')
    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )
    config = model_config.predictor_config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': config['sample_size'],
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': args.batch_size,
    }

    byte_net = model.Byte_net_model(model_options)
    bn_tensors = byte_net.build_prediction_model()

    optim = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.beta1).minimize(
            bn_tensors['loss'], var_list=bn_tensors['variables'])

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.data_dir
    })
    text_samples = dl.load_generator_data(config['sample_size'])
    print text_samples.shape

    for i in range(args.max_epochs):
        batch_no = 0
        batch_size = args.batch_size
        while (batch_no + 1) * batch_size < text_samples.shape[0]:
            text_batch = text_samples[batch_no * batch_size:(batch_no + 1) * batch_size, :]
            _, loss, prediction = sess.run(
                [optim, bn_tensors['loss'], bn_tensors['prediction']],
                feed_dict={bn_tensors['sentence']: text_batch})

            print "-------------------------------------------------------"
            print utils.list_to_string(prediction)
            print "Loss", i, batch_no, loss
            print "********************************************************"
            # print prediction
            batch_no += 1

            if (batch_no % 500) == 0:
                save_path = saver.save(sess, "Data/Models/model_epoch_{}.ckpt".format(i))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning Rate')
    parser.add_argument('--batch_size', type=int, default=1, help='Batch Size')
    parser.add_argument('--max_epochs', type=int, default=1000, help='Max Epochs')
    parser.add_argument('--beta1', type=float, default=0.5, help='Momentum for Adam Update')
    parser.add_argument('--resume_model', type=str, default=None, help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--data_dir', type=str, default='Data', help='Data Directory')
    parser.add_argument('--log_dir', type=str, default='logs', help='Path to TensorBoard logs')
    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )
    config = model_config.predictor_config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': config['sample_size'],
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': args.batch_size,
    }

    byte_net = model.Byte_net_model(model_options)
    bn_tensors = byte_net.build_prediction_model()

    # Set up logging for TensorBoard
    writer = tf.summary.FileWriter(args.log_dir, graph=tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    optim = tf.train.AdamOptimizer(args.learning_rate, beta1=args.beta1) \
        .minimize(bn_tensors['loss'], var_list=bn_tensors['variables'])

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.data_dir
    })
    text_samples = dl.load_generator_data(config['sample_size'])
    print(text_samples.shape)

    models_path = "Data/Models/"
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    for epoch in range(args.max_epochs):
        step = 0
        batch_size = args.batch_size
        while (step + 1) * batch_size < text_samples.shape[0]:
            text_batch = text_samples[step * batch_size:(step + 1) * batch_size, :]
            _, summary, loss, prediction = sess.run(
                [optim, summaries, bn_tensors['loss'], bn_tensors['prediction']],
                feed_dict={bn_tensors['sentence']: text_batch})

            print("-------------------------------------------------------")
            print(utils.list_to_string(prediction))
            print("Epoch", epoch, " Step", step, " Loss", loss)
            print("********************************************************")

            writer.add_summary(summary, step)
            writer.add_run_metadata(
                run_metadata, 'epoch_{:04d}, step_{:04d}'.format(epoch, step))

            step += 1
            if step % 500 == 0:
                saver.save(sess, models_path + "model_epoch_{}.ckpt".format(epoch))
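# The FileWriter above logs the graph and the merged summaries to args.log_dir
# ('logs' by default). They can be inspected with, for example:
#   tensorboard --logdir logs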
                        help='choose which split of data to use '
                             '(`train` or `valid`)')
    parser.add_argument('--num_layers', type=int, default=15, help='num of layers')
    args = parser.parse_args()
    model_path = args.model_path

    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }
    dl = data_loader.Data_Loader(data_loader_options, split=args.split, vocab=None)
    buckets, source_vocab, target_vocab = dl.load_translation_data()

    config = model_config.translator_config
    model_options = {
        'source_vocab_size': len(source_vocab),
        'target_vocab_size': len(target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
        'layer_norm': config['layer_norm']
    }

    translator_model = translator.ByteNet_Translator(model_options)
def main():
    args, config = get_args_and_config()

    source_sentences = None
    with open('Data/MachineTranslation/news-commentary-v11.de-en.de') as f:
        source_sentences = f.read().decode("utf-8").split('\n')

    with open('Data/MachineTranslation/news-commentary-v11.de-en.en') as f:
        target_sentences = f.read().decode("utf-8").split('\n')

    idx = 0
    for i in range(len(source_sentences)):
        if 'NEW YORK' in target_sentences[i][0:40]:
            print(target_sentences[i])
            idx = i
            break

    source_sentences = source_sentences[idx:idx + 1]
    target_sentences = target_sentences[idx:idx + 1]
    print(source_sentences)
    print(target_sentences)

    data_loader_options = {
        'model_type': 'translation',
        'source_file': 'Data/MachineTranslation/news-commentary-v11.de-en.de',
        'target_file': 'Data/MachineTranslation/news-commentary-v11.de-en.en',
        'bucket_quant': 25,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    # buckets, source_vocab, target_vocab, frequent_keys = dl.load_translation_data()

    source, target = prepare_source_target_arrays(args, dl, source_sentences)

    model_options = {
        'n_source_quant': len(dl.source_vocab),
        'n_target_quant': len(dl.target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'sample_size': 10,
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
        'batch_size': 1,
        'source_mask_chars': [dl.source_vocab['padding']],
        'target_mask_chars': [dl.target_vocab['padding']]
    }

    byte_net = model.Byte_net_model(model_options)
    translator = byte_net.build_translation_model(args.translator_max_length)

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    saver.restore(sess, args.model_path)

    input_batch = target
    print("INPUT", input_batch)
    print("Source", source)

    for i in range(0, 1000):
        prediction, probs = sess.run(
            [translator['prediction'], translator['probs']],
            feed_dict={
                translator['source_sentence']: source,
                translator['target_sentence']: input_batch,
            })
        # prediction = prediction[0]
        last_prediction = np.array([utils.weighted_pick(probs[i])])
        last_prediction = last_prediction.reshape([1, -1])
        input_batch[:, i + 1] = last_prediction[:, 0]
        res = dl.inidices_to_string(input_batch[0], dl.target_vocab)
        print("RES")
        print(res)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--top_k', type=int, default=5, help='sample from top k predictions')
    parser.add_argument('--beta1', type=float, default=0.9, help='beta1 hyperparameter for Adam')
    parser.add_argument('--datapath', type=str, default="Data/coldrec/rec50_pretrain.csv", help='data path')
    parser.add_argument('--save_dir', type=str, default="Models/coldrec_baseline_4_emb64_bs256", help='save dir path')
    parser.add_argument('--eval_iter', type=int, default=1000, help='evaluate every x steps')
    parser.add_argument('--early_stop', type=int, default=10, help='early stop after x evaluations without improvement')
    parser.add_argument('--step', type=int, default=400000, help='training steps')
    parser.add_argument('--tt_percentage', type=float, default=0.2, help='0.2 means 80% training, 20% testing')
    parser.add_argument('--data_ratio', type=float, default=1, help='fraction of the training data actually used')
    parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
    parser.add_argument('--L2', type=float, default=0.001, help='L2 regularization')
    parser.add_argument('--dilation_count', type=int, default=4, help='dilation count number')
    parser.add_argument('--method', type=str, default="from_scratch", help='from_scratch, random_init, stack')
    parser.add_argument('--load_model', type=ast.literal_eval, default=False, help='whether loading pretrain model')
    parser.add_argument('--copy_softmax', type=ast.literal_eval, default=True, help='whether copying softmax param')
    parser.add_argument('--copy_layernorm', type=ast.literal_eval, default=True, help='whether copying layernorm param')
    parser.add_argument('--model_path', type=str, default="Models/", help='load model path')
    parser.add_argument('--padid', type=int, default=0, help='pad id')
    args = parser.parse_args()
    print(args)

    dl = data_loader.Data_Loader({
        'dir_name': args.datapath,
        'padid': args.padid
    })
    all_samples = dl.item
    print(all_samples.shape)
    items = dl.item_dict
    print("len(items)", len(items))

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(all_samples)))
    all_samples = all_samples[shuffle_indices]

    # Split train/test set
    dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples)))
    train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:]

    random.seed(10)
    ratio = args.data_ratio
    train_set_len = len(train_set)
    train_index_set = set(list(range(train_set_len)))

    if ratio == 0.2:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set), train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.4:
        last_ratio = ratio - 0.2
        last_train_ratio = int(last_ratio * float(train_set_len))
        last_train_index_set = random.sample(list(train_index_set), last_train_ratio)
        last_train_set = train_set[last_train_index_set]
        remain_train_index_set = train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 4.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((last_train_set, new_train_set), axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.6:
        last_last_ratio = ratio - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]
        remain_train_index_set = train_index_set - set(last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(list(remain_train_index_set),
                                             int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0)
        remain_train_index_set = remain_train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set), axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.8:
        last_last_ratio = ratio - 0.2 - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]
        remain_train_index_set = train_index_set - set(last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(list(remain_train_index_set),
                                             int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0)
        remain_train_index_set = remain_train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set), axis=0)
        remain_train_index_set = remain_train_index_set - set(new_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 2.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set), axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 1:
        train_set = np.array(train_set)
        print("real train len", len(train_set))
    else:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set), train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))

    model_para = {
        'item_size': len(items),
        'dilated_channels': 64,
        'dilations': [1, 4] * args.dilation_count,
        'step': args.step,
        'kernel_size': 3,
        'learning_rate': args.learning_rate,
        'L2': args.L2,
        'batch_size': 256,
        'load_model': args.load_model,
        'model_path': args.model_path,
        'copy_softmax': args.copy_softmax,
        'copy_layernorm': args.copy_layernorm,
        'method': args.method
    }
    print(model_para)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    itemrec = generator_deep.NextItNet_Decoder(model_para)
    itemrec.train_graph()
    optimizer = tf.train.AdamOptimizer(model_para['learning_rate'],
                                       beta1=args.beta1).minimize(itemrec.loss)
    itemrec.predict_graph(reuse=True)

    tf.add_to_collection("dilate_input", itemrec.dilate_input)
    tf.add_to_collection("context_embedding", itemrec.context_embedding)

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    saver = tf.train.Saver(max_to_keep=1)
    # writer = tf.summary.FileWriter('./stack_graph', sess.graph)

    numIters = 1
    max_mrr = 0
    break_stick = 0
    early_stop = 0

    while True:
        if break_stick == 1:
            break

        batch_no = 0
        batch_size = model_para['batch_size']
        while (batch_no + 1) * batch_size < train_set.shape[0]:
            start = time.time()
            item_batch = train_set[batch_no * batch_size:(batch_no + 1) * batch_size, :]
            _, loss = sess.run([optimizer, itemrec.loss],
                               feed_dict={itemrec.itemseq_input: item_batch})
            end = time.time()

            if numIters % args.eval_iter == 0:
                print("-------------------------------------------------------train")
                print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format(
                    loss, batch_no, numIters, train_set.shape[0] / batch_size))
                print("TIME FOR BATCH", end - start)
                print("TIME FOR EPOCH (mins)",
                      (end - start) * (train_set.shape[0] / batch_size) / 60.0)

            batch_no += 1

            if numIters % args.eval_iter == 0:
                print("-------------------------------------------------------test")
                batch_no_test = 0
                batch_size_test = batch_size * 1
                curr_preds_5 = []
                rec_preds_5 = []
                ndcg_preds_5 = []
                curr_preds_10 = []
                rec_preds_10 = []
                ndcg_preds_10 = []
                while (batch_no_test + 1) * batch_size_test < valid_set.shape[0]:
                    item_batch = valid_set[batch_no_test * batch_size_test:
                                           (batch_no_test + 1) * batch_size_test, :]
                    [probs_10, probs_5] = sess.run(
                        [itemrec.top_10, itemrec.top_5],
                        feed_dict={itemrec.input_predict: item_batch})
                    # print(probs_10[1].shape)  # (256, 1, 10)
                    for bi in range(batch_size_test):
                        pred_items_10 = probs_10[1][bi][-1]
                        pred_items_5 = probs_5[1][bi][-1]
                        true_item = item_batch[bi][-1]

                        predictmap_5 = {ch: i for i, ch in enumerate(pred_items_5)}
                        pred_items_10 = {ch: i for i, ch in enumerate(pred_items_10)}

                        rank_5 = predictmap_5.get(true_item)
                        rank_10 = pred_items_10.get(true_item)
                        if rank_5 is None:
                            curr_preds_5.append(0.0)
                            rec_preds_5.append(0.0)
                            ndcg_preds_5.append(0.0)
                        else:
                            MRR_5 = 1.0 / (rank_5 + 1)
                            Rec_5 = 1.0
                            ndcg_5 = 1.0 / math.log(rank_5 + 2, 2)
                            curr_preds_5.append(MRR_5)
                            rec_preds_5.append(Rec_5)
                            ndcg_preds_5.append(ndcg_5)
                        if rank_10 is None:
                            curr_preds_10.append(0.0)
                            rec_preds_10.append(0.0)
                            ndcg_preds_10.append(0.0)
                        else:
                            MRR_10 = 1.0 / (rank_10 + 1)
                            Rec_10 = 1.0
                            ndcg_10 = 1.0 / math.log(rank_10 + 2, 2)
                            curr_preds_10.append(MRR_10)
                            rec_preds_10.append(Rec_10)
                            ndcg_preds_10.append(ndcg_10)

                    batch_no_test += 1

                mrr = sum(curr_preds_5) / float(len(curr_preds_5))
                mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10))
                hit = sum(rec_preds_5) / float(len(rec_preds_5))
                hit_10 = sum(rec_preds_10) / float(len(rec_preds_10))
                ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5))
                ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10))

                if mrr > max_mrr:
                    max_mrr = mrr
                    print("Save model! mrr_5:", mrr)
                    print("Save model! mrr_10:", mrr_10)
                    print("Save model! hit_5:", hit)
                    print("Save model! hit_10:", hit_10)
                    print("Save model! ndcg_5:", ndcg)
                    print("Save model! ndcg_10:", ndcg_10)
                    early_stop = 0
                    saver.save(
                        sess, args.save_dir + "/{}_{}_{}_{}.ckpt".format(
                            args.dilation_count, args.learning_rate,
                            args.data_ratio, args.step))
                else:
                    print("mrr_5:", mrr)
                    print("mrr_10:", mrr_10)
                    print("hit_5:", hit)
                    print("hit_10:", hit_10)
                    print("ndcg_5:", ndcg)
                    print("ndcg_10:", ndcg_10)
                    early_stop += 1

            if numIters >= model_para['step']:
                break_stick = 1
                break
            if early_stop >= args.early_stop:
                break_stick = 1
                print("early stop!")
                break
            numIters += 1
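# The evaluation block above derives MRR@k, HIT@k and NDCG@k from the 0-based rank of the
# true next item within the predicted top-k list. A small stand-alone restatement of that
# per-example computation (the helper name `rank_to_metrics` is illustrative, not part of
# this codebase):
import math

def rank_to_metrics(rank):
    # rank is None when the true item falls outside the top-k predictions
    if rank is None:
        return 0.0, 0.0, 0.0  # MRR, HIT, NDCG contributions
    return 1.0 / (rank + 1), 1.0, 1.0 / math.log(rank + 2, 2)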
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning Rate')
    parser.add_argument('--batch_size', type=int, default=8, help='Batch Size')
    parser.add_argument('--bucket_quant', type=int, default=50, help='Bucket Quantization')
    parser.add_argument('--max_epochs', type=int, default=1000, help='Max Epochs')
    parser.add_argument('--beta1', type=float, default=0.5, help='Momentum for Adam Update')
    parser.add_argument('--resume_model', type=str, default=None, help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--source_file', type=str,
                        default='Data/MachineTranslation/news-commentary-v11.de-en.de',
                        help='Source File')
    parser.add_argument('--target_file', type=str,
                        default='Data/MachineTranslation/news-commentary-v11.de-en.en',
                        help='Target File')
    parser.add_argument('--sample_every', type=int, default=500, help='Sample translator output every x steps')
    parser.add_argument('--summary_every', type=int, default=50, help='Write summaries every x steps')
    parser.add_argument('--top_k', type=int, default=5, help='Sample from top k predictions')
    parser.add_argument('--resume_from_bucket', type=int, default=0, help='Resume From Bucket')
    args = parser.parse_args()

    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    buckets, source_vocab, target_vocab = dl.load_translation_data()
    print "Number Of Buckets", len(buckets)

    config = model_config.translator_config
    model_options = {
        'source_vocab_size': len(source_vocab),
        'target_vocab_size': len(target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
    }

    translator_model = translator.ByteNet_Translator(model_options)
    translator_model.build_model()

    optim = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.beta1).minimize(translator_model.loss)

    translator_model.build_translator(reuse=True)
    merged_summary = tf.summary.merge_all()

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    # Clear summaries from a previous run before creating the writer.
    if os.path.exists('Data/tb_summaries/translator_model'):
        shutil.rmtree('Data/tb_summaries/translator_model')
    train_writer = tf.summary.FileWriter('Data/tb_summaries/translator_model', sess.graph)

    bucket_sizes = [bucket_size for bucket_size in buckets]
    bucket_sizes.sort()

    step = 0
    batch_size = args.batch_size
    for epoch in range(args.max_epochs):
        for bucket_size in bucket_sizes:
            if epoch == 0 and bucket_size < args.resume_from_bucket:
                continue

            batch_no = 0
            while (batch_no + 1) * batch_size < len(buckets[bucket_size]):
                start = time.clock()
                source, target = dl.get_batch_from_pairs(
                    buckets[bucket_size][batch_no * batch_size:(batch_no + 1) * batch_size])

                _, loss, prediction = sess.run(
                    [optim, translator_model.loss, translator_model.arg_max_prediction],
                    feed_dict={
                        translator_model.source_sentence: source,
                        translator_model.target_sentence: target,
                    })
                end = time.clock()

                print "LOSS: {}\tEPOCH: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}\t bucket_size:{}".format(
                    loss, epoch, batch_no, step,
                    len(buckets[bucket_size]) / args.batch_size, bucket_size)
                print "TIME FOR BATCH", end - start
                print "TIME FOR BUCKET (mins)", (end - start) * (
                    len(buckets[bucket_size]) / args.batch_size) / 60.0

                batch_no += 1
                step += 1

                if step % args.summary_every == 0:
                    [summary] = sess.run(
                        [merged_summary],
                        feed_dict={
                            translator_model.source_sentence: source,
                            translator_model.target_sentence: target,
                        })
                    train_writer.add_summary(summary, step)
                    print "******"
                    print "Source ", dl.inidices_to_string(source[0], source_vocab)
                    print "---------"
                    print "Target ", dl.inidices_to_string(target[0], target_vocab)
                    print "----------"
                    print "Prediction ", dl.inidices_to_string(prediction[0:bucket_size], target_vocab)
                    print "******"

                if step % args.sample_every == 0:
                    log_file = open('Data/translator_sample.txt', 'wb')
                    generated_target = target[:, 0:1]
                    for col in range(bucket_size):
                        [probs] = sess.run(
                            [translator_model.t_probs],
                            feed_dict={
                                translator_model.t_source_sentence: source,
                                translator_model.t_target_sentence: generated_target,
                            })

                        curr_preds = []
                        for bi in range(probs.shape[0]):
                            pred_word = utils.sample_top(probs[bi][-1], top_k=args.top_k)
                            curr_preds.append(pred_word)

                        generated_target = np.insert(generated_target, generated_target.shape[1],
                                                     curr_preds, axis=1)

                        for bi in range(probs.shape[0]):
                            print col, dl.inidices_to_string(generated_target[bi], target_vocab)
                            print col, dl.inidices_to_string(target[bi], target_vocab)
                            print "***************"
                            if col == bucket_size - 1:
                                try:
                                    log_file.write("Predicted: " + dl.inidices_to_string(
                                        generated_target[bi], target_vocab) + '\n')
                                    log_file.write("Actual Target: " + dl.inidices_to_string(
                                        target[bi], target_vocab) + '\n')
                                    log_file.write("Actual Source: " + dl.inidices_to_string(
                                        source[bi], source_vocab) + '\n *******')
                                except:
                                    pass
                        print "***************"

                    log_file.close()

            save_path = saver.save(
                sess,
                "Data/Models/translation_model/model_epoch_{}_{}.ckpt".format(epoch, bucket_size))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--bucket_quant', type=int, default=50, help='Bucket Quantization')
    parser.add_argument('--model_path', type=str, default=None, help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--source_file', type=str,
                        default='Data/MachineTranslation/news-commentary-v11.de-en.de',
                        help='Source File')
    parser.add_argument('--target_file', type=str,
                        default='Data/MachineTranslation/news-commentary-v11.de-en.en',
                        help='Target File')
    parser.add_argument('--top_k', type=int, default=5, help='Sample from top k predictions')
    parser.add_argument('--batch_size', type=int, default=16, help='Batch Size')
    parser.add_argument('--bucket_size', type=int, default=None, help='Bucket Size')
    args = parser.parse_args()

    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    buckets, source_vocab, target_vocab = dl.load_translation_data()
    print "Number Of Buckets", len(buckets)

    config = model_config.translator_config
    model_options = {
        'source_vocab_size': len(source_vocab),
        'target_vocab_size': len(target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
    }

    translator_model = translator.ByteNet_Translator(model_options)
    translator_model.build_translator()

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.model_path:
        saver.restore(sess, args.model_path)

    bucket_sizes = [bucket_size for bucket_size in buckets]
    bucket_sizes.sort()

    if not args.bucket_size:
        bucket_size = random.choice(bucket_sizes)
    else:
        bucket_size = args.bucket_size

    source, target = dl.get_batch_from_pairs(
        random.sample(buckets[bucket_size], args.batch_size))

    log_file = open('Data/translator_sample.txt', 'wb')
    generated_target = target[:, 0:1]
    for col in range(bucket_size):
        [probs] = sess.run(
            [translator_model.t_probs],
            feed_dict={
                translator_model.t_source_sentence: source,
                translator_model.t_target_sentence: generated_target,
            })

        curr_preds = []
        for bi in range(probs.shape[0]):
            pred_word = utils.sample_top(probs[bi][-1], top_k=args.top_k)
            curr_preds.append(pred_word)

        generated_target = np.insert(generated_target, generated_target.shape[1],
                                     curr_preds, axis=1)

        for bi in range(probs.shape[0]):
            print col, dl.inidices_to_string(generated_target[bi], target_vocab)
            print col, dl.inidices_to_string(target[bi], target_vocab)
            print "***************"
            if col == bucket_size - 1:
                try:
                    log_file.write("Predicted: " + dl.inidices_to_string(
                        generated_target[bi], target_vocab) + '\n')
                    log_file.write("Actual Target: " + dl.inidices_to_string(
                        target[bi], target_vocab) + '\n')
                    log_file.write("Actual Source: " + dl.inidices_to_string(
                        source[bi], source_vocab) + '\n *******')
                except:
                    pass

    log_file.close()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--top_k', type=int, default=5, help='sample from top k predictions')
    parser.add_argument('--beta1', type=float, default=0.9, help='beta1 hyperparameter for Adam')
    parser.add_argument('--datapath', type=str, default="Data/movielen_20/movielen_20.csv", help='data path')
    parser.add_argument('--save_dir', type=str, default="Models/ml20", help='save dir path')
    parser.add_argument('--eval_iter', type=int, default=1000, help='evaluate every x steps')
    parser.add_argument('--early_stop', type=int, default=10, help='early stop after x evaluations without improvement')
    parser.add_argument('--step', type=int, default=400000, help='training steps')
    parser.add_argument('--tt_percentage', type=float, default=0.2, help='0.2 means 80% training, 20% testing')
    parser.add_argument('--data_ratio', type=float, default=1, help='fraction of the training data actually used')
    parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
    parser.add_argument('--L2', type=float, default=0, help='L2 regularization')
    parser.add_argument('--dilation_count', type=int, default=16, help='dilation count number')
    parser.add_argument('--method', type=str, default="from_scratch", help='from_scratch, StackR, stackC, stackA')
    parser.add_argument('--load_model', type=ast.literal_eval, default=False, help='whether loading pretrain model')
    parser.add_argument('--model_path', type=str, default="Models/", help='load model path')
    parser.add_argument('--padid', type=int, default=0, help='pad id')
    parser.add_argument('--masked_lm_prob', type=float, default=0.2, help='0.2 means 20% items are masked')
    parser.add_argument('--max_predictions_per_seq', type=int, default=50, help='maximum number of masked tokens')
    parser.add_argument('--max_position', type=int, default=100,
                        help='maximum length for the positional embedding; it has to be larger than the sequence length')
    parser.add_argument('--has_positionalembedding', type=bool, default=False,
                        help='whether to add a positional embedding before the CNN')
    args = parser.parse_args()
    print(args)

    dl = data_loader.Data_Loader({
        'dir_name': args.datapath,
        'padid': args.padid
    })
    all_samples = dl.item
    print(all_samples.shape)

    items = dl.item_dict
    itemlist = items.values()
    item_size = len(items)
    print("len(items)", item_size)

    max_predictions_per_seq = args.max_predictions_per_seq
    masked_lm_prob = args.masked_lm_prob

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(all_samples)))
    all_samples = all_samples[shuffle_indices]

    # Split train/test set
    dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples)))
    train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:]

    random.seed(10)
    ratio = args.data_ratio
    train_set_len = len(train_set)
    train_index_set = set(list(range(train_set_len)))

    if ratio == 0.2:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set), train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.4:
        last_ratio = ratio - 0.2
        last_train_ratio = int(last_ratio * float(train_set_len))
        last_train_index_set = random.sample(list(train_index_set), last_train_ratio)
        last_train_set = train_set[last_train_index_set]
        remain_train_index_set = train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 4.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((last_train_set, new_train_set), axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.6:
        last_last_ratio = ratio - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]
        remain_train_index_set = train_index_set - set(last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(list(remain_train_index_set),
                                             int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0)
        remain_train_index_set = remain_train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set), axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.8:
        last_last_ratio = ratio - 0.2 - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]
        remain_train_index_set = train_index_set - set(last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(list(remain_train_index_set),
                                             int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0)
        remain_train_index_set = remain_train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set), axis=0)
        remain_train_index_set = remain_train_index_set - set(new_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 2.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set), axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 1:
        train_set = np.array(train_set)
        print("real train len", len(train_set))
    else:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set), train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))

    model_para = {
        'item_size': len(items),
        'dilated_channels': 64,
        'dilations': [1, 4] * args.dilation_count,
        'step': args.step,
        'kernel_size': 3,
        'learning_rate': args.learning_rate,
        'L2': args.L2,
        'batch_size': 1024,
        'load_model': args.load_model,
        'model_path': args.model_path,
        'method': args.method,
        # maximum length for the positional embedding; it has to be larger than the sequence length
        'max_position': args.max_position,
        'has_positionalembedding': args.has_positionalembedding
    }
    print(model_para)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    itemrec = generator_deep.NextItNet_Decoder(model_para)
    itemrec.train_graph()
    optimizer = tf.train.AdamOptimizer(model_para['learning_rate'],
                                       beta1=args.beta1).minimize(itemrec.loss)
    itemrec.predict_graph(reuse=True)

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    saver = tf.train.Saver(max_to_keep=1)
    init = tf.global_variables_initializer()
    sess.run(init)
    # writer = tf.summary.FileWriter('./stack_graph', sess.graph)

    numIters = 1
    max_mrr = 0
    break_stick = 0
    early_stop = 0

    while True:
        if break_stick == 1:
            break

        batch_no = 0
        batch_size = model_para['batch_size']
        while (batch_no + 1) * batch_size < train_set.shape[0]:
            start = time.time()
            item_batch = train_set[batch_no * batch_size:(batch_no + 1) * batch_size, :]
            output_tokens_batch, maskedpositions_batch, maskedlabels_batch, masked_lm_weights_batch = \
                create_masked_lm_predictions_frombatch(
                    item_batch,
                    masked_lm_prob,
                    max_predictions_per_seq,
                    items=itemlist,
                    rng=None,
                    item_size=item_size)

            _, loss = sess.run(
                [optimizer, itemrec.loss],
                feed_dict={
                    itemrec.itemseq_output: item_batch[:, 1:],        # 2 3 4 5 6 7 8 9
                    itemrec.itemseq_input: output_tokens_batch,       # 1 2 0 4 5 0 7 8 9
                    itemrec.masked_position: maskedpositions_batch,   # [1 4]
                    itemrec.masked_items: maskedlabels_batch,         # [3, 6]
                    itemrec.label_weights: masked_lm_weights_batch    # [1.0, 1.0, ...] (unused)
                })
            end = time.time()

            if numIters % args.eval_iter == 0:
                print("-------------------------------------------------------train")
                print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format(
                    loss, batch_no, numIters, train_set.shape[0] / batch_size))
                print("TIME FOR BATCH", end - start)
                print("TIME FOR EPOCH (mins)",
                      (end - start) * (train_set.shape[0] / batch_size) / 60.0)

            batch_no += 1

            if numIters % args.eval_iter == 0:
                print("-------------------------------------------------------test")
                batch_no_test = 0
                batch_size_test = batch_size * 1
                curr_preds_5 = []
                rec_preds_5 = []
                ndcg_preds_5 = []
                curr_preds_10 = []
                rec_preds_10 = []
                ndcg_preds_10 = []
                while (batch_no_test + 1) * batch_size_test < valid_set.shape[0]:
                    item_batch = valid_set[batch_no_test * batch_size_test:
                                           (batch_no_test + 1) * batch_size_test, :]
                    [probs_10, probs_5] = sess.run(
                        [itemrec.top_10, itemrec.top_5],
                        feed_dict={itemrec.itemseq_input: item_batch[:, 0:-1]})
                    # print(probs_10[1].shape)  # (256, 1, 10)
                    for bi in range(batch_size_test):
                        pred_items_10 = probs_10[1][bi][-1]
                        pred_items_5 = probs_5[1][bi][-1]
                        true_item = item_batch[bi][-1]

                        predictmap_5 = {ch: i for i, ch in enumerate(pred_items_5)}
                        pred_items_10 = {ch: i for i, ch in enumerate(pred_items_10)}

                        rank_5 = predictmap_5.get(true_item)
                        rank_10 = pred_items_10.get(true_item)
                        if rank_5 is None:
                            curr_preds_5.append(0.0)
                            rec_preds_5.append(0.0)
                            ndcg_preds_5.append(0.0)
                        else:
                            MRR_5 = 1.0 / (rank_5 + 1)
                            Rec_5 = 1.0
                            ndcg_5 = 1.0 / math.log(rank_5 + 2, 2)
                            curr_preds_5.append(MRR_5)
                            rec_preds_5.append(Rec_5)
                            ndcg_preds_5.append(ndcg_5)
                        if rank_10 is None:
                            curr_preds_10.append(0.0)
                            rec_preds_10.append(0.0)
                            ndcg_preds_10.append(0.0)
                        else:
                            MRR_10 = 1.0 / (rank_10 + 1)
                            Rec_10 = 1.0
                            ndcg_10 = 1.0 / math.log(rank_10 + 2, 2)
                            curr_preds_10.append(MRR_10)
                            rec_preds_10.append(Rec_10)
                            ndcg_preds_10.append(ndcg_10)

                    batch_no_test += 1

                mrr = sum(curr_preds_5) / float(len(curr_preds_5))
                mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10))
                hit = sum(rec_preds_5) / float(len(rec_preds_5))
                hit_10 = sum(rec_preds_10) / float(len(rec_preds_10))
                ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5))
                ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10))

                if mrr > max_mrr:
                    max_mrr = mrr
                    print("Save model! mrr_5:", mrr)
                    print("Save model! mrr_10:", mrr_10)
                    print("Save model! hit_5:", hit)
                    print("Save model! hit_10:", hit_10)
                    print("Save model! ndcg_5:", ndcg)
                    print("Save model! ndcg_10:", ndcg_10)
                    early_stop = 0
                    saver.save(
                        sess, args.save_dir + "/{}_{}_{}_{}.ckpt".format(
                            args.dilation_count, args.learning_rate,
                            args.data_ratio, args.step))
                else:
                    print("mrr_5:", mrr)
                    print("mrr_10:", mrr_10)
                    print("hit_5:", hit)
                    print("hit_10:", hit_10)
                    print("ndcg_5:", ndcg)
                    print("ndcg_10:", ndcg_10)
                    early_stop += 1

            if numIters >= model_para['step']:
                break_stick = 1
                break
            if early_stop >= args.early_stop:
                break_stick = 1
                print("early stop!")
                break
            numIters += 1
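# create_masked_lm_predictions_frombatch (used above) produces BERT-style inputs: a copy of
# each sequence with a fraction of items replaced by the mask/pad id, plus the masked
# positions, their original labels, and per-label weights. A simplified, illustrative sketch
# for a single sequence (the real helper also caps the count at max_predictions_per_seq and
# can substitute random items; this sketch is not the project's implementation):
import random

def mask_sequence(seq, masked_lm_prob=0.2, mask_id=0):
    masked = list(seq)
    positions, labels = [], []
    for pos, item in enumerate(seq):
        if item != mask_id and random.random() < masked_lm_prob:
            positions.append(pos)   # where the item was masked
            labels.append(item)     # what the model should predict there
            masked[pos] = mask_id
    weights = [1.0] * len(labels)
    return masked, positions, labels, weights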
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning Rate')
    parser.add_argument('--batch_size', type=int, default=16, help='Batch Size')
    parser.add_argument('--bucket_quant', type=int, default=25, help='Bucket Quantization')
    parser.add_argument('--max_epochs', type=int, default=1000, help='Max Epochs')
    parser.add_argument('--beta1', type=float, default=0.5, help='Momentum for Adam Update')
    parser.add_argument('--resume_model', type=str, default=None, help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--source_file', type=str,
                        default='Data/MachineTranslation/news-commentary-v11.de-en.de',
                        help='Source File')
    parser.add_argument('--target_file', type=str,
                        default='Data/MachineTranslation/news-commentary-v11.de-en.en',
                        help='Target File')
    args = parser.parse_args()

    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    buckets, source_vocab, target_vocab, frequent_keys = dl.load_translation_data()

    config = model_config.translator_config
    model_options = {
        'n_source_quant': len(source_vocab),
        'n_target_quant': len(target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'sample_size': 10,
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
        'batch_size': args.batch_size,
        'source_mask_chars': [source_vocab['padding']],
        'target_mask_chars': [target_vocab['padding']]
    }

    last_saved_model_path = None
    if args.resume_model:
        last_saved_model_path = args.resume_model

    print "Number Of Buckets", len(buckets)

    for i in range(1, args.max_epochs):
        cnt = 0
        for _, key in frequent_keys:
            cnt += 1
            print "KEY", cnt, key
            if key > 400:
                continue
            if len(buckets[key]) < args.batch_size:
                print "BUCKET TOO SMALL", key
                continue

            sess = tf.InteractiveSession()
            batch_no = 0
            batch_size = args.batch_size

            byte_net = model.Byte_net_model(model_options)
            bn_tensors = byte_net.build_translation_model(sample_size=key)

            adam = tf.train.AdamOptimizer(args.learning_rate, beta1=args.beta1)
            optim = adam.minimize(bn_tensors['loss'], var_list=bn_tensors['variables'])

            train_writer = tf.train.SummaryWriter('logs/', sess.graph)
            tf.initialize_all_variables().run()
            saver = tf.train.Saver()

            if last_saved_model_path:
                saver.restore(sess, last_saved_model_path)

            while (batch_no + 1) * batch_size < len(buckets[key]):
                source, target = dl.get_batch_from_pairs(
                    buckets[key][batch_no * batch_size:(batch_no + 1) * batch_size])

                _, loss, prediction, summary, source_gradient, target_gradient = sess.run(
                    [optim, bn_tensors['loss'], bn_tensors['prediction'],
                     bn_tensors['merged_summary'], bn_tensors['source_gradient'],
                     bn_tensors['target_gradient']],
                    feed_dict={
                        bn_tensors['source_sentence']: source,
                        bn_tensors['target_sentence']: target,
                    })

                train_writer.add_summary(summary, batch_no * (cnt + 1))
                print "Loss", loss, batch_no, len(buckets[key]) / batch_size, i, cnt, key
                print "******"
                print "Source ", dl.inidices_to_string(source[0], source_vocab)
                print "---------"
                print "Target ", dl.inidices_to_string(target[0], target_vocab)
                print "----------"
                print "Prediction ", dl.inidices_to_string(prediction[0:key], target_vocab)
                print "******"

                batch_no += 1
                if batch_no % 1000 == 0:
                    save_path = saver.save(
                        sess, "Data/Models/model_translation_epoch_{}_{}.ckpt".format(i, cnt))
                    last_saved_model_path = "Data/Models/model_translation_epoch_{}_{}.ckpt".format(i, cnt)

            save_path = saver.save(sess, "Data/Models/model_translation_epoch_{}.ckpt".format(i))
            last_saved_model_path = "Data/Models/model_translation_epoch_{}.ckpt".format(i)
            tf.reset_default_graph()
            sess.close()
def __init__(self, task):
    bucket_quant = 10
    current_path = '/'.join(os.path.realpath(__file__).split('/')[:-1])
    translator_root_path = join(current_path, 'pretrained_models')
    model_path = {
        'en-de-news': join(translator_root_path, 'en-de-news', 'model_epoch_4_145000.ckpt'),
        'en-fr-news': join(translator_root_path, 'en-fr-news', 'model_epoch_4_90000.ckpt'),
        'en-cs-news': join(translator_root_path, 'en-cs-news', 'model_epoch_4_70000.ckpt'),
        'en-de-europarl': join(translator_root_path, 'en-de-europarl', 'model_epoch_1_440000.ckpt')
    }

    data_root_path = join(current_path, 'Data', 'translator_training_data')
    source_file = {
        'en-de-europarl': join(data_root_path, 'europarl-v7.de-en.en'),
        'en-de-news': join(data_root_path, 'news-commentary-v12.de-en.en'),
        'en-fr-news': join(data_root_path, 'news-commentary-v9.fr-en.en'),
        'en-cs-news': join(data_root_path, 'news-commentary-v9.cs-en.en')
    }
    target_file = {
        'en-de-europarl': join(data_root_path, 'europarl-v7.de-en.de'),
        'en-de-news': join(data_root_path, 'news-commentary-v12.de-en.de'),
        'en-fr-news': join(data_root_path, 'news-commentary-v9.fr-en.fr'),
        'en-cs-news': join(data_root_path, 'news-commentary-v9.cs-en.cs')
    }

    data_loader_options = {
        'model_type': 'translation',
        'source_file': source_file[task],
        'target_file': target_file[task],
        'bucket_quant': bucket_quant
    }

    self.dl = data_loader.Data_Loader(data_loader_options)
    self.buckets, self.source_vocab, self.target_vocab = self.dl.load_translation_data()

    config = model_config.translator_config
    model_options = {
        'source_vocab_size': len(self.source_vocab),
        'target_vocab_size': len(self.target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
        'layer_norm': config['layer_norm']
    }

    self.translator_model = translator.ByteNet_Translator(model_options)
    self.translator_model.build_model()
    self.translator_model.build_translator(reuse=True)

    self.sess = tf.Session()
    saver = tf.train.Saver()
    if model_path[task]:
        saver.restore(self.sess, model_path[task])

    self.features = {}
    for layer_index in range(15):
        dilation = int(math.pow(2, layer_index % 5))
        layer_tensor_name = "bytenet_encoder_layer_%d_%d/add:0" % (layer_index, dilation)
        layer_name = "bytenet_encoder_layer_%d_%d" % (layer_index, dilation)
        self.features[layer_name] = tf.get_default_graph().get_tensor_by_name(layer_tensor_name)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--sample_size', type=int, default=300, help='Sampled output size')
    parser.add_argument('--top_k', type=int, default=5, help='Sample from top k predictions')
    parser.add_argument('--model_path', type=str, default=None, help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--text_dir', type=str, default='Data/generator_training_data', help='Directory containing text files')
    parser.add_argument('--data_dir', type=str, default='Data', help='Data Directory')
    parser.add_argument('--seed', type=str, default='All', help='Seed for text generation')
    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )
    config = model_config.predictor_config

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.text_dir
    })
    _, vocab = dl.load_generator_data(config['sample_size'])

    model_options = {
        'vocab_size': len(vocab),
        'residual_channels': config['residual_channels'],
        'dilations': config['dilations'],
        'filter_width': config['filter_width'],
    }

    generator_model = generator.ByteNet_Generator(model_options)
    generator_model.build_generator()

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.model_path:
        saver.restore(sess, args.model_path)

    seed_sentence = np.array([dl.string_to_indices(args.seed, vocab)], dtype='int32')

    for col in range(args.sample_size):
        [probs] = sess.run([generator_model.g_probs],
                           feed_dict={generator_model.seed_sentence: seed_sentence})
        curr_preds = []
        for bi in range(probs.shape[0]):
            pred_word = utils.sample_top(probs[bi][-1], top_k=args.top_k)
            curr_preds.append(pred_word)

        seed_sentence = np.insert(seed_sentence, seed_sentence.shape[1], curr_preds, axis=1)
        print col, dl.inidices_to_string(seed_sentence[0], vocab)

    f = open('Data/generator_sample.txt', 'wb')
    f.write(dl.inidices_to_string(seed_sentence[0], vocab))
    f.close()