def train(model: MemN2N, train_data, valid_data, config):
    """
    Train the model.

    Args:
        model (MemN2N): the model to be trained
        train_data: training data
        valid_data: validation data
        config: model and training configs

    Returns:
        None
    """
    lr = config.init_lr
    train_losses = []
    train_perplexities = []
    valid_losses = []
    valid_perplexities = []

    for epoch in range(1, config.nepoch + 1):
        train_loss = train_single_epoch(model, lr, train_data, config)
        valid_loss = eval(model, valid_data, config, "Validation")

        info = {'epoch': epoch, 'learning_rate': lr}

        # If the validation loss no longer drops, divide the learning rate by 1.5.
        if len(valid_losses) > 0 and valid_loss > valid_losses[-1] * 0.9999:
            lr /= 1.5

        train_losses.append(train_loss)
        train_perplexities.append(math.exp(train_loss))
        valid_losses.append(valid_loss)
        valid_perplexities.append(math.exp(valid_loss))

        info["train_perplexity"] = train_perplexities[-1]
        info["validate_perplexity"] = valid_perplexities[-1]
        print(info)

        if epoch % config.log_epoch == 0:
            save_dir = os.path.join(config.checkpoint_dir, "model_%d" % epoch)
            paddle.save(model.state_dict(), save_dir)
            lr_path = os.path.join(config.checkpoint_dir, "lr_%d" % epoch)
            with open(lr_path, "w") as f:
                f.write(f"{lr}")

        # Stop early once the target perplexity is reached.
        if info["validate_perplexity"] < config.target_ppl:
            save_dir = os.path.join(config.checkpoint_dir, "model_good")
            paddle.save(model.state_dict(), save_dir)
            break

        if lr < 1e-5:
            break

    save_dir = os.path.join(config.checkpoint_dir, "model")
    paddle.save(model.state_dict(), save_dir)

def train_single_epoch(model: MemN2N, lr, data, config):
    """
    Train one epoch.

    Args:
        model (MemN2N): model to be trained
        lr (float): the learning rate of this epoch
        data: training data
        config: configs

    Returns:
        float: average loss
    """
    model.train()
    N = int(math.ceil(len(data) / config.batch_size))  # train N batches in total
    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=config.max_grad_norm)
    optimizer = paddle.optimizer.SGD(learning_rate=lr,
                                     parameters=model.parameters(),
                                     grad_clip=clip)
    lossfn = nn.CrossEntropyLoss(reduction='sum')
    total_loss = 0

    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar('Train', max=N)

    for batch in range(N):
        if config.show:
            bar.next()
        optimizer.clear_grad()

        context = np.ndarray([config.batch_size, config.mem_size], dtype=np.int64)
        target = np.ndarray([config.batch_size], dtype=np.int64)
        for i in range(config.batch_size):
            m = random.randrange(config.mem_size, len(data))
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]

        batch_data = paddle.to_tensor(context)
        batch_label = paddle.to_tensor(target)
        predict = model(batch_data)
        loss = lossfn(predict, batch_label)
        loss.backward()
        optimizer.step()
        total_loss += loss

    if config.show:
        bar.finish()

    return total_loss / N / config.batch_size

def eval(model: MemN2N, data, config, mode="Test"):
    """
    Evaluate the model performance.

    Args:
        model (MemN2N): the model to be evaluated
        data: evaluation data
        config: model and eval configs
        mode: Validation or Test

    Returns:
        float: average loss
    """
    model.eval()
    lossfn = nn.CrossEntropyLoss(reduction='sum')
    N = int(math.ceil(len(data) / config.batch_size))
    total_loss = 0

    context = np.ndarray([config.batch_size, config.mem_size], dtype=np.int64)
    target = np.ndarray([config.batch_size], dtype=np.int64)

    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar(mode, max=N - 1)

    m = config.mem_size
    for batch in range(N):
        if config.show:
            bar.next()
        for i in range(config.batch_size):
            if m >= len(data):
                break
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]
            m += 1
        if m >= len(data):
            break

        batch_data = paddle.to_tensor(context)
        batch_label = paddle.to_tensor(target)
        predict = model(batch_data)
        loss = lossfn(predict, batch_label)
        total_loss += loss

    if config.show:
        bar.finish()

    return total_loss / N / config.batch_size

def main(_):
    train_file = 'data/data_1_train.csv'
    source_count, target_count = [], []

    data = process_data.read_data(train_file)
    parsed_data = process_data.parse_data(data)
    source_word2idx, target_word2idx = create_vocab(parsed_data)

    # train_data = read_data(FLAGS.train_data, source_count, source_word2idx, target_count, target_word2idx)
    # test_data = read_data(FLAGS.test_data, source_count, source_word2idx, target_count, target_word2idx)
    trainData, testData = process_data.split_data(parsed_data, 80, 20)
    train_data = process_data.read_and_process_data(trainData, source_word2idx, target_word2idx)
    test_data = process_data.read_and_process_data(testData, source_word2idx, target_word2idx)

    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = train_data[4] if train_data[4] > test_data[4] else test_data[4]

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    FLAGS.pre_trained_context_wt = init_word_embeddings(source_word2idx)
    FLAGS.pre_trained_target_wt = init_word_embeddings(target_word2idx)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, test_data)

def main(_):
    source_word2idx, target_word2idx, word_set = {}, {}, {}
    max_sent_len = -1

    max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)
    max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)

    train_data = get_dataset(FLAGS.train_data, source_word2idx, target_word2idx)
    test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx)

    # FLAGS.pad_idx = source_word2idx['<pad>']
    # FLAGS.nwords = len(source_word2idx)
    # FLAGS.mem_size = max_sent_len

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    print('loading pre-trained word vectors for train and test data')
    pre_trained_context_wt, pre_trained_target_wt = get_embedding_matrix(
        source_word2idx, target_word2idx, FLAGS.edim)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess, pre_trained_context_wt, pre_trained_target_wt,
                       source_word2idx['<pad>'], len(source_word2idx), max_sent_len)
        model.build_model()
        model.run(train_data, test_data)

def main(_):
    count = []
    word2idx = {}

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    train_data = read_data('%s/%s.train.txt' % (FLAGS.data_dir, FLAGS.data_name),
                           count, word2idx)
    valid_data = read_data('%s/%s.valid.txt' % (FLAGS.data_dir, FLAGS.data_name),
                           count, word2idx)
    test_data = read_data('%s/%s.test.txt' % (FLAGS.data_dir, FLAGS.data_name),
                          count, word2idx)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)
    pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        if FLAGS.is_test:
            model.run(valid_data, test_data)
        else:
            model.run(train_data, valid_data)

def run(is_test=False):
    count = []
    word2idx = {}
    Config.is_test = is_test

    if not os.path.exists(Config.checkpoint_dir):
        os.makedirs(Config.checkpoint_dir)
    if not os.path.exists(Config.vector_dir):
        os.makedirs(Config.vector_dir)

    train_data = read_data('%s/%s.train.txt' % (Config.data_dir, Config.data_name),
                           count, word2idx)
    valid_data = read_data('%s/%s.valid.txt' % (Config.data_dir, Config.data_name),
                           count, word2idx)
    test_data = read_data('%s/%s.test.txt' % (Config.data_dir, Config.data_name),
                          count, word2idx)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    save_obj('%s/idx2word.pkl' % (Config.vector_dir), idx2word)
    save_obj('%s/word2idx.pkl' % (Config.vector_dir), word2idx)
    Config.nwords = len(word2idx)

    tf.reset_default_graph()
    with tf.Session() as sess:
        model = MemN2N(Config, sess, True)
        model.build_model()
        if Config.is_test:
            model.run(valid_data, test_data)
        else:
            model.run(train_data, valid_data)
        tf.summary.FileWriter("./logs", graph=tf.get_default_graph())

def main():
    config = {
        'batch_size': 128,
        'emb_dim': 150,
        'mem_size': 100,
        'test': False,
        'n_epoch': 50,
        'n_hop': 6,
        'n_words': None,
        'lr': 0.001,
        'std_dev': 0.05,
        'cp_dir': 'checkpoints'
    }

    count = list()
    word2idx = dict()

    train_data = read_data('./data/ptb.train.txt', count, word2idx)
    valid_data = read_data('./data/ptb.valid.txt', count, word2idx)
    test_data = read_data('./data/ptb.test.txt', count, word2idx)
    config['n_words'] = len(word2idx)

    with tf.Session() as sess:
        print "Training..."
        mod = MemN2N(config, sess)
        mod.train(train_data, valid_data)
        mod.test(test_data)

def main(_):
    word2idx = {}

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    train_data, test_data = read_data(word2idx, FLAGS)
    train_data, valid_data = model_selection.train_test_split(train_data, test_size=.1)
    idx2word = dict(zip(word2idx.values(), word2idx.keys()))

    for i in range(FLAGS.mem_size):
        word2idx['time{}'.format(i + 1)] = 'time{}'.format(i + 1)
    FLAGS.nwords = len(word2idx)

    print('train data len:', len(train_data))
    print('valid data len:', len(valid_data))
    print('voca len:', len(word2idx))
    print('story sample:', np.array(train_data[0][0]))
    # pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        if FLAGS.is_test:
            model.run(valid_data, test_data, idx2word, FLAGS)
        else:
            model.run(train_data, valid_data, idx2word, FLAGS)

def __init__(self, config):
    self.eval_data = CBTestDataset(config.dataset_dir, config.word_type,
                                   perc_dict=config.perc_dict)
    self.eval_data.set_train_test(train=False)

    settings = {
        "use_cuda": config.cuda,
        "num_vocab": self.eval_data.num_vocab,
        "embedding_dim": 20,
        "sentence_size": self.eval_data.sentence_size,
        "max_hops": config.max_hops
    }

    print("Longest sentence length", self.eval_data.sentence_size)
    print("Longest story length", self.eval_data.max_story_size)
    print("Average story length", self.eval_data.mean_story_size)
    print("Number of vocab", self.eval_data.num_vocab)

    self.mem_n2n = MemN2N(settings)
    self.mem_n2n.load_state_dict(torch.load(config.check_point_path))
    self.mem_n2n.eval()
    print(self.mem_n2n)

    if config.cuda:
        self.mem_n2n = self.mem_n2n.cuda()

    self.start_epoch = 0
    self.config = config

def main(_):
    source_count, target_count = [], []
    source_word2idx, target_word2idx, word_set = {}, {}, {}
    max_sent_len = -1

    max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)
    max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)
    embeddings = load_embedding_file(FLAGS.pretrain_file, word_set)

    train_data = get_dataset(FLAGS.train_data, source_word2idx, target_word2idx, embeddings)
    test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx, embeddings)

    print "train data size - ", len(train_data[0])
    print "test data size - ", len(test_data[0])
    print "max sentence length - ", max_sent_len

    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = max_sent_len
    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    print('loading pre-trained word vectors for train and test data')
    FLAGS.pre_trained_context_wt, FLAGS.pre_trained_target_wt = get_embedding_matrix(
        embeddings, source_word2idx, target_word2idx, FLAGS.edim)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, test_data)

def run(config):
    print("#! preparing data...")
    train_iter, valid_iter, test_iter, vocab = dataloader(config.batch_size,
                                                          config.memory_size,
                                                          config.task,
                                                          config.joint,
                                                          config.tenk)

    print("#! instantiating model...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MemN2N(get_params(config), vocab).to(device)

    if config.file:
        with open(os.path.join(config.save_dir, config.file), 'rb') as f:
            if torch.cuda.is_available():
                state_dict = torch.load(f, map_location=lambda storage, loc: storage.cuda())
            else:
                state_dict = torch.load(f, map_location=lambda storage, loc: storage)
            model.load_state_dict(state_dict)

    if config.train:
        print("#! training...")
        optimizer = optim.Adam(model.parameters(), config.lr)
        train(train_iter, model, optimizer, config.num_epochs, config.max_clip, valid_iter)
        if not os.path.isdir(config.save_dir):
            os.makedirs(config.save_dir)
        torch.save(model.state_dict(), os.path.join(config.save_dir, get_fname(config)))

    print("#! testing...")
    with torch.no_grad():
        eval(test_iter, model, config.task)

def main(_):
    source_count, target_count = [], []
    source_word2idx, target_word2idx = {}, {}

    train_data = read_data(FLAGS.train_data, source_count, source_word2idx,
                           target_count, target_word2idx)
    test_data = read_data(FLAGS.test_data, source_count, source_word2idx,
                          target_count, target_word2idx)

    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = train_data[4] if train_data[4] > test_data[4] else test_data[4]

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    FLAGS.pre_trained_context_wt = init_word_embeddings(source_word2idx)
    FLAGS.pre_trained_target_wt = init_word_embeddings(target_word2idx)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, test_data)

def main(_):
    with tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5),
            device_count={'GPU': 1})) as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.test(example)

def __init__(self, config):
    if 'bAbI' in config.dataset_dir:
        self.train_data = bAbIDataset(config.dataset_dir, config.task)
        self.train_loader = DataLoader(self.train_data,
                                       batch_size=config.batch_size,
                                       num_workers=1,
                                       shuffle=True)
        self.test_data = bAbIDataset(config.dataset_dir, config.task, train=False)
        self.test_loader = DataLoader(self.test_data,
                                      batch_size=config.batch_size,
                                      num_workers=1,
                                      shuffle=False)
    elif 'CBTest' in config.dataset_dir:
        self.train_data = CBTestDataset(config.dataset_dir, config.word_type,
                                        perc_dict=config.perc_dict)
        print("Training set size: ", self.train_data.__len__())
        self.train_loader = DataLoader(self.train_data,
                                       batch_size=config.batch_size,
                                       num_workers=1,
                                       shuffle=True)
        self.test_data = copy.deepcopy(self.train_data)
        self.test_data.set_train_test(train=False)
        print("Testing set size: ", self.test_data.__len__())
        self.test_loader = DataLoader(self.test_data,
                                      batch_size=config.batch_size,
                                      num_workers=1,
                                      shuffle=False)

    settings = {
        "use_cuda": config.cuda,
        "num_vocab": self.train_data.num_vocab,
        "embedding_dim": 20,
        "sentence_size": self.train_data.sentence_size,
        "max_hops": config.max_hops
    }

    print("Longest sentence length", self.train_data.sentence_size)
    print("Longest story length", self.train_data.max_story_size)
    print("Average story length", self.train_data.mean_story_size)
    print("Number of vocab", self.train_data.num_vocab)

    self.mem_n2n = MemN2N(settings)
    self.ce_fn = nn.CrossEntropyLoss(size_average=False)
    self.opt = torch.optim.SGD(self.mem_n2n.parameters(), lr=config.lr)
    print(self.mem_n2n)

    if config.cuda:
        self.ce_fn = self.ce_fn.cuda()
        self.mem_n2n = self.mem_n2n.cuda()

    self.start_epoch = 0
    self.config = config

def main(_):
    count = []
    with open('./processed/word2idx.pkl', 'rb') as f:
        word2idx = pickle.load(f)

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)
    pp.pprint(flags.FLAGS.__flags)

    # train_data = read_data('%s/%s.train.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    # valid_data = read_data('%s/%s.valid.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    # test_data = read_data('%s/%s.test.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    # exit()

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3 if FLAGS.inference else 0.6

    with tf.Session(config=config) as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        test_set_data = read_test_data(FLAGS.infer_set, word2idx)

        if FLAGS.inference:
            model.load()
            answer = model.inference(test_set_data, word2idx)
            import pandas as pd
            answer = pd.DataFrame(answer, columns=['answer'])
            answer.index += 1
            answer.to_csv('./guess/guess.csv', index_label='id')
        else:
            if FLAGS.restore:
                model.load()
            with open('./processed/all_train.pkl', 'rb') as f:
                train_data = pickle.load(f)
            with open('./processed/all_valid.pkl', 'rb') as f:
                valid_data = pickle.load(f)
            test_data = read_our_data('./data/CBData/cbtest_CN_test_2500ex.txt',
                                      count, word2idx)
            if FLAGS.is_test:
                print('Do not use --is_test True')
                exit()
                model.run(valid_data, test_data, word2idx, test_set_data)
            else:
                model.run(train_data, valid_data, word2idx, test_set_data)

def main(_):
    word2idx = {}
    max_words = 0
    max_sentences = 0

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    # train_stories, train_questions, max_words, max_sentences = read_data('{}/train.txt'.format(FLAGS.data_dir), word2idx, max_words, max_sentences)
    # valid_stories, valid_questions, max_words, max_sentences = read_data('{}/train.txt'.format(FLAGS.data_dir), word2idx, max_words, max_sentences)
    train_stories, train_questions, max_words, max_sentences = read_data(
        '{}/qa{}_single-supporting-fact_train.txt'.format(FLAGS.data_dir, FLAGS.babi_task),
        word2idx, max_words, max_sentences)
    valid_stories, valid_questions, max_words, max_sentences = read_data(
        '{}/qa{}_single-supporting-fact_test.txt'.format(FLAGS.data_dir, FLAGS.babi_task),
        word2idx, max_words, max_sentences)
    test_stories, test_questions, max_words, max_sentences = read_data(
        '{}/qa{}_single-supporting-fact_test.txt'.format(FLAGS.data_dir, FLAGS.babi_task),
        word2idx, max_words, max_sentences)

    pad_data(train_stories, train_questions, max_words, max_sentences)
    pad_data(valid_stories, valid_questions, max_words, max_sentences)
    pad_data(test_stories, test_questions, max_words, max_sentences)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)
    FLAGS.max_words = max_words
    FLAGS.max_sentences = max_sentences
    pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        if FLAGS.is_test:
            model.run(valid_stories, valid_questions, test_stories, test_questions)
        else:
            model.run(train_stories, train_questions, valid_stories, valid_questions)

def run(context, question):
    word2idx = {}
    idx2word = {}
    idx2word = load_obj('%s/idx2word.pkl' % (Config.vector_dir), idx2word)
    word2idx = load_obj('%s/word2idx.pkl' % (Config.vector_dir), word2idx)

    context_data = read_txt(context, word2idx)
    question_data = read_txt(question, word2idx)
    Config.nwords = len(word2idx)

    tf.reset_default_graph()
    with tf.Session() as sess:
        model = MemN2N(Config, sess, False)
        model.build_model()
        results = model.predict(context_data, question_data)
        for result in results:
            print(' '.join(list(map(lambda x: idx2word.get(np.argmax(x)), result[0]))))

def main(_):
    count = []     # List of (word, count) for all the data
    word2idx = {}  # Dict (word, ID) for all the data

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    # Lists of word IDs
    if FLAGS.preloaded_data:
        with open('preloaded_telenor/train.pickle', 'rb') as f:
            train_data = pickle.load(f)
        with open('preloaded_telenor/val.pickle', 'rb') as f:
            valid_data = pickle.load(f)
            word2idx = pickle.load(f)
    else:
        train_data = read_data('%s/train.pickle' % FLAGS.data_dir, count, word2idx)
        valid_data = read_data('%s/val.pickle' % FLAGS.data_dir, count, word2idx)

    if FLAGS.is_test:
        test_data = read_data('%s/test.pickle' % FLAGS.data_dir, count, word2idx)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)
    pp.pprint(flags.FLAGS.__flags)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Build the Memory Network
        model = MemN2N(FLAGS, sess)
        model.build_model()

        if len(FLAGS.infere) > 0:
            print('Make sure the training and validation data supplied are the same as during the training of the model (idx2word)')
            question = convert_question(FLAGS.infere, word2idx)
            model.infere(question, idx2word)  # Prediction
        elif FLAGS.is_test:
            model.run(valid_data, test_data, idx2word)  # Testing
        else:
            model.run(train_data, valid_data, idx2word)  # Training

def main(_):
    count = []
    word2idx = {}

    train_data = read_data('%s/%s.train.txt' % (FLAGS.data_dir, FLAGS.data_name),
                           count, word2idx)
    valid_data = read_data('%s/%s.valid.txt' % (FLAGS.data_dir, FLAGS.data_name),
                           count, word2idx)
    test_data = read_data('%s/%s.test.txt' % (FLAGS.data_dir, FLAGS.data_name),
                          count, word2idx)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)
    pp.pprint(tf.app.flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, valid_data)

def __init__(self, config):
    self.train_data = bAbIDataset(config.dataset_dir, config.task)
    self.train_loader = DataLoader(self.train_data,
                                   batch_size=config.batch_size,
                                   num_workers=1,
                                   shuffle=True)

    self.test_data = bAbIDataset(config.dataset_dir, config.task, train=False)
    self.test_loader = DataLoader(self.test_data,
                                  batch_size=config.batch_size,
                                  num_workers=1,
                                  shuffle=False)

    settings = {
        "use_cuda": config.cuda,
        "num_vocab": self.train_data.num_vocab,
        "embedding_dim": 20,
        "sentence_size": self.train_data.sentence_size,
        "max_hops": config.max_hops
    }

    print("Longest sentence length", self.train_data.sentence_size)
    print("Longest story length", self.train_data.max_story_size)
    print("Average story length", self.train_data.mean_story_size)
    print("Number of vocab", self.train_data.num_vocab)

    self.mem_n2n = MemN2N(settings)
    self.ce_fn = nn.CrossEntropyLoss(size_average=False)
    self.opt = torch.optim.SGD(self.mem_n2n.parameters(), lr=config.lr,
                               weight_decay=1e-5)
    print(self.mem_n2n)

    if config.cuda:
        self.ce_fn = self.ce_fn.cuda()
        self.mem_n2n = self.mem_n2n.cuda()

    self.start_epoch = 0
    self.config = config

    args.task, len(train_data), len(test_data)))

settings = {
    "device": device,
    "num_vocab": train_data.num_vocab,
    "embedding_dim": args.embedding_dim,
    "sentence_size": train_data.sentence_size,
    "max_hops": args.max_hops
}

print("Longest sentence length", train_data.sentence_size)
print("Longest story length", train_data.max_story_size)
print("Average story length", train_data.mean_story_size)
print("Number of vocab", train_data.num_vocab)

torch.manual_seed(args.random_state)

mem_n2n = MemN2N(settings)
criterion = nn.CrossEntropyLoss(reduction='sum')
opt = torch.optim.SGD(mem_n2n.parameters(), lr=args.lr)
print(mem_n2n)
mem_n2n = mem_n2n.to(device)

for epoch in range(1, args.epochs + 1):
    # train single epoch
    total_loss = 0.
    correct = 0
    for step, (story, query, answer) in enumerate(train_loader):
        story, query, answer = story.to(device), query.to(device), answer.to(device)
        logits = mem_n2n(story, query)
        preds = logits.argmax(dim=1)

            break

        if lr < 1e-5:
            break

    save_dir = os.path.join(config.checkpoint_dir, "model")
    paddle.save(model.state_dict(), save_dir)


if __name__ == '__main__':
    config = Config('config.yaml')
    if not os.path.exists(config.checkpoint_dir):
        os.makedirs(config.checkpoint_dir)

    word2idx, train_data, valid_data, test_data = load_data(config)
    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    config.nwords = len(word2idx)
    print("vocab size is %d" % config.nwords)

    np.random.seed(config.srand)
    random.seed(config.srand)
    paddle.seed(config.srand)

    model = MemN2N(config)
    if config.recover_train:
        model_path = os.path.join(config.checkpoint_dir, config.model_name)
        state_dict = paddle.load(model_path)
        model.set_dict(state_dict)
    train(model, train_data, valid_data, config)

def main(_):
    word2idx = {}
    cand2idx = {}
    max_words = 0
    max_sentences = 0

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    train_stories, train_questions, max_words, max_sentences = read_data(
        'dstc/out_task4_train85.json', word2idx, cand2idx, max_words,
        max_sentences, test_flag=False)
    valid_stories, valid_questions, max_words, max_sentences = read_data(
        'dstc/out_task4_valid15.json', word2idx, cand2idx, max_words,
        max_sentences, test_flag=False)
    test_stories, test_questions, max_words, max_sentences = read_data(
        'dstc/out_dialog-task4INFOS-kb2_atmosphere_restrictions-distr0.5-tst1000.json',
        word2idx, cand2idx, max_words, max_sentences, test_flag=True)

    pad_data(train_stories, train_questions, max_words, max_sentences, test_flag=False)
    pad_data(valid_stories, valid_questions, max_words, max_sentences, test_flag=False)
    pad_data(test_stories, test_questions, max_words, max_sentences, test_flag=True)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    idx2cand = dict(zip(cand2idx.values(), cand2idx.keys()))
    FLAGS.nwords = len(word2idx)
    FLAGS.ncands = len(cand2idx)
    FLAGS.max_words = max_words
    FLAGS.max_sentences = max_sentences
    pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        if FLAGS.is_test:
            model.run(valid_stories, valid_questions, test_stories,
                      test_questions, idx2cand, answer_flag=False)
        else:
            model.run(train_stories, train_questions, valid_stories,
                      valid_questions, idx2cand, answer_flag=True)

        prediction_test_valid = 0
        if prediction_test_valid == 1:
            predictions, target = model.predict(valid_stories, valid_questions)
            correct_num = 0
            # print(len(valid_questions))
            for i in range(len(valid_questions)):
                index = i
                # depad_data(valid_stories, valid_questions)
                # question = valid_questions[index]['question']
                answer = valid_questions[index]['answer']['utterance']
                cand = valid_questions[index]['cand']
                # story_index = valid_questions[index]['story_index']
                # sentence_index = valid_questions[index]['sentence_index']
                # story = valid_stories[story_index][:sentence_index + 1]
                # story = [list(map(idx2word.get, sentence)) for sentence in story]
                # question = list(map(idx2word.get, question))
                # prediction = idx2cand[np.argmax(predictions[index])]
                pred_sorted = np.argsort(predictions[index][-FLAGS.ncands:])
                pred_sorted = pred_sorted[::-1]
                cand_list = []
                prediction = None
                for c in cand:
                    cand_list.append(idx2cand.get(c['utterance']))
                for pred in pred_sorted:
                    if idx2cand[pred] in cand_list:
                        prediction = idx2cand[pred]
                        break
                answer = idx2cand.get(answer)
                # print('Story:')
                # pp.pprint(story)
                # print('\nQuestion:')
                # pp.pprint(question)
                # print('\nPrediction:')
                # pp.pprint(prediction)
                # print('\nAnswer:')
                # pp.pprint(answer)
                # print('\ncandidates')
                # pp.pprint(cand_list)
                # print('\nCorrect:')
                # pp.pprint(prediction == answer)
                if prediction == answer:
                    correct_num += 1
            print('case: ' + str(len(valid_questions)) + ' correct_num: ' + str(correct_num))
            print('acc - ' + str(correct_num / len(valid_questions) * 100))
        else:
            predictions, target = model.predict(test_stories, test_questions)
            correct_num = 0
            # print(len(valid_questions))
            responses = []
            for i in range(len(test_questions)):
                index = i
                dict_answer_current = {}
                dict_answer_current['dialog_id'] = test_questions[index]['dialog_id']
                candidate_rank = []
                # depad_data(valid_stories, valid_questions)
                # question = valid_questions[index]['question']
                # answer = test_questions[index]['answer']['utterance']
                cand = test_questions[index]['cand']
                # story_index = valid_questions[index]['story_index']
                # sentence_index = valid_questions[index]['sentence_index']
                # story = valid_stories[story_index][:sentence_index + 1]
                # story = [list(map(idx2word.get, sentence)) for sentence in story]
                # question = list(map(idx2word.get, question))
                # prediction = idx2cand[np.argmax(predictions[index])]
                pred_sorted = np.argsort(predictions[index][-FLAGS.ncands:])
                pred_sorted = pred_sorted[::-1]
                cand_list = []
                prediction = None
                for c in cand:
                    cand_list.append(idx2cand.get(c['utterance']))
                crank = 1
                flag = 0
                for pred in pred_sorted:
                    if idx2cand[pred] in cand_list:
                        if flag == 0:
                            prediction = idx2cand[pred]
                            flag = 1
                        for c in cand:
                            if c['utterance'] == pred:
                                # print(idx2cand.get(c['utterance']))
                                candidate_rank.append({
                                    "candidate_id": c['candidate_id'],
                                    "rank": crank
                                })
                                crank = crank + 1
                                if crank == 11:
                                    break
                    if crank == 11:
                        break
                dict_answer_current['lst_candidate_id'] = candidate_rank
                responses.append(dict_answer_current)
                # answer = idx2cand.get(answer)
                # print('Story:')
                # pp.pprint(story)
                # print('\nQuestion:')
                # pp.pprint(question)
                # print('\nPrediction:')
                # pp.pprint(prediction)
                # print('\nAnswer:')
                # pp.pprint(answer)
                # print('\ncandidates')
                # pp.pprint(cand_list)
                # print('\nCorrect:')
                # pp.pprint(prediction == answer)
                # if prediction == answer:
                #     correct_num += 1
            fdout = open(
                "dialog-task4INFOS-kb2_atmosphere_restrictions-distr0.5-tst1000.answer.json",
                "w")
            json.dump(responses, fdout)
            fdout.close()

parser.add_argument('model_dir', type=str, help='trained model path')
parser.add_argument('test_path', type=str, help='test data path')
parser.add_argument('--maxmemsize', type=int, metavar='N', default=100,
                    help='memory capacity')
args = parser.parse_args()

# loading vocabularies and the trained model
dialog_vocab = Vocab.load(os.path.join(args.model_dir, 'dialog_vocab'))
candidates_vocab = Vocab.load(os.path.join(args.model_dir, 'candidates_vocab'))
model = MemN2N.load(os.path.join(args.model_dir, 'model'))

test_data_reader_per_resp = DialogReader(args.test_path, dialog_vocab, candidates_vocab,
                                         args.maxmemsize, 1, False, False, False)
test_data_reader_per_dial = DialogReader(args.test_path, dialog_vocab, candidates_vocab,
                                         args.maxmemsize, 1, False, False, True)

print("Per Response Accuracy: ",
      calc_accuracy_per_response(model, test_data_reader_per_resp, False))
print("Per Dialog Accuracy: ",
      calc_accuracy_per_dialog(model, test_data_reader_per_dial))

gr_train.add_argument('--shuffle', action="store_true", default=True,
                      help='shuffle batches before every epoch')
gr_train.add_argument('--save_dir', type=str, default=None,
                      help='path to save the model')
args = parser.parse_args()

# build data, initialize model and start training.
dialog_vocab, candidates_vocab = build_dialog_vocab(args.train_path,
                                                    args.candidates_path, 1000)
trn_data_reader = DialogReader(args.train_path, dialog_vocab, candidates_vocab,
                               args.maxmemsize, args.batchsize, False, args.shuffle, False)
dev_data_reader = DialogReader(args.dev_path, dialog_vocab, candidates_vocab,
                               args.maxmemsize, args.batchsize, False, False, False)

candidate_vecs = Variable(trn_data_reader._candidate_vecs)
candidate_vecs = candidate_vecs.cuda() if args.gpu else candidate_vecs

model = MemN2N(args.edim, len(trn_data_reader._dialog_vocab), candidate_vecs,
               args.nhops, args.init_std)
if args.gpu:
    model.cuda()

train(model, trn_data_reader, dev_data_reader, args.epochs, args.lr,
      args.decay_factor, args.decay_every, args.maxgradnorm, 50, 500, args.gpu)

# saving trained model and vocabularies.
save_dir = args.save_dir
if not save_dir:
    save_dir = os.getcwd()
save_dir = os.path.join(save_dir, 'model_' + str(time.time()))
if not os.path.exists(save_dir):

train_labels = np.argmax(trainA, axis=1)
test_labels = np.argmax(testA, axis=1)
val_labels = np.argmax(valA, axis=1)

tf.set_random_seed(FLAGS.random_state)
batch_size = FLAGS.batch_size

batches = zip(range(0, n_train - batch_size, batch_size),
              range(batch_size, n_train, batch_size))
batches = [(start, end) for start, end in batches]

with tf.Session() as sess:
    model = MemN2N(batch_size, vocab_size, sentence_size, memory_size,
                   FLAGS.embedding_size, session=sess, hops=FLAGS.hops,
                   max_grad_norm=FLAGS.max_grad_norm)
    for i in range(1, FLAGS.epochs + 1):
        if i - 1 <= FLAGS.anneal_stop_epoch:
            anneal = 2.0**((i - 1) // FLAGS.anneal_rate)
        else:
            anneal = 2.0**(FLAGS.anneal_stop_epoch // FLAGS.anneal_rate)
        lr = FLAGS.learning_rate / anneal

        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:

def main(_):
    word2idx = {}
    max_words = 0
    max_sentences = 0

    checkpoint_dir = "./checkpoints"
    data_dir = "./bAbI/en-valid"
    babi_task = 1

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # train_stories, train_questions, max_words, max_sentences = read_all_data('{}/qa{}_train.txt'.format(data_dir, babi_task), word2idx, max_words, max_sentences)
    # valid_stories, valid_questions, max_words, max_sentences = read_all_data('{}/qa{}_valid.txt'.format(data_dir, babi_task), word2idx, max_words, max_sentences)
    # test_stories, test_questions, max_words, max_sentences = read_all_data('{}/qa{}_test.txt'.format(data_dir, babi_task), word2idx, max_words, max_sentences)
    train_stories, train_questions, max_words, max_sentences = read_all_data(
        'train', word2idx, max_words, max_sentences)
    valid_stories, valid_questions, max_words, max_sentences = read_all_data(
        'valid', word2idx, max_words, max_sentences)
    test_stories, test_questions, max_words, max_sentences = read_all_data(
        'test', word2idx, max_words, max_sentences)

    pad_data(train_stories, train_questions, max_words, max_sentences)
    pad_data(valid_stories, valid_questions, max_words, max_sentences)
    pad_data(test_stories, test_questions, max_words, max_sentences)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    # FLAGS.nwords = len(word2idx)
    # FLAGS.max_words = max_words
    # FLAGS.max_sentences = max_sentences
    # pp.pprint(flags.FLAGS.__flags)
    print(word2idx)

    is_test = True
    with tf.Session() as sess:
        model = MemN2N(is_test, len(word2idx), max_words, max_sentences, sess)
        model.build_model()
        if is_test:
            model.run(valid_stories, valid_questions, test_stories, test_questions)
        else:
            model.run(train_stories, train_questions, valid_stories, valid_questions)

        # predictions, target = model.predict(test_stories, test_questions)
        # cnt = 0
        # for i in range(len(target)):
        #     print(idx2word[np.argmax(predictions[i])], idx2word[np.argmax(target[i])])
        #     if np.argmax(predictions[i]) == np.argmax(target[i]):
        #         cnt += 1
        # print("Test set accuracy ", cnt / len(target))

        print(word2idx)
        idx2word = dict(zip(word2idx.values(), word2idx.keys()))
        stry = input("Enter the story: ")
        flag = 'y'
        while flag == 'y':
            que = input("Enter the quest: ")
            print(stry, type(stry), que, type(que))
            story, quest = read_data_story(stry.lower(), que.lower(), word2idx,
                                           max_sentences, max_words)
            pad_data(story, quest, max_words, max_sentences)
            # print(story, quest, word2idx)
            prediction, target1 = model.predict(story, quest)
            print(idx2word[np.argmax(prediction[0])])
            flag = input('You want to continue: y or n ')

def main(_):
    source_count, target_count = [], []
    source_word2idx, target_word2idx, word_set = {}, {}, {}
    max_sent_len = -1

    max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)
    max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)
    max_sent_len_predict = get_dataset_resources_test(FLAGS.predict_data, source_word2idx,
                                                      target_word2idx, word_set, max_sent_len)
    embeddings = load_embedding_file(FLAGS.pretrain_file, word_set)
    # test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx, embeddings, MODE='test')

    print("max sentence length - " + str(max_sent_len))
    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = max_sent_len
    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    print('loading pre-trained word vectors for train and test data')
    FLAGS.pre_trained_context_wt, FLAGS.pre_trained_target_wt = get_embedding_matrix(
        embeddings, source_word2idx, target_word2idx, FLAGS.edim)

    N_FOLDS = 2
    skf = StratifiedKFold(N_FOLDS, shuffle=True, random_state=1000)

    train_data = get_dataset(FLAGS.train_data, source_word2idx, target_word2idx,
                             embeddings, MODE='train')
    predict_data, raw_predict_data = get_dataset_test(FLAGS.predict_data, source_word2idx,
                                                      target_word2idx, embeddings)
    # source_data_predict, source_loc_data_predict, target_data_predict = predict_data

    source_data, source_loc_data, target_data, target_label = train_data
    X = np.column_stack((source_data, source_loc_data, target_data))
    y = np.array(target_label)

    # Use this for SKF validation
    # for j, (train_idx, test_idx) in enumerate(skf.split(X, y)):
    #     X_train, y_train = X[train_idx], y[train_idx]
    #     X_test, y_test = X[test_idx], y[test_idx]
    #     train_data_inner = (X_train[:, 0], X_train[:, 1], X_train[:, 2], y_train)
    #     test_data_inner = (X_test[:, 0], X_test[:, 1], X_test[:, 2], y_test)
    #     tf.reset_default_graph()
    #     with tf.Session() as sess:
    #         model = MemN2N(FLAGS, sess)
    #         model.build_model()
    #         saver = tf.train.Saver()
    #         model.run(train_data_inner, test_data_inner)
    #         saver.save(sess, './memnet', global_step=1000)
    # # for i in 10, do <-. Before model = ... use tf.reset_default_graph

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    train_data_inner = (X_train[:, 0], X_train[:, 1], X_train[:, 2], y_train)
    test_data_inner = (X_test[:, 0], X_test[:, 1], X_test[:, 2], y_test)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        # saver = tf.train.Saver()
        model.run(train_data_inner, test_data_inner, predict_data, raw_predict_data)

def main(_):
    source_count, target_count = [], []
    source_word2idx, target_word2idx, word_set = {}, {}, {}
    max_sent_len = -1

    max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)
    max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx,
                                         target_word2idx, word_set, max_sent_len)
    # embeddings = load_embedding_file(FLAGS.pretrain_file, word_set)
    print "Embeddings Loaded"

    '''
    # uncomment for the first run
    # required for generating data in the pickle format
    train_data = get_dataset(FLAGS.train_data, source_word2idx, target_word2idx, embeddings)
    test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx, embeddings)
    pkl.dump(train_data, open('train_data_restaurant.pkl', 'w'))
    pkl.dump(test_data, open('test_data_restaurant.pkl', 'w'))
    # pkl.dump(train_data, open('train_data_laptop.pkl', 'w'))
    # pkl.dump(test_data, open('test_data_laptop.pkl', 'w'))
    print "Dump Success!!!"
    return
    '''

    # Loading the data generated above
    train_data = pkl.load(open('train_data_laptop.pkl', 'r'))
    # train_data = pkl.load(open('train_data_extra.pkl', 'r'))
    # train_data = pkl.load(open('train_data_restaurant_clean.pkl', 'r'))
    test_data = pkl.load(open('test_data_laptop.pkl', 'r'))
    # test_data = pkl.load(open('test_data_extra.pkl', 'r'))
    # test_data = pkl.load(open('test_data_restaurant_clean.pkl', 'r'))
    print "Dump Loaded!!!"

    # Uncomment for Rul + Con:
    # concatenates the Wma from the consTree to the Wrm for the (Rul + Con) method.
    # Requires that the data in both is already matched index-wise.
    GraphMemNetData = pkl.load(open('TOTAL_LAT_const_laptop.pkl', 'r'))
    # GraphMemNetData = pkl.load(open('TOTAL_data_restaurant_clean.pkl', 'r'))
    Wma_train = GraphMemNetData[0][6]
    Wma_test = GraphMemNetData[1][6]

    Wrm = train_data[5]
    for index, wma in enumerate(Wma_train):
        wam = np.reshape(wma, (1, -1))
        Wrm[index] = np.concatenate((Wrm[index], wam), axis=0)

    Wrm = test_data[5]
    for index, wma in enumerate(Wma_test):
        wam = np.reshape(wma, (1, -1))
        Wrm[index] = np.concatenate((Wrm[index], wam), axis=0)

    print "train data size - ", len(train_data[0])
    print "test data size - ", len(test_data[0])
    print "max sentence length - ", max_sent_len

    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = max_sent_len
    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    print('loading pre-trained word vectors for train and test data')
    # pre_trained_context_wt, pre_trained_target_wt = get_embedding_matrix(embeddings, source_word2idx, target_word2idx, FLAGS.edim)
    pre_trained_context_wt, pre_trained_target_wt = GraphMemNetData[2], GraphMemNetData[3]

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess, pre_trained_context_wt, pre_trained_target_wt)
        model.build_model()
        model.run(train_data, test_data)