def check_random_example(dataset, model, idx2w):
    """
    Pick a random sentence from `dataset` and decode its autoencoder reconstruction.

    Args:
        dataset: AutoencoderDataset; must support `len()` and integer indexing
        model: Autoencoder model, called on a batch holding the single sentence
        idx2w (dict): A reversed dictionary matching ids to tokens

    Returns (tuple): the input sentence and its reconstruction, both decoded
        with `idx2text`
    """
    random_idx = np.random.randint(len(dataset))
    sentence = variable(dataset[random_idx])

    # Add a batch axis for the model and drop only that axis afterwards.  A
    # bare squeeze() would also remove a length-1 sequence axis and make the
    # dim=1 reduction below fail for single-token sentences.
    output = model(sentence.unsqueeze(0)).squeeze(0)

    # Most probable token id at every position.
    _, words = torch.max(F.softmax(output, dim=-1), dim=1)

    input_text = idx2text(list(sentence.cpu().numpy()), idx2w)
    predicted_text = idx2text(list(words.cpu().numpy()), idx2w)
    return input_text, predicted_text
def train(): if prm.optimizer.lower() == 'adam': optimizer=adam elif prm.optimizer.lower() == 'sgd': optimizer=sgd elif prm.optimizer.lower() == 'rmsprop': optimizer=rmsprop elif prm.optimizer.lower() == 'adadelta': optimizer=adadelta options = locals().copy() print 'parameters:', str(options) prm_k = vars(prm).keys() prm_d = vars(prm) prm_k.sort() for x in prm_k: if not x.startswith('__'): print x,'=', prm_d[x] print 'loading Vocabulary...' vocab = utils.load_vocab(prm.vocab_path, prm.n_words) options['vocab'] = vocab options['vocabinv'] = {} for k,v in vocab.items(): options['vocabinv'][v] = k print 'Loading Environment...' if prm.engine.lower() == 'lucene': import lucene_search options['engine'] = lucene_search.LuceneSearch() elif prm.engine.lower() == 'elastic': import elastic_search options['engine'] = elastic_search.ElasticSearch() print 'Loading Dataset...' dh5 = dataset_hdf5.DatasetHDF5(prm.dataset_path) qi_train = dh5.get_queries(dset='train') dt_train = dh5.get_doc_ids(dset='train') qi_valid = dh5.get_queries(dset='valid') dt_valid = dh5.get_doc_ids(dset='valid') qi_test = dh5.get_queries(dset='test') dt_test = dh5.get_doc_ids(dset='test') if prm.train_size == -1: train_size = len(qi_train) else: train_size = min(prm.train_size, len(qi_train)) if prm.valid_size == -1: valid_size = len(qi_valid) else: valid_size = min(prm.valid_size, len(qi_valid)) if prm.test_size == -1: test_size = len(qi_test) else: test_size = min(prm.test_size, len(qi_test)) print '%d train examples' % len(qi_train) print '%d valid examples' % len(qi_valid) print '%d test examples' % len(qi_test) # This create the initial parameters as np ndarrays. 
# Dict name (string) -> np ndarray params, exclude_params = init_params(options) if prm.wordemb_path: print 'loading pre-trained word embeddings' params = load_wemb(params, vocab) options['W'] = params['W'] if prm.reload_model: load_params(prm.reload_model, params) print 'Building model' # This create Theano Shared Variable from the parameters. # Dict name (string) -> Theano Tensor Shared Variable # params and tparams have different copy of the weights. tparams = init_tparams(params) for kk, value in tparams.iteritems(): tparams[kk] = theano.shared(value, name=kk) iin, out, updates, f_pred, consider_constant \ = build_model(tparams, options) #get only parameters that are not in the exclude_params list tparams_ = OrderedDict([(kk, vv) for kk, vv in tparams.iteritems() if kk not in exclude_params]) grads = tensor.grad(out[0], wrt=itemlist(tparams_), consider_constant=consider_constant) lr = tensor.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams_, grads, iin, out, updates) history_errs = [] best_p = None if prm.validFreq == -1: validFreq = len(qi_train) / prm.batch_size_train else: validFreq = prm.validFreq if prm.saveFreq == -1: saveFreq = len(qi_train) / prm.batch_size_train else: saveFreq = prm.saveFreq uidx = 0 # the number of update done estop = False # early stop start_time = time.time() print 'Optimization' try: for eidx in xrange(prm.max_epochs): n_samples = 0 # Get new shuffled index for the training set. kf = get_minibatches_idx(len(qi_train), prm.batch_size_train, shuffle=True) for _, train_index in kf: st = time.time() uidx += 1 qi, qi_i, qi_lst, D_gt_id, D_gt_url = get_samples(qi_train, dt_train, train_index, options) # share the current queries with the search engine. options['current_queries'] = qi_lst n_samples += len(qi) is_train = 1. out = f_grad_shared(qi_i, D_gt_id, is_train) cost = out.pop(0) cost_ent = out.pop(0) lr_t = f_update(prm.lrate) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1., 1., 1. 
if np.mod(uidx, prm.dispFreq) == 0: print '\n================================================================================' print 'Epoch', eidx, 'Update', uidx, 'Cost', cost, 'LR_t', lr_t print 'Time Minibatch Update: ' + str(time.time() - st) print 'Input Query: ', qi[0].replace('\n','\\n') print print 'Target Docs: ', str(D_gt_url[0]) print print 'Input Query Vocab: ', utils.idx2text(qi_i[0], options['vocabinv']) for ii in range(prm.n_iterations): prob = out.pop(0) ans = out.pop(0) metrics = out.pop(0) bl = out.pop(0) cost_bl = out.pop(0) D_id = out.pop(0) print print 'Iteration', ii print 'Baseline Value', bl.mean(), 'Cost', cost_bl print ' '.join(prm.metrics_map.keys()) print metrics.mean(0) print print 'Retrieved Docs: ', str([options['engine'].id_title_map[d_id] for d_id in D_id[0]]) print print 'Reformulated Query:', options['reformulated_queries'][ii][0] print print 'Query ANS: ', for kk, word in enumerate(options['current_queries'][0][:ans.shape[1]]): if word not in options['vocab'] and word != '': word += '<unk>' if ans[0,kk] == 1: word = word.upper() print str(word), print print print 'prob[:,:,0].max(1).mean(), prob[:,:,0].mean(), prob[:,:,0].min(1).mean()', prob[:,:,0].max(1).mean(), prob[:,:,0].mean(), prob[:,:,0].min(1).mean() print 'prob[:,:,1].max(1).mean(), prob[:,:,1].mean(), prob[:,:,1].min(1).mean()', prob[:,:,1].max(1).mean(), prob[:,:,1].mean(), prob[:,:,1].min(1).mean() print '==================================================================================\n' if np.mod(uidx, validFreq) == 0 or uidx == 1: kf_train = get_minibatches_idx(len(qi_train), prm.batch_size_pred, shuffle=True, max_samples=train_size) kf_valid = get_minibatches_idx(len(qi_valid), prm.batch_size_pred, shuffle=True, max_samples=valid_size) kf_test = get_minibatches_idx(len(qi_test), prm.batch_size_pred, shuffle=True, max_samples=test_size) print '\nEvaluating - Training Set' train_metrics = pred_error(f_pred, qi_train, dt_train, options, kf_train) print 
'\nEvaluating - Validation Set' valid_metrics = pred_error(f_pred, qi_valid, dt_valid, options, kf_valid) print '\nEvaluating - Test Set' test_metrics = pred_error(f_pred, qi_test, dt_test, options, kf_test) his = [train_metrics, valid_metrics, test_metrics] history_errs.append(his) metric_idx = prm.metrics_map[prm.reward.upper()] if (uidx == 0 or valid_metrics[-1, metric_idx] >= np.array(history_errs)[:,1,-1,metric_idx].max()): best_p = unzip(tparams) bad_counter = 0 print '=====================================================================================================' print ' '.join(prm.metrics_map.keys()) print print 'Train:' print train_metrics print print 'Valid:' print valid_metrics print print 'Test:' print test_metrics print print '=====================================================================================================' if (len(history_errs) > prm.patience and valid_metrics[-1, metric_idx] <= np.array(history_errs)[:-prm.patience, 1,-1,metric_idx].max()): bad_counter += 1 if bad_counter > prm.patience: print 'Early Stop!' estop = True break if prm.saveto and np.mod(uidx, saveFreq) == 0: print 'Saving...', if best_p is not None: params = best_p else: params = unzip(tparams) np.savez(prm.saveto, history_errs=history_errs, **params) print 'Done' print 'Seen %d samples' % n_samples if estop: break except KeyboardInterrupt: print "Training interupted" return
def pred_error(f_pred, queries, actions, options, iterator, verbose=False):
    """
    Evaluate `f_pred` over the minibatches in `iterator` and log each batch's
    last example to `prm.outpath`.

    Args:
        f_pred: Theano function computing the prediction; called as
            `f_pred(q_i, q_m, root_pages, acts_p, uidx)` and expected to return
            `(best_answer, best_page_idx, R, pages_idx)`.
        queries: indexable collection of query strings.
        actions: indexable collection of ground-truth paths, parallel to `queries`.
        options (dict): must provide 'vocab', 'vocabinv' and 'wiki'.
        iterator: iterable of `(batch_id, indices)` pairs.
        verbose: unused; kept for interface compatibility.

    Returns (tuple):
        valid_err: array of length `prm.max_hops_train + 2`; entry `i` is
            1 - accuracy for queries whose correct path has `i` actions, and
            the last entry covers all queries.
        valid_R: mean reward, laid out like `valid_err`.
        acts_pc: fraction of page actions that match the correct ones (NaN
            when nothing was evaluated).
        visited_pages: one list per query with the page ids visited
            (entries equal to -1 are dropped).

    Buckets that received no queries come out as NaN (0 / 0).
    """
    n_buckets = prm.max_hops_train + 2
    valid_acc = np.zeros(n_buckets, dtype=np.float32)
    valid_R = np.zeros(n_buckets, dtype=np.float32)
    n = np.zeros(n_buckets, dtype=np.float32)
    acts_pc = 0.
    acts_pt = 0.

    # Negative, decreasing update index passed to f_pred during evaluation.
    uidx = -1
    visited_pages = []

    for _, valid_index in iterator:
        q_i, q_m = utils.text2idx2([queries[t].lower() for t in valid_index],
                                   options['vocab'],
                                   prm.max_words_query*prm.n_consec)
        acts = [actions[t] for t in valid_index]

        # Fake acts that won't be used in the prediction.
        acts_p = -np.ones((prm.max_hops_pred+1, len(q_i) * prm.k), dtype=np.float32)

        root_pages = get_root_pages(acts)

        best_answer, best_page_idx, R, pages_idx = f_pred(q_i, q_m, root_pages, acts_p, uidx)

        # One row per query, holding every page index produced for it.
        pages_idx_ = np.swapaxes(pages_idx, 0, 1)
        pages_idx_ = pages_idx_.reshape(pages_idx_.shape[0], -1)

        # Get pages visited (-1 marks "no page").
        for page_idx in pages_idx_:
            visited_pages.append([idx for idx in page_idx if idx != -1])

        # A query counts as correct only when its reward is at least 1.
        R_binary = np.ones_like(R)
        R_binary[R < 1.0] = 0.0
        n[-1] += len(valid_index)
        valid_R[-1] += R.sum()
        valid_acc[-1] += R_binary.sum()

        # Get correct page-actions.
        acts_p = get_acts(acts, prm.max_hops_pred, prm.k)

        pages_idx = pages_idx.reshape((pages_idx.shape[0], -1))

        # Check how many page actions the model got right.
        mask_pc = np.logical_or((pages_idx != -1.0), (acts_p != -1.0)).astype('float32')
        acts_pc += ((pages_idx == acts_p).astype('float32') * mask_pc).sum()
        acts_pt += mask_pc.sum()  # total number of actions

        # Number of correct actions per query.  Computed once per batch (it
        # does not depend on the hop being tallied below).
        n_hops = (acts_p != -1.0).astype('float32').sum(0)
        n_hops = n_hops.reshape((-1, prm.k))[:, 0]  # beam search uses only the first n_samples actions

        # Compute accuracy per hop.
        for i in range(prm.max_hops_train+1):
            ih = (n_hops == i)
            valid_R[i] += R[ih].sum()
            valid_acc[i] += R_binary[ih].sum()
            n[i] += ih.astype('float32').sum()

        # Log the last example of the batch.
        with open(prm.outpath, 'a') as fout:
            fout.write("\n\nQuery: " + queries[valid_index[-1]].replace("\n"," "))
            nh = (acts_p[:,-1] != -1.0).astype('int32').sum()
            if nh == 0:
                fout.write('\nCorrect Path: ' + options['wiki'].get_article_title(int(root_pages[-1])))
            else:
                path = ''.join(' -> ' + options['wiki'].get_article_title(int(a))
                               for a in acts_p[:nh, -1])
                fout.write('\nCorrect Path: ' + path)

            fout.write('\nNumber of hops: ' + str(int(nh)))
            fout.write('\nBest answer: ' + utils.idx2text(best_answer[-1], options['vocabinv']))
            fout.write('\nBest page: ' + options['wiki'].get_article_title(best_page_idx[-1]))
            for i, pageidx in enumerate(pages_idx[:,-1]):
                fout.write('\niteration: ' +str(i) + " page idx " + str(pageidx) + ' title '+ options['wiki'].get_article_title(pageidx))

        uidx -= 1

    valid_R = valid_R / n
    valid_err = 1 - valid_acc / n
    # With an empty iterator both counters are still Python floats, and 0./0.
    # would raise ZeroDivisionError; report NaN like the per-hop arrays do.
    if acts_pt:
        acts_pc = acts_pc / acts_pt
    else:
        acts_pc = float('nan')

    return valid_err, valid_R, acts_pc, visited_pages
def train_lstm(): optimizer=adam # only adam is supported by now. options = locals().copy() with open(prm.outpath, "a") as fout: fout.write("parameters:" + str(options) + str(prm.__dict__)) print "loading dictionary..." vocab = utils.load_vocab(prm.vocab_path, prm.n_words) options['vocab'] = vocab options['vocabinv'] = {} for k,v in vocab.items(): options['vocabinv'][v] = k print 'Loading data...' options['wiki'] = wiki.Wiki(prm.pages_path) options['wikiemb'] = wiki_emb.WikiEmb(prm.pages_emb_path) #load Q&A Wiki dataset qpp = qp.QP(prm.qp_path) q_train, q_valid, q_test = qpp.get_queries() a_train, a_valid, a_test = qpp.get_paths() print 'Building model' # This create the initial parameters as np ndarrays. # Dict name (string) -> np ndarray params, exclude_params = init_params() if prm.wordemb_path: print 'loading pre-trained weights for word embeddings' params = load_wemb(params, vocab) options['W'] = params['W'] if prm.reload_model: load_params(prm.reload_model, params) params_next = OrderedDict() if prm.learning.lower() == 'q_learning' and prm.update_freq > 0: # copy params to params_next for kk, kv in params.items(): params_next[kk] = kv.copy() # This create Theano Shared Variable from the parameters. # Dict name (string) -> Theano Tensor Shared Variable # params and tparams have different copy of the weights. 
tparams = init_tparams(params) if prm.update_freq > 0: tparams_next = init_tparams(params_next) else: tparams_next = None if prm.learning.lower() == 'reinforce': R_mean = theano.shared(0.71*np.ones((1,)), name='R_mean') R_std = theano.shared(np.ones((1,)), name='R_std') baseline_vars = {'R_mean': R_mean, 'R_std': R_std} else: baseline_vars = {} iin, out, updates, is_train, sup, max_hops, k_beam, mixer, f_pred, consider_constant \ = build_model(tparams, tparams_next, baseline_vars, options) #get only parameters that are not in the exclude_params list tparams_ = OrderedDict([(kk, vv) for kk, vv in tparams.iteritems() if kk not in exclude_params]) grads = tensor.grad(out[0], wrt=itemlist(tparams_), consider_constant=consider_constant) lr = tensor.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams_, grads, iin, out, updates) print 'Optimization' if prm.train_size == -1: train_size = len(q_train) else: train_size = prm.train_size if prm.valid_size == -1: valid_size = len(q_valid) else: valid_size = prm.valid_size if prm.test_size == -1: test_size = len(q_test) else: test_size = prm.test_size with open(prm.outpath, "a") as fout: fout.write("\n%d train examples" % len(q_train)) with open(prm.outpath, "a") as fout: fout.write("\n%d valid examples" % len(q_valid)) with open(prm.outpath, "a") as fout: fout.write("\n%d test examples" % len(q_test)) history_errs = [] best_p = None if prm.validFreq == -1: validFreq = len(q_train) / prm.batch_size_train else: validFreq = prm.validFreq if prm.saveFreq == -1: saveFreq = len(q_train) / prm.batch_size_train else: saveFreq = prm.saveFreq uidx = 0 # the number of update done estop = False # early stop start_time = time.time() experience = deque(maxlen=prm.replay_mem_size) # experience replay memory as circular buffer. experience_r = deque(maxlen=prm.replay_mem_size) # reward of each entry in the replay memory. try: for eidx in xrange(prm.max_epochs): n_samples = 0 # Get new shuffled index for the training set. 
kf = get_minibatches_idx(len(q_train), prm.batch_size_train, shuffle=True) for _, train_index in kf: st = time.time() uidx += 1 is_train.set_value(1.) max_hops.set_value(prm.max_hops_train) # select training dataset k_beam.set_value(1) # Training does not use beam search # Select the random examples for this minibatch queries = [q_train[t].lower() for t in train_index] actions = [a_train[t] for t in train_index] if prm.learning.lower() == 'supervised': sup.set_value(1.) # select supervised mode else: sup.set_value(0.) # Get correct actions (supervision signal) acts_p = get_acts(actions, prm.max_hops_train, k_beam=1) # MIXER if prm.mixer > 0 and prm.learning.lower() == 'reinforce': mixer.set_value(max(0, prm.max_hops_train - uidx // prm.mixer)) else: if prm.learning.lower() == 'supervised': mixer.set_value(prm.max_hops_train+1) else: mixer.set_value(0) root_pages = get_root_pages(actions) # Get the BoW for the queries. q_i, q_m = utils.text2idx2(queries, vocab, prm.max_words_query*prm.n_consec) n_samples += len(queries) if uidx > 1 and prm.learning.lower() == 'q_learning': # Randomly select experiences and convert them to numpy arrays. idxs = np.random.choice(np.arange(len(experience)), size=len(queries)) rvs = [] for j in range(len(experience[idxs[0]])): rv = [] for idx in idxs: rv.append(experience[idx][j]) rvs.append(np.asarray(rv)) else: rvs = [np.zeros((len(queries),prm.max_words_query*prm.n_consec),dtype=np.float32), # rs_q np.zeros((len(queries),prm.max_words_query*prm.n_consec),dtype=np.float32), # rs_q_m np.zeros((len(queries),prm.max_hops_train+1),dtype=np.int32), # rl_idx np.zeros((len(queries),prm.max_hops_train+1),dtype=np.float32), # rt np.zeros((len(queries),prm.max_hops_train+1),dtype=np.float32) # rr ] cost, R, l_idx, pages_idx, best_page_idx, best_answer, mask, dist \ = f_grad_shared(q_i, q_m, root_pages, acts_p, uidx, *rvs) f_update(prm.lrate) if prm.learning.lower() == 'q_learning': # update weights of the next_q_val network. 
if (prm.update_freq > 0 and uidx % prm.update_freq == 0) or (uidx == prm.replay_start): for tk, tv in tparams.items(): if tk in tparams_next: tparams_next[tk].set_value(tv.get_value().copy()) # Only update memory after freeze_mem or before replay_start. if (uidx < prm.replay_start or uidx > prm.freeze_mem) and prm.learning.lower() == 'q_learning': # Update Replay Memory. t = np.zeros((len(queries), prm.max_hops_train+1)) rR = np.zeros((len(queries), prm.max_hops_train+1)) for i in range(len(queries)): j = np.minimum(mask[i].sum(), prm.max_hops_train) # If the agent chooses to stop or the episode ends, # the reward will be the reward obtained with the chosen document. rR[i,j] = R[i] t[i,j] = 1. add = True if prm.selective_mem >= 0 and uidx > 1: # Selective memory: keep the percentage of memories # with reward=1 approximately equal to <selective_mem>. pr = float(np.asarray(experience_r).sum()) / max(1., float(len(experience_r))) if (pr < prm.selective_mem) ^ (rR[i,j] == 1.): # xor add = False if add: experience.append([q_i[i], q_m[i], l_idx[i], t[i], rR[i]]) experience_r.append(rR[i]) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1., 1., 1. 
#if uidx % 100 == 0: # vis_att(pages_idx[:,-1], queries[-1], alpha[:,-1,:], uidx, options) if np.mod(uidx, prm.dispFreq) == 0: with open(prm.outpath, "a") as fout: fout.write("\n\nQuery: " + queries[-1].replace("\n"," ")) fout.write('\nBest Answer: ' + utils.idx2text(best_answer[-1], options['vocabinv'])) fout.write('\nBest page: ' + options['wiki'].get_article_title(best_page_idx[-1])) for i, page_idx in enumerate(pages_idx[:,-1]): fout.write('\niteration: ' +str(i) + " page idx " + str(page_idx) + ' title: ' + options['wiki'].get_article_title(page_idx)) fout.write('\nEpoch '+ str(eidx) + ' Update '+ str(uidx) + ' Cost ' + str(cost) + \ ' Reward Mean ' + str(R.mean()) + ' Reward Max ' + str(R.max()) + \ ' Reward Min ' + str(R.min()) + \ ' Q-Value Max (avg per sample) ' + str(dist.max(2).mean()) + \ ' Q-Value Mean ' + str(dist.mean())) #fout.write("\nCost Supervised: " + str(cost_sup)) #fout.write("\nCost RL: " + str(cost_RL)) fout.write("\nTime per Minibatch Update: " + str(time.time() - st)) if prm.saveto and np.mod(uidx, saveFreq) == 0: print 'Saving...', if best_p is not None: params = best_p else: params = unzip(tparams) np.savez(prm.saveto, history_errs=history_errs, **params) pkl.dump(options, open('%s.pkl' % prm.saveto, 'wb'), -1) print 'Done' if np.mod(uidx, validFreq) == 0 or uidx == 1: if prm.visited_pages_path: shuffle = False else: shuffle = True kf_train = get_minibatches_idx(len(q_train), prm.batch_size_pred, shuffle=shuffle, max_samples=train_size) kf_valid = get_minibatches_idx(len(q_valid), prm.batch_size_pred, shuffle=shuffle, max_samples=valid_size) kf_test = get_minibatches_idx(len(q_test), prm.batch_size_pred, shuffle=shuffle, max_samples=test_size) is_train.set_value(0.) sup.set_value(0.) 
# supervised mode off mixer.set_value(0) # no supervision max_hops.set_value(prm.max_hops_pred) k_beam.set_value(prm.k) with open(prm.outpath, 'a') as fout: fout.write('\n\nComputing Error Training Set') train_err, train_R, train_accp, visited_pages_train = pred_error(f_pred, q_train, a_train, options, kf_train) with open(prm.outpath, 'a') as fout: fout.write('\n\nComputing Error Validation Set') valid_err, valid_R, valid_accp, visited_pages_valid = pred_error(f_pred, q_valid, a_valid, options, kf_valid) with open(prm.outpath, 'a') as fout: fout.write('\n\nComputing Error Test Set') test_err, test_R, test_accp, visited_pages_test = pred_error(f_pred, q_test, a_test, options, kf_test) if prm.visited_pages_path: pkl.dump([visited_pages_train, visited_pages_valid, visited_pages_test], open(prm.visited_pages_path, 'wb')) history_errs.append([valid_err[-1], test_err[-1]]) if (uidx == 0 or valid_err[-1] <= np.array(history_errs)[:,0].min()): best_p = unzip(tparams) bad_counter = 0 with open(prm.outpath, "a") as fout: fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + ' Valid err ' + str(valid_err) + ' Test err ' + str(test_err)) fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + ' Valid R ' + str(valid_R) + ' Test R ' + str(test_R)) fout.write('\nAccuracy Page Actions Train ' + str(train_accp) + ' Valid ' + str(valid_accp) + ' Test ' + str(test_accp)) if (len(history_errs) > prm.patience and valid_err[-1] >= np.array(history_errs)[:-prm.patience, 0].min()): bad_counter += 1 if bad_counter > prm.patience: print 'Early Stop!' estop = True break with open(prm.outpath, "a") as fout: fout.write('\nSeen %d samples' % n_samples) if estop: break except KeyboardInterrupt: print "Training interupted" end_time = time.time() if best_p is not None: zipp(best_p, tparams) else: best_p = unzip(tparams) is_train.set_value(0.) sup.set_value(0.) 
# supervised mode off mixer.set_value(0) # no supervision max_hops.set_value(prm.max_hops_pred) k_beam.set_value(prm.k) kf_train_sorted = get_minibatches_idx(len(q_train), prm.batch_size_train) train_err, train_R, train_accp, visited_pages_train = pred_error(f_pred, q_train, a_train, options, kf_train_sorted) valid_err, valid_R, valid_accp, visited_pages_valid = pred_error(f_pred, q_valid, a_valid, options, kf_valid) test_err, test_R, test_accp, visited_pages_test = pred_error(f_pred, q_test, a_test, options, kf_test) with open(prm.outpath, "a") as fout: fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + ' Valid err ' + str(valid_err) + ' Test err ' + str(test_err)) fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + ' Valid R ' + str(valid_R) + ' Test R ' + str(test_R)) fout.write('\nAccuracy Page Actions Train ' + str(train_accp) + ' Valid ' + str(valid_accp) + ' Test ' + str(test_accp)) if prm.saveto: np.savez(prm.saveto, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **best_p) with open(prm.outpath, "a") as fout: fout.write('\nThe code run for %d epochs, with %f sec/epochs' % ((eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))) with open(prm.outpath, "a") as fout: fout.write('\nTraining took %.1fs' % (end_time - start_time)) return train_err, valid_err, test_err
def train_lstm(): optimizer=adam # only adam is supported by now. options = locals().copy() with open(prm.outpath, "a") as fout: fout.write("parameters:" + str(options) + str(prm.__dict__)) print "loading dictionary..." vocab = utils.load_vocab(prm.vocab_path, prm.n_words) options['vocab'] = vocab options['vocabinv'] = {} for k,v in vocab.items(): options['vocabinv'][v] = k print 'Loading data...' options['wiki'] = wiki.Wiki(prm.pages_path) options['wikiemb'] = wiki_emb.WikiEmb(prm.pages_emb_path) qpp = qp.QP(prm.qp_path) q_train, q_valid, q_test = qpp.get_queries() a_train, a_valid, a_test = qpp.get_paths() print 'Building model' # This create the initial parameters as np ndarrays. # Dict name (string) -> np ndarray params, exclude_params = init_params() if prm.reload_model: load_params(prm.reload_model, params) if prm.wordemb_path: print 'loading pre-trained weights for word embeddings' params = load_wemb(params, vocab) options['W'] = params['W'] # This create Theano Shared Variable from the parameters. # Dict name (string) -> Theano Tensor Shared Variable # params and tparams have different copy of the weights. 
tparams = init_tparams(params) mean = theano.shared(np.zeros((prm.dim_proj,)).astype(config.floatX)) # avg of the training set std = theano.shared(np.zeros((prm.dim_proj,)).astype(config.floatX)) # std of the training set t_samples = theano.shared(np.zeros((1,)).astype(config.floatX)) # total number of samples so far stats_vars = {'mean': mean, 'std': std, 't_samples': t_samples} if prm.supervised: baseline_vars = {} else: R_mean = theano.shared(0.71*np.ones((1,)), name='R_mean') R_std = theano.shared(np.ones((1,)), name='R_std') baseline_vars = {'R_mean': R_mean, 'R_std': R_std} is_train, sup, max_hops, k_beam, tq, tq_m, troot_pages, tacts_p, f_pred, cost, \ scan_updates, baseline_updates, stats_updates, consider_constant, \ opt_out = \ build_model(tparams, baseline_vars, stats_vars, options) if prm.decay_c > 0.: decay_c = theano.shared(np_floatX(prm.decay_c), name='decay_c') weight_decay = 0. weight_decay += (tparams['U'] ** 2).sum() weight_decay *= decay_c cost += weight_decay #get only parameters that are not in the exclude_params list tparams_ = OrderedDict([(kk, vv) for kk, vv in tparams.iteritems() if kk not in exclude_params]) grads = tensor.grad(cost, wrt=itemlist(tparams_), consider_constant=consider_constant) lr = tensor.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams_, grads, tq, tq_m, troot_pages, tacts_p, cost, scan_updates, baseline_updates, \ stats_updates, opt_out=[opt_out['R'], opt_out['page_idx'], opt_out['best_answer'], opt_out['best_page_idx']]) print 'Optimization' if prm.train_size == -1: train_size = len(q_train) else: train_size = prm.train_size if prm.valid_size == -1: valid_size = len(q_valid) else: valid_size = prm.valid_size if prm.test_size == -1: test_size = len(q_test) else: test_size = prm.test_size with open(prm.outpath, "a") as fout: fout.write("\n%d train examples" % len(q_train)) with open(prm.outpath, "a") as fout: fout.write("\n%d valid examples" % len(q_valid)) with open(prm.outpath, "a") as fout: 
fout.write("\n%d test examples" % len(q_test)) history_errs = [] best_p = None if prm.validFreq == -1: validFreq = len(q_train) / prm.batch_size_train else: validFreq = prm.validFreq if prm.saveFreq == -1: saveFreq = len(q_train) / prm.batch_size_train else: saveFreq = prm.saveFreq uidx = 0 # the number of update done estop = False # early stop start_time = time.time() try: for eidx in xrange(prm.max_epochs): n_samples = 0 # Get new shuffled index for the training set. kf = get_minibatches_idx(len(q_train), prm.batch_size_train, shuffle=True) for _, train_index in kf: st = time.time() uidx += 1 is_train.set_value(1.) max_hops.set_value(prm.max_hops_train) # select training dataset k_beam.set_value(1) # Training does not use beam search # Select the random examples for this minibatch queries = [q_train[t].lower() for t in train_index] actions = [a_train[t] for t in train_index] if prm.supervised == 1: sup_ = True elif prm.supervised > 1: if uidx % (int(uidx / prm.supervised) + 1) == 0: sup_ = True else: sup_ = False else: sup_ = False if sup_: sup.set_value(1.) # select supervised mode # Get correct actions (supervision signal) acts_p = get_acts(actions, prm.max_hops_train, k_beam=1) else: sup.set_value(0.) # select non-supervised mode acts_p = -np.ones((prm.max_hops_train+1, len(queries)), dtype=np.float32) root_pages = get_root_pages(actions) # Get the BoW for the queries q_bow, q_m = utils.BOW2(queries, vocab, prm.max_words_query*prm.n_consec) n_samples += len(queries) cost, R, pagesidx, best_answer, best_page_idx = f_grad_shared(q_bow, q_m, root_pages, acts_p) f_update(prm.lrate) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1., 1., 1. 
if np.mod(uidx, prm.dispFreq) == 0: with open(prm.outpath, "a") as fout: fout.write("\n\nQuery: " + queries[-1].replace("\n"," ")) fout.write('\nBest Answer: ' + utils.idx2text(best_answer[-1], options['vocabinv'])) fout.write('\nBest page: ' + options['wiki'].get_article_title(best_page_idx[-1])) for i, pageidx in enumerate(pagesidx[:,-1]): fout.write('\niteration: ' +str(i) + " page idx " + str(pageidx) + ' title: ' + options['wiki'].get_article_title(pageidx)) fout.write('\nEpoch '+ str(eidx) + ' Update '+ str(uidx) + ' Cost ' + str(cost) + \ ' Reward Mean ' + str(R.mean()) + ' Reward Max ' + str(R.max()) + \ ' Reward Min ' + str(R.min())) fout.write("\nTime per Minibatch Update: " + str(time.time() - st)) if prm.saveto and np.mod(uidx, saveFreq) == 0: print 'Saving...', if best_p is not None: params = best_p else: params = unzip(tparams) np.savez(prm.saveto, history_errs=history_errs, **params) pkl.dump(options, open('%s.pkl' % prm.saveto, 'wb'), -1) print 'Done' if np.mod(uidx, validFreq) == 0: kf_train = get_minibatches_idx(len(q_train), prm.batch_size_pred, shuffle=True, max_samples=train_size) kf_valid = get_minibatches_idx(len(q_valid), prm.batch_size_pred, shuffle=True, max_samples=valid_size) kf_test = get_minibatches_idx(len(q_test), prm.batch_size_pred, shuffle=True, max_samples=test_size) is_train.set_value(0.) sup.set_value(0.) 
# supervised mode off max_hops.set_value(prm.max_hops_pred) k_beam.set_value(prm.k) with open(prm.outpath, 'a') as fout: fout.write('\n\nComputing Error Training Set') train_err, train_R, train_accp = pred_error(f_pred, q_train, a_train, options, kf_train) with open(prm.outpath, 'a') as fout: fout.write('\n\nComputing Error Validation Set') valid_err, valid_R, valid_accp = pred_error(f_pred, q_valid, a_valid, options, kf_valid) with open(prm.outpath, 'a') as fout: fout.write('\n\nComputing Error Test Set') test_err, test_R, test_accp = pred_error(f_pred, q_test, a_test, options, kf_test) history_errs.append([valid_err[-1], test_err[-1]]) if (uidx == 0 or valid_err[-1] <= np.array(history_errs)[:,0].min()): best_p = unzip(tparams) bad_counter = 0 with open(prm.outpath, "a") as fout: fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + ' Valid err ' + str(valid_err) + ' Test err ' + str(test_err)) fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + ' Valid R ' + str(valid_R) + ' Test R ' + str(test_R)) fout.write('\nAccuracy Page Actions Train ' + str(train_accp) + ' Valid ' + str(valid_accp) + ' Test ' + str(test_accp)) if (len(history_errs) > prm.patience and valid_err[-1] >= np.array(history_errs)[:-prm.patience, 0].min()): bad_counter += 1 if bad_counter > prm.patience: print 'Early Stop!' estop = True break with open(prm.outpath, "a") as fout: fout.write('\nSeen %d samples' % n_samples) if estop: break except KeyboardInterrupt: print "Training interupted" end_time = time.time() if best_p is not None: zipp(best_p, tparams) else: best_p = unzip(tparams) is_train.set_value(0.) sup.set_value(0.) 
# supervised mode off max_hops.set_value(prm.max_hops_pred) k_beam.set_value(prm.k) kf_train_sorted = get_minibatches_idx(len(q_train), prm.batch_size_train) train_err, train_R, train_accp = pred_error(f_pred, q_train, a_train, options, kf_train_sorted) valid_err, valid_R, valid_accp = pred_error(f_pred, q_valid, a_valid, options, kf_valid) test_err, test_R, test_accp = pred_error(f_pred, q_test, a_test, options, kf_test) with open(prm.outpath, "a") as fout: fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + ' Valid err ' + str(valid_err) + ' Test err ' + str(test_err)) fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + ' Valid R ' + str(valid_R) + ' Test R ' + str(test_R)) fout.write('\nAccuracy Page Actions Train ' + str(train_accp) + ' Valid ' + str(valid_accp) + ' Test ' + str(test_accp)) if prm.saveto: np.savez(prm.saveto, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **best_p) with open(prm.outpath, "a") as fout: fout.write('\nThe code run for %d epochs, with %f sec/epochs' % ((eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))) with open(prm.outpath, "a") as fout: fout.write('\nTraining took %.1fs' % (end_time - start_time)) return train_err, valid_err, test_err