def main(load_path, params, mode='test'):
    """Evaluate the saved best model and write the results under ``load_path``.

    The model is rebuilt from ``params``, its weights are loaded from
    ``<load_path>/best_model.p``, and every batch of the chosen split is run
    through ``m.validate``. Batch-size-weighted mean loss and accuracy are
    printed and appended to ``<load_path>/log``.

    Files written (all inside ``load_path``):
        * ``log``            -- one summary line is appended.
        * ``<mode>.probs``   -- written with ``np.save`` (NumPy appends
          ``.npy`` when the name does not already end with it).
        * ``<mode>.attns``   -- pickle of the per-batch records collected
          in ``attns``.
        * ``<mode>.ids``     -- one entry of ``fnames`` per line.

    Args:
        load_path: Directory that holds ``best_model.p`` and receives the
            output files.
        params: Configuration mapping. Keys read here: ``nhidden``,
            ``dropout``, ``word2vec``, ``data``, ``nlayers``, ``sub2vec``,
            ``train_emb``, ``sub_dim``, ``use_feat``, ``gating_fn``.
        mode: ``'test'`` evaluates ``data.test``; any other value evaluates
            ``data.validation``. Also used as the output file prefix and,
            upper-cased, as the tag in the summary line.
    """
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['data']
    nlayers = params['nlayers']
    sub2vec = params['sub2vec']
    train_emb = params['train_emb']
    sub_dim = params['sub_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']

    # The configured sub_dim only decides whether sub-embeddings are used;
    # the value handed to the model comes from load_sub_embeddings below.
    use_subs = sub_dim > 0

    dp = DataPreprocessor.DataPreprocessor()
    # NOTE(review): the training entry point in this file also passes
    # subdict=params['subdic'] to preprocess(); confirm that omitting it
    # here still yields the same dictionaries the model was trained with.
    data = dp.preprocess(dataset, no_training_set=True, use_subs=use_subs)

    print("building minibatch loaders ...")
    eval_split = data.test if mode == 'test' else data.validation
    batch_loader_test = MiniBatchLoader.MiniBatchLoader(eval_split, BATCH_SIZE)

    print("building network ...")
    W_init, embed_dim = Helpers.load_word2vec_embeddings(
        data.dictionary[0], word2vec)
    S_init, sub_dim = Helpers.load_sub_embeddings(data.dictionary[1], sub2vec)
    m = model.Model(nlayers, data.vocab_size, data.num_chars, W_init, S_init,
                    nhidden, embed_dim, dropout, train_emb, sub_dim,
                    use_feat, gating_fn, save_attn=True)
    m.load_model('%s/best_model.p' % load_path)

    print("testing ...")
    # NOTE(review): pr is never written to in this function, so the saved
    # .probs array is all zeros. It looks like each batch's `probs` was
    # meant to be copied into rows n:n+bsize -- confirm the shape of
    # `probs` before restoring that assignment.
    pr = np.zeros((len(batch_loader_test.questions),
                   batch_loader_test.max_num_cand)).astype('float32')
    fids, attns = [], []
    total_loss, total_acc, n = 0., 0., 0
    for (dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl,
         fnames) in batch_loader_test:
        outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
        loss, acc, probs = outs[:3]
        # Keep only the first example of each batch: its name, its row of
        # probs, and the [0, :, :] slice of every extra output ("store one
        # attention" in the original comment).
        attns.append([fnames[0], probs[0, :]] +
                     [o[0, :, :] for o in outs[3:]])

        bsize = dw.shape[0]
        # Weight by batch size so the final averages are per example.
        total_loss += bsize * loss
        total_acc += bsize * acc
        fids.extend(fnames)
        n += bsize
        print("step" + str(n) + ",acc" + str(acc))

    message = '%s Loss %.4e acc=%.4f' % (mode.upper(), total_loss / n,
                                         total_acc / n)
    print(message)
    with open(load_path + '/log', 'a') as logger:
        logger.write(message + '\n')

    np.save('%s/%s.probs' % (load_path, mode), pr)
    # Pickle streams are binary data, so the file is opened in 'wb'.
    with open('%s/%s.attns' % (load_path, mode), 'wb') as attn_file:
        pkl.dump(attns, attn_file)
    with open('%s/%s.ids' % (load_path, mode), 'w') as ids_file:
        ids_file.writelines(item + '\n' for item in fids)
def _timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def _report(logger, message):
    """Print ``message`` and append it as one line to the open ``logger``."""
    print(message)
    logger.write(message + '\n')


def _run_validation(m, batch_loader_val):
    """Run ``m.validate`` over every batch of ``batch_loader_val``.

    Returns:
        ``(total_loss, total_acc, n)`` where the first two are sums weighted
        by batch size and ``n`` is the number of examples seen.
    """
    total_loss, total_acc, n = 0., 0., 0
    for (dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl,
         fnames) in batch_loader_val:
        outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
        loss, acc = outs[:2]
        bsize = dw.shape[0]
        total_loss += bsize * loss
        total_acc += bsize * acc
        n += bsize
    return total_loss, total_acc, n


def main(save_path, params):
    """Train the model, validating periodically, and checkpoint to ``save_path``.

    Side effects inside ``save_path``:
        * ``config.py``       -- copy of ``config.py`` from the working dir.
        * ``log``             -- every TRAIN / VAL / epoch message appended.
        * ``best_model.p``    -- saved whenever validation accuracy exceeds
          the best value seen during this run.
        * ``model_init.p``    -- saved (and reloaded) when no
          ``best_model.p`` exists yet.
        * ``model_<epoch>.p`` -- saved at the end of every epoch.

    If ``best_model.p`` already exists it is loaded before training; note
    that ``max_acc`` still starts at 0 in that case. Validation runs every
    ``VALIDATION_FREQ`` training batches. From epoch 2 onward ``m.anneal()``
    is called after each epoch, and training stops after the first epoch in
    which no new best validation accuracy was reached (or after
    ``NUM_EPOCHS`` epochs).

    Args:
        save_path: Existing directory for the log and checkpoints.
        params: Configuration mapping. Keys read here: ``nhidden``,
            ``dropout``, ``word2vec``, ``sub2vec``, ``subdic``, ``data``,
            ``nlayers``, ``train_emb``, ``sub_dim``, ``use_feat``,
            ``gating_fn``.
    """
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    sub2vec = params['sub2vec']
    subdict = params['subdic']
    dataset = params['data']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    sub_dim = params['sub_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']

    # save settings
    shutil.copyfile('config.py', '%s/config.py' % save_path)

    # The configured sub_dim only decides whether sub-embeddings are used;
    # the value handed to the model comes from load_sub_embeddings below.
    use_subs = sub_dim > 0
    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess(dataset, no_training_set=False, use_subs=use_subs,
                         subdict=subdict)

    print("building minibatch loaders ... " + _timestamp())
    batch_loader_train = MiniBatchLoader.MiniBatchLoader(
        data.training, BATCH_SIZE, sample=1)
    batch_loader_val = MiniBatchLoader.MiniBatchLoader(
        data.validation, BATCH_SIZE)

    print("building network ... " + _timestamp())
    W_init, embed_dim = Helpers.load_word2vec_embeddings(
        data.dictionary[0], word2vec)
    S_init, sub_dim = Helpers.load_sub_embeddings(data.dictionary[1], sub2vec)
    m = model.Model(nlayers, data.vocab_size, data.num_chars, W_init, S_init,
                    nhidden, embed_dim, dropout, train_emb, sub_dim,
                    use_feat, gating_fn)

    print("training ... " + _timestamp())
    num_iter = 0
    max_acc = 0.
    # Pre-bind so the end-of-epoch summary cannot hit an unbound name when
    # an epoch finishes before the first validation run (or with no training
    # batches); NaN makes "not measured yet" visible in the log.
    tr_acc = val_acc = float('nan')

    best_model_path = '%s/best_model.p' % save_path
    if os.path.isfile(best_model_path):
        print('loading previously saved model ' + _timestamp())
        m.load_model(best_model_path)
        print("model loaded")
    else:
        init_model_path = '%s/model_init.p' % save_path
        print('saving init model ' + _timestamp())
        m.save_model(init_model_path)
        print('loading init model ' + _timestamp())
        m.load_model(init_model_path)

    # Line-buffered so each message reaches disk as soon as it is written;
    # the context manager closes the log even if training raises.
    with open(save_path + '/log', 'a', 1) as logger:
        for epoch in range(NUM_EPOCHS):
            print("epochs training ... " + _timestamp())
            estart = time.time()
            new_max = False

            for (dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl,
                 fnames) in batch_loader_train:
                loss, tr_acc, probs = m.train(dw, dt, qw, qt, c, a, m_dw,
                                              m_qw, tt, tm, m_c, cl)
                _report(logger,
                        "Epoch %d TRAIN loss=%.4e acc=%.4f elapsed=%.1f" % (
                            epoch, loss, tr_acc, time.time() - estart))

                num_iter += 1
                if num_iter % VALIDATION_FREQ != 0:
                    continue

                total_loss, total_acc, n = _run_validation(
                    m, batch_loader_val)
                print('validate on ' + str(n) + ' validation data')
                val_acc = total_acc / n
                if val_acc > max_acc:
                    max_acc = val_acc
                    m.save_model(best_model_path)
                    new_max = True
                _report(logger,
                        "Epoch %d VAL loss=%.4e acc=%.4f max_acc=%.4f" % (
                            epoch, total_loss / n, val_acc, max_acc))

            m.save_model('%s/model_%d.p' % (save_path, epoch))
            # tr_acc is the accuracy of the last training batch and val_acc
            # the most recent validation accuracy, not epoch averages.
            _report(logger,
                    "After Epoch %d: Train acc=%.4f, Val acc=%.4f" % (
                        epoch, tr_acc, val_acc))

            # learning schedule
            if epoch >= 2:
                m.anneal()
            # stopping criterion
            if not new_max:
                break