def train_val():
    """Train on the 'train' split and validate on the seen/unseen splits."""
    setup()
    # Training environment; the tokenizer handles text preprocessing.
    vocab = read_vocab(TRAIN_VOCAB)
    tokenizer = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train'],
                         tokenizer=tokenizer)
    # One (environment, evaluator) pair per validation split.
    val_envs = {}
    for split in ['val_seen', 'val_unseen']:
        split_env = R2RBatch(features, batch_size=batch_size, splits=[split],
                             tokenizer=tokenizer)
        val_envs[split] = (split_env, Evaluation([split]))
    # Build the encoder/decoder pair; halve the encoder hidden size when it
    # is bidirectional.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
def test_submission():
    """Train on train+val splits combined, then write a test-split submission."""
    setup()
    # Environment over the combined splits; the tokenizer preprocesses text.
    vocab = read_vocab(TRAINVAL_VOCAB)
    tokenizer = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    combined_splits = ['train', 'val_seen', 'val_unseen']
    train_env = R2RBatch(features, batch_size=batch_size,
                         splits=combined_splits, tokenizer=tokenizer)
    # Build the encoder/decoder pair and train.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters)
    # Decode the test split greedily and dump the submission file.
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test'],
                        tokenizer=tokenizer)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix,
                                                   'test', 20000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
def eval(args):
    """Beam-search evaluation for the Covington-family transition systems.

    Restores the most recent saved checkpoint and decodes every sentence in
    the evaluation data with beam search.  Two files are written per epoch:
    predicted POS tags ("*_pos_eval_*") and predicted heads/relations
    ("*_eval_*").
    """
    # Resolve the transition-system class from its CLI name.
    transsys_lookup = {
        "Cov": Covington,
        "NCov": NewCovington,
        "Cov2": Covington2,
        "Cov3": Covington3
    }
    transsys = transsys_lookup[args.transsys]
    vocab, vecs, pretrained = read_vocab(conll_file=args.conll_file,
                                         wordvec_file=args.wordvec_file,
                                         vocab_file=args.vocab_file,
                                         wordvec_dim=args.wordvec_dim,
                                         min_count=args.min_count,
                                         log=log)
    mappings, invmappings = read_mappings(args.mappings_file, transsys, log=log)
    data, sent_length, trans_length = read_data(conll_file=args.conll_file,
                                                seq_file=args.seq_file,
                                                vocab=vocab,
                                                mappings=mappings,
                                                transsys=transsys,
                                                fpos=args.fpos,
                                                log=log)
    if args.transsys == 'NCov':
        # NOTE(review): hard-coded sentence-length cap for NCov — presumably
        # must match the value used at training time; confirm.
        sent_length = 70
    # Transition-feature tensor shape: 'Cov' uses a fixed 5-id vector, the
    # other systems a (sent_length, 5) matrix.
    feat_shape = [5] if args.transsys == 'Cov' else [sent_length, 5]
    transsys = transsys(mappings, invmappings)  # instantiate the chosen class
    parser = Parser(args, vecs, pretrained, mappings, invmappings, sent_length,
                    trans_length, -1, log, train=False)
    trans_predictors = parser.trans_predictors
    log.info('Computational graph successfully built.')
    log.info('Setting up tensorflow session...')
    saver = tf.train.Saver(max_to_keep=10000)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # Walk checkpoints from the newest epoch backwards; the trailing `break`
    # means only the newest existing checkpoint is actually evaluated.
    for epoch in reversed(xrange(int(args.epochs * args.epoch_multiplier))):
        savedpath = '%s/model_epoch%d' % (args.model_dir, epoch)
        if not op.exists(savedpath + '.meta'):
            continue
        log.info('Evaluating Epoch %3d...' % (epoch))
        saver.restore(sess, savedpath)
        # One beam per sentence, seeded with (score=0, initial parser state).
        states = [[(0, ParserState(datum[0], transsys=transsys))]
                  for datum in data]
        with smart_open('%s/%s_pos_eval_beam_%d_output_epoch%d.txt' %
                        (args.model_dir, args.eval_dataset, args.beam_size,
                         epoch), 'w') as outf2:
            with smart_open('%s/%s_eval_beam_%d_output_epoch%d.txt' %
                            (args.model_dir, args.eval_dataset, args.beam_size,
                             epoch), 'w') as outf:
                for batch in xrange(
                        (len(data) + args.batch_size - 1) / args.batch_size):
                    idx = range(batch * args.batch_size,
                                min((batch + 1) * args.batch_size, len(data)))
                    batch_size = len(idx)
                    batch_data = [data[i] for i in idx]
                    batch_states = [states[i] for i in idx]
                    # prepare data in tensor shape (the last batch is padded
                    # up to args.batch_size with dummy full-length rows)
                    batch_sent_lengths = np.array(
                        [len(datum[0]) for datum in batch_data] +
                        [sent_length] * (args.batch_size - batch_size),
                        dtype=np.int32)
                    batch_words = np.zeros((args.batch_size, sent_length),
                                           dtype=np.int32)
                    batch_words2 = np.zeros((args.batch_size, sent_length),
                                            dtype=np.int32)
                    batch_gold_pos = np.zeros((args.batch_size, sent_length),
                                              dtype=np.int32)
                    for i in xrange(batch_size):
                        batch_words[i, :batch_sent_lengths[i]] = batch_data[i][0]
                        batch_words2[i, :batch_sent_lengths[i]] = batch_data[i][0]
                        batch_gold_pos[i, :batch_sent_lengths[i]] = batch_data[i][2]
                    batch_trans_feat_ids = np.zeros(
                        tuple([args.batch_size * args.beam_size] + feat_shape),
                        dtype=np.int32)
                    batch_trans_feat_sizes = np.zeros(
                        (args.batch_size * args.beam_size), dtype=np.int32)
                    preds_list = [parser.combined_head, parser.combined_dep,
                                  parser.pos_preds]
                    if args.transsys == 'NCov' or args.transsys == 'Cov2' or args.transsys == 'Cov3':
                        preds_list += [parser.transition_logit]
                    if args.fpos:
                        preds_list += [parser.fpos_preds]
                    # Run the sentence-level part of the network once per batch.
                    preds = sess.run(preds_list,
                                     feed_dict={
                                         parser.words: batch_words,
                                         parser.words2: batch_words2,
                                         parser.sent_lengths: batch_sent_lengths,
                                         parser.gold_pos: batch_gold_pos,
                                     })
                    # unpack predictions (order mirrors preds_list above)
                    batch_combined_head, batch_combined_dep, pos_preds = preds[:3]
                    preds = preds[3:]
                    if args.transsys == 'NCov' or args.transsys == 'Cov2' or args.transsys == 'Cov3':
                        batch_trans_logit = preds[0]
                        preds = preds[1:]
                    if args.fpos:
                        fpos_preds = preds[0]
                        preds = preds[1:]
                    # Write POS (and optionally fine-grained POS) predictions,
                    # one token per line, blank line between sentences.
                    if args.fpos:
                        for i in xrange(batch_size):
                            for j in xrange(batch_sent_lengths[i] - 1):
                                outf2.write("%s\t%s\n" %
                                            (invmappings['pos'][pos_preds[i][j]],
                                             invmappings['fpos'][fpos_preds[i][j]]))
                            outf2.write("\n")
                    else:
                        for i in xrange(batch_size):
                            for j in xrange(batch_sent_lengths[i] - 1):
                                outf2.write("%s\t_\n" %
                                            invmappings['pos'][pos_preds[i][j]])
                            outf2.write("\n")
                    # Beam search over transition sequences.
                    j = 0
                    updated = range(batch_size)
                    batch_finished = [[] for _ in range(batch_size)]
                    feat_lengths = [[] for _ in range(batch_size)]
                    while True:
                        # Featurize every state on every still-active beam.
                        batch_feats = [[featurize_state(batch_states[i][k][1],
                                                        mappings)
                                        for k in range(len(batch_states[i]))]
                                       for i in updated]
                        # Record how many transitions each beam head offers.
                        for i, beam_feats in zip(updated, batch_feats):
                            feats = beam_feats[0]
                            if len(feats) > 0:
                                if args.transsys == 'NCov' or args.transsys == 'Cov2' or args.transsys == 'Cov3':
                                    feat_lengths[i] += [len(feats)]
                                else:
                                    feat_lengths[i] += [
                                        len(batch_states[i][0][1].transitionset())
                                    ]
                        preds = []
                        predsid = []
                        for i, beam_feats in zip(updated, batch_feats):
                            for k, feats in enumerate(beam_feats):
                                if len(feats) <= 0:
                                    # No transitions left: finished parse;
                                    # keep the best args.beam_size of them.
                                    if len(batch_finished[i]) < args.beam_size:
                                        heappush(batch_finished[i],
                                                 batch_states[i][k])
                                    else:
                                        heappushpop(batch_finished[i],
                                                    batch_states[i][k])
                                    continue
                                beamidx = i * args.beam_size + k
                                if args.transsys == 'NCov' or args.transsys == 'Cov2' or args.transsys == 'Cov3':
                                    batch_trans_feat_ids[beamidx, :len(feats)] = feats
                                else:
                                    batch_trans_feat_ids[beamidx] = feats
                                batch_trans_feat_sizes[beamidx] = len(feats)
                                assert (batch_trans_feat_sizes[beamidx] > 0)
                                predsid.append((i, k))
                                preds.append(trans_predictors[i][k])
                        if len(predsid) <= 0:
                            break  # every beam in the batch has finished
                        # Score candidate transitions for all live beams.
                        if args.transsys == 'NCov' or args.transsys == 'Cov2' or args.transsys == 'Cov3':
                            p = sess.run(preds,
                                         feed_dict={
                                             parser.combined_head_placeholder:
                                                 batch_combined_head,
                                             parser.combined_dep_placeholder:
                                                 batch_combined_dep,
                                             parser.trans_logit_placeholder:
                                                 batch_trans_logit,
                                             parser.trans_feat_ids:
                                                 batch_trans_feat_ids,
                                             parser.trans_feat_sizes:
                                                 batch_trans_feat_sizes
                                         })
                        else:
                            p = sess.run(preds,
                                         feed_dict={
                                             parser.combined_head_placeholder:
                                                 batch_combined_head,
                                             parser.combined_dep_placeholder:
                                                 batch_combined_dep,
                                             parser.trans_feat_ids:
                                                 batch_trans_feat_ids,
                                             parser.trans_feat_sizes:
                                                 batch_trans_feat_sizes
                                         })
                        next_batchstates = [[] for _ in xrange(batch_size)]
                        updated = set()
                        for ik, pred in izip(predsid, p):
                            i, k = ik
                            updated.add(i)
                            if len(batch_states[i][k][1].transitionset()) > 0:
                                # model outputs NLLs so the lower the better
                                sort = sorted(enumerate(pred),
                                              key=lambda x: x[1])
                                expanded_beams = 0
                                for choice, score in sort:
                                    newscore = batch_states[i][k][0] - score
                                    if transsys.tuple_trans_from_int(
                                            batch_states[i][k][1].transitionset(),
                                            choice)[0] in batch_states[i][k][1].transitionset():
                                        candidate = (newscore,
                                                     batch_states[i][k][1],
                                                     choice)
                                        if len(next_batchstates[i]) < args.beam_size:
                                            heappush(next_batchstates[i],
                                                     candidate)
                                        elif newscore > next_batchstates[i][0][0]:
                                            heappushpop(next_batchstates[i],
                                                        candidate)
                                        expanded_beams += 1
                                    if expanded_beams >= args.beam_size:
                                        break
                        # Apply each chosen transition to a clone of its state.
                        for i in updated:
                            next_batchstates[i] = nlargest(args.beam_size,
                                                           next_batchstates[i],
                                                           key=lambda x: x[0])
                            for k, t in enumerate(next_batchstates[i]):
                                score, state, choice = t
                                state = state.clone()
                                transsys.advance(state, choice)
                                next_batchstates[i][k] = (score, state)
                        batch_states = next_batchstates
                        j += 1
                    # Emit the highest-scoring finished parse per sentence.
                    for i in xrange(batch_size):
                        assert len(batch_finished) == batch_size
                        assert len(batch_finished[i]) > 0, \
                            "nothing finished: %d" % (i)
                        assert len(batch_finished[i][0]) > 1, \
                            "%s" % (batch_finished[i][0])
                        state_pred = nlargest(1, batch_finished[i],
                                              key=lambda x: x[0])[0][1]
                        for t in state_pred.head[1:]:
                            outf.write("%d\t%s\n" %
                                       (t[0], invmappings['rel'][t[1]]))
                        outf.write("\n")
                    log.info('Epoch %3d batch %4d' % (epoch, batch))
        # Added so that only the last (newest) model is used.
        log.info('Use exclusively the model of last epoch')
        break
    sess.close()
def train(language, embed_size, mode='proj',
          model_save_path=utils.DEFAULT_MODEL_PATH,
          data_path=utils.DEFAULT_DATA_PATH,
          batch_size=DEFAULT_BATCH_SIZE,
          weighted=False, normalize=False, use_bias=True):
    # type: (str, int, str, str, str, int, bool, bool, bool) -> None
    """Train a namespace-embedding model for one ecosystem.

    Reads the vocabulary and the train/val import CSVs from `data_path`,
    builds the network, optionally applies frequency-based label weights,
    trains with early stopping, and saves the model to `model_save_path`.

    Fixes over the previous revision:
    - the data-path assert message is now actually formatted with the path;
    - `normalize` and `batch_size` are forwarded to `get_nn_model` instead of
      being silently overridden by hard-coded values.
    """
    assert os.path.isdir(data_path), 'Data path %s doesn\'t exist' % data_path
    assert language == 'global' or language in utils.ECOSYSTEMS, \
        'Unknown programming language'
    assert mode in _MODE_FNAMES, (
        "Invalid mode '%s'. Should be one of %s" % (
            mode, ','.join(_MODE_FNAMES)))

    # Encode the configuration in the file name so runs don't clobber each
    # other.
    model_save_fname = os.path.join(model_save_path, '%s_%s_%d%s%s%s.model' % (
        language, mode, embed_size,
        '_norm' if normalize else '',
        '_no_bias' if not use_bias else '',
        '_weighted' if weighted else '',
    ))
    sys.stderr.write('The model will be saved to: %s\n' % model_save_fname)
    if not os.path.isdir(model_save_path):
        os.mkdir(model_save_path)

    vocab_path = os.path.join(data_path, language + '_vocab.csv')
    if language == 'global' and not os.path.isfile(vocab_path):
        # TODO: implement
        raise NotImplementedError
        # build_global_data(language)

    sys.stderr.write('Reading vocabulary..\n')
    idx2namespace, namespace2idx = utils.read_vocab(vocab_path)
    vocab_size = len(idx2namespace)

    sys.stderr.write('Reading dataset..\n')
    csv.field_size_limit(2147483647)  # DANGER ZONE, but won't read otherwise
    imports_prefix = '%s_%s_imports_' % (language, mode)
    input_data_train, input_offsets_train = utils.read_dev(
        os.path.join(data_path, imports_prefix + 'train.csv'), namespace2idx)
    input_data_val, input_offsets_val = utils.read_dev(
        os.path.join(data_path, imports_prefix + 'val.csv'), namespace2idx)
    dataset_train = dev2vecSequence(
        input_data_train, input_offsets_train, vocab_size=vocab_size,
        batch_size=batch_size)
    dataset_val = dev2vecSequence(
        input_data_val, input_offsets_val, vocab_size=vocab_size,
        batch_size=batch_size)

    # Honor the caller's `normalize` and `batch_size` (previously hard-coded
    # to False / DEFAULT_BATCH_SIZE, contradicting the saved-model name).
    # NOTE(review): `use_bias` only affects the file name here — confirm
    # whether get_nn_model should receive it too.
    model = get_nn_model(
        vocab_size, embed_size, normalize=normalize, batch_size=batch_size)

    loss_fn = 'binary_crossentropy'
    if weighted:
        # Weight each label by its frequency relative to the median count.
        counts_fname = os.path.join(
            data_path, '%s_namespace_counts_by_%s.csv' % (
                language, 'projects' if mode == 'proj' else mode))
        counts = pd.read_csv(
            counts_fname, header=None, index_col=0, squeeze=True)
        counts = counts[
            [idx2namespace[idx] for idx in range(len(idx2namespace))]]
        label_weights = counts / counts.median()
        loss_fn = get_weighted_loss_fn(label_weights)

    model.compile(optimizer='adam', loss=loss_fn)
    # in most cases, model starts to overfit after less than one epoch
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)

    sys.stderr.write('Training..\n')
    model.fit(dataset_train, epochs=2, callbacks=[callback],
              validation_data=dataset_val)
    sys.stderr.write('Saving the model..\n')
    model.save(model_save_fname)
def eval(args):
    """Beam-search evaluation for the arc-* transition systems.

    For every saved checkpoint (newest epoch first) this restores the model
    and decodes every sentence in the evaluation data with beam search,
    writing predicted POS tags ("*_pos_eval_*") and predicted
    heads/relations ("*_eval_*") per epoch.
    """
    # Resolve the transition-system class from its CLI name.
    transsys_lookup = {"ASw": ArcSwift,
                       "AER": ArcEagerReduce,
                       "AES": ArcEagerShift,
                       "ASd": ArcStandard,
                       "AH": ArcHybrid, }
    transsys = transsys_lookup[args.transsys]
    vocab, vecs, pretrained = read_vocab(conll_file=args.conll_file,
                                         wordvec_file=args.wordvec_file,
                                         vocab_file=args.vocab_file,
                                         wordvec_dim=args.wordvec_dim,
                                         min_count=args.min_count,
                                         log=log)
    mappings, invmappings = read_mappings(args.mappings_file, transsys, log=log)
    data, sent_length, trans_length = read_data(conll_file=args.conll_file,
                                                seq_file=args.seq_file,
                                                vocab=vocab,
                                                mappings=mappings,
                                                transsys=transsys,
                                                fpos=args.fpos,
                                                log=log)
    # Transition-feature tensor shape: ArcSwift uses a (sent_length, 5)
    # matrix; every other system a fixed 5-id vector.
    feat_shape = [5] if args.transsys != 'ASw' else [sent_length, 5]
    transsys = transsys(mappings, invmappings)  # instantiate the chosen class
    parser = Parser(args, vecs, pretrained, mappings, invmappings, sent_length,
                    trans_length, -1, log, train=False)
    trans_predictors = parser.trans_predictors
    log.info('Computational graph successfully built.')
    log.info('Setting up tensorflow session...')
    saver = tf.train.Saver(max_to_keep=10000)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # Walk checkpoints from the newest epoch backwards, skipping missing ones.
    for epoch in reversed(xrange(int(args.epochs * args.epoch_multiplier))):
        savedpath = '%s/model_epoch%d' % (args.model_dir, epoch)
        if not op.exists(savedpath + '.meta'):
            continue
        log.info('Evaluating Epoch %3d...' % (epoch))
        saver.restore(sess, savedpath)
        # One beam per sentence, seeded with (score=0, initial parser state).
        states = [[(0, ParserState(datum[0], transsys=transsys))]
                  for datum in data]
        with smart_open('%s/%s_pos_eval_beam_%d_output_epoch%d.txt' %
                        (args.model_dir, args.eval_dataset, args.beam_size,
                         epoch), 'w') as outf2:
            with smart_open('%s/%s_eval_beam_%d_output_epoch%d.txt' %
                            (args.model_dir, args.eval_dataset, args.beam_size,
                             epoch), 'w') as outf:
                for batch in xrange(
                        (len(data) + args.batch_size - 1) / args.batch_size):
                    idx = range(batch * args.batch_size,
                                min((batch + 1) * args.batch_size, len(data)))
                    batch_size = len(idx)
                    batch_data = [data[i] for i in idx]
                    batch_states = [states[i] for i in idx]
                    # prepare data in tensor shape (the last batch is padded
                    # up to args.batch_size with dummy full-length rows)
                    batch_sent_lengths = np.array(
                        [len(datum[0]) for datum in batch_data] +
                        [sent_length] * (args.batch_size - batch_size),
                        dtype=np.int32)
                    batch_words = np.zeros((args.batch_size, sent_length),
                                           dtype=np.int32)
                    batch_words2 = np.zeros((args.batch_size, sent_length),
                                            dtype=np.int32)
                    batch_gold_pos = np.zeros((args.batch_size, sent_length),
                                              dtype=np.int32)
                    for i in xrange(batch_size):
                        batch_words[i, :batch_sent_lengths[i]] = batch_data[i][0]
                        batch_words2[i, :batch_sent_lengths[i]] = batch_data[i][0]
                        batch_gold_pos[i, :batch_sent_lengths[i]] = batch_data[i][2]
                    batch_trans_feat_ids = np.zeros(
                        tuple([args.batch_size * args.beam_size] + feat_shape),
                        dtype=np.int32)
                    batch_trans_feat_sizes = np.zeros(
                        (args.batch_size * args.beam_size), dtype=np.int32)
                    preds_list = [parser.combined_head, parser.combined_dep,
                                  parser.pos_preds]
                    if args.transsys == 'ASw':
                        preds_list += [parser.transition_logit]
                    if args.fpos:
                        preds_list += [parser.fpos_preds]
                    # Run the sentence-level part of the network once per batch.
                    preds = sess.run(preds_list,
                                     feed_dict={parser.words: batch_words,
                                                parser.words2: batch_words2,
                                                parser.sent_lengths: batch_sent_lengths,
                                                parser.gold_pos: batch_gold_pos, })
                    # unpack predictions (order mirrors preds_list above)
                    batch_combined_head, batch_combined_dep, pos_preds = preds[:3]
                    preds = preds[3:]
                    if args.transsys == 'ASw':
                        batch_trans_logit = preds[0]
                        preds = preds[1:]
                    if args.fpos:
                        fpos_preds = preds[0]
                        preds = preds[1:]
                    # Write POS (and optionally fine-grained POS) predictions,
                    # one token per line, blank line between sentences.
                    if args.fpos:
                        for i in xrange(batch_size):
                            for j in xrange(batch_sent_lengths[i] - 1):
                                outf2.write("%s\t%s\n" %
                                            (invmappings['pos'][pos_preds[i][j]],
                                             invmappings['fpos'][fpos_preds[i][j]]))
                            outf2.write("\n")
                    else:
                        for i in xrange(batch_size):
                            for j in xrange(batch_sent_lengths[i] - 1):
                                outf2.write("%s\t_\n" %
                                            invmappings['pos'][pos_preds[i][j]])
                            outf2.write("\n")
                    # Beam search over transition sequences.
                    j = 0
                    updated = range(batch_size)
                    batch_finished = [[] for _ in range(batch_size)]
                    feat_lengths = [[] for _ in range(batch_size)]
                    while True:
                        # Featurize every state on every still-active beam.
                        batch_feats = [[featurize_state(batch_states[i][k][1],
                                                        mappings)
                                        for k in range(len(batch_states[i]))]
                                       for i in updated]
                        # Record how many transitions each beam head offers.
                        for i, beam_feats in zip(updated, batch_feats):
                            feats = beam_feats[0]
                            if len(feats) > 0:
                                if args.transsys == 'ASw':
                                    feat_lengths[i] += [len(feats)]
                                else:
                                    feat_lengths[i] += [
                                        len(batch_states[i][0][1].transitionset())
                                    ]
                        preds = []
                        predsid = []
                        for i, beam_feats in zip(updated, batch_feats):
                            for k, feats in enumerate(beam_feats):
                                if len(feats) <= 0:
                                    # No transitions left: finished parse;
                                    # keep the best args.beam_size of them.
                                    if len(batch_finished[i]) < args.beam_size:
                                        heappush(batch_finished[i],
                                                 batch_states[i][k])
                                    else:
                                        heappushpop(batch_finished[i],
                                                    batch_states[i][k])
                                    continue
                                beamidx = i * args.beam_size + k
                                if args.transsys == 'ASw':
                                    batch_trans_feat_ids[beamidx, :len(feats)] = feats
                                else:
                                    batch_trans_feat_ids[beamidx] = feats
                                batch_trans_feat_sizes[beamidx] = len(feats)
                                assert(batch_trans_feat_sizes[beamidx] > 0)
                                predsid.append((i, k))
                                preds.append(trans_predictors[i][k])
                        if len(predsid) <= 0:
                            break  # every beam in the batch has finished
                        # Score candidate transitions for all live beams.
                        if args.transsys == 'ASw':
                            p = sess.run(preds,
                                         feed_dict={parser.combined_head_placeholder: batch_combined_head,
                                                    parser.combined_dep_placeholder: batch_combined_dep,
                                                    parser.trans_logit_placeholder: batch_trans_logit,
                                                    parser.trans_feat_ids: batch_trans_feat_ids,
                                                    parser.trans_feat_sizes: batch_trans_feat_sizes})
                        else:
                            p = sess.run(preds,
                                         feed_dict={parser.combined_head_placeholder: batch_combined_head,
                                                    parser.combined_dep_placeholder: batch_combined_dep,
                                                    parser.trans_feat_ids: batch_trans_feat_ids,
                                                    parser.trans_feat_sizes: batch_trans_feat_sizes})
                        next_batchstates = [[] for _ in xrange(batch_size)]
                        updated = set()
                        for ik, pred in izip(predsid, p):
                            i, k = ik
                            updated.add(i)
                            if len(batch_states[i][k][1].transitionset()) > 0:
                                # model outputs NLLs so the lower the better
                                sort = sorted(enumerate(pred),
                                              key=lambda x: x[1])
                                expanded_beams = 0
                                for choice, score in sort:
                                    newscore = batch_states[i][k][0] - score
                                    if transsys.tuple_trans_from_int(
                                            batch_states[i][k][1].transitionset(),
                                            choice)[0] in batch_states[i][k][1].transitionset():
                                        candidate = (newscore,
                                                     batch_states[i][k][1],
                                                     choice)
                                        if len(next_batchstates[i]) < args.beam_size:
                                            heappush(next_batchstates[i],
                                                     candidate)
                                        elif newscore > next_batchstates[i][0][0]:
                                            heappushpop(next_batchstates[i],
                                                        candidate)
                                        expanded_beams += 1
                                    if expanded_beams >= args.beam_size:
                                        break
                        # Apply each chosen transition to a clone of its state.
                        for i in updated:
                            next_batchstates[i] = nlargest(args.beam_size,
                                                           next_batchstates[i],
                                                           key=lambda x: x[0])
                            for k, t in enumerate(next_batchstates[i]):
                                score, state, choice = t
                                state = state.clone()
                                transsys.advance(state, choice)
                                next_batchstates[i][k] = (score, state)
                        batch_states = next_batchstates
                        j += 1
                    # Emit the highest-scoring finished parse per sentence.
                    for i in xrange(batch_size):
                        assert len(batch_finished) == batch_size
                        assert len(batch_finished[i]) > 0, \
                            "nothing finished: %d" % (i)
                        assert len(batch_finished[i][0]) > 1, \
                            "%s" % (batch_finished[i][0])
                        state_pred = nlargest(1, batch_finished[i],
                                              key=lambda x: x[0])[0][1]
                        for t in state_pred.head[1:]:
                            outf.write("%d\t%s\n" %
                                       (t[0], invmappings['rel'][t[1]]))
                        outf.write("\n")
                    log.info('Epoch %3d batch %4d' % (epoch, batch))
    sess.close()
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # Set which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Load hyperparameters from checkpoint (if exists)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        # A missing checkpoint is only fatal in eval mode with a real agent;
        # the scripted agents (forward/random/shortest) need no weights.
        if not hparams.forward_agent and not hparams.random_agent and \
                not hparams.shortest_agent:
            if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') \
                    and hparams.eval_only:
                sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters

    # Backfill optional hparams so older configs keep working.
    if not hasattr(hparams, 'ask_baseline'):
        hparams.ask_baseline = None
    if not hasattr(hparams, 'instruction_baseline'):
        hparams.instruction_baseline = None

    # Set random seeds
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    np.random.seed(hparams.seed)
    random.seed(hparams.seed)

    # Create or load vocab
    train_vocab_path = os.path.join(hparams.data_path, 'vocab.txt')
    if not os.path.exists(train_vocab_path):
        raise Exception('Vocab file not found at %s' % train_vocab_path)
    vocab = read_vocab([train_vocab_path])
    hparams.instr_padding_idx = vocab.index('<PAD>')

    tokenizer = Tokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    featurizer = ImageFeatures(hparams.img_features, device)
    simulator = Simulator(hparams)

    # Create train environment
    train_env = Batch(hparams, simulator, featurizer, tokenizer, split='train')

    # Create validation environments.  In eval-only mode, pick the test
    # split(s) matching the checkpoint name and run a single iteration.
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if 'val_seen' in hparams.load_path:
            val_splits = ['test_seen']
        elif 'val_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        else:
            val_splits = ['test_seen', 'test_unseen']
        end_iter = start_iter + 1
    if hparams.eval_on_val:
        # Evaluate on the validation splits instead of the test splits.
        val_splits = [x.replace('test_', 'val_') for x in val_splits]

    val_envs_tmp = {split: (Batch(hparams, simulator, featurizer, tokenizer,
                                  split=split),
                            Evaluation(hparams, [split], hparams.data_path))
                    for split in val_splits}
    # Seen splits are evaluated twice, under both "anna" environment keys;
    # unseen splits keep their original key.
    val_envs = {}
    for key, value in val_envs_tmp.items():
        if '_seen' in key:
            val_envs[key + '_env_seen_anna'] = value
            val_envs[key + '_env_unseen_anna'] = value
        else:
            assert '_unseen' in key
            val_envs[key] = value

    # Build model and optimizer
    model = AgentModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)

    best_metrics = {env_name: -1 for env_name in val_envs.keys()}
    best_metrics['combined'] = -1

    # Load model parameters from checkpoint (if exists)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    if hparams.log_every == -1:
        # Default: log roughly 100 times per pass over the training data,
        # rounded to a multiple of 100 iterations.
        hparams.log_every = round(len(train_env.data) /
                                  (hparams.batch_size * 100)) * 100

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    print('Number of parameters:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # Scripted agents are only valid in eval mode.
    if hparams.random_agent or hparams.forward_agent or hparams.shortest_agent:
        assert eval_mode
        agent = SimpleAgent(hparams)
    else:
        agent = VerbalAskAgent(model, hparams, device)

    return train(train_env, val_envs, agent, model, optimizer, start_iter,
                 end_iter, best_metrics, eval_mode)
# !/usr/bin/env python
# -*- coding:utf-8 -*-
"""Interactive NER prediction: restore the latest checkpoint and tag typed input."""
import tensorflow as tf
from model import NerModel
from utils import tokenize, read_vocab, format_result
import tensorflow_addons as tf_ad
from args_help import args
import json

# Vocabulary and tag mappings (token/label <-> id).
vocab2id, id2vocab = read_vocab(args.vocab_file)
tag2id, id2tag = read_vocab(args.tag_file)
text_sequences, label_sequences = tokenize(args.test_path, vocab2id, tag2id)

optimizer = tf.keras.optimizers.Adam(args.lr)
model = NerModel(hidden_num=args.hidden_num, vocab_size=len(vocab2id),
                 label_size=len(tag2id), embedding_size=args.embedding_size)

# Restore model weights from the most recent checkpoint in output_dir.
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
ckpt.restore(tf.train.latest_checkpoint(args.output_dir))

# Read text from stdin forever and run the model on each line.
while True:
    text = input("input:")
    # Characters missing from the vocabulary map to id 0.
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [[vocab2id.get(char, 0) for char in text]], padding='post')
    print('dataset', dataset)
    logits, text_lens = model.predict(dataset)
    # Fixed log label: was the typo 'logits.hape'.
    print('logits.shape', logits.shape)
def main(args):
    """Train an en->zh video-caption seq2seq model, then evaluate the best
    checkpoint on the test set.

    Creates per-run log/checkpoint/result directories named after the model
    type and train id, saves the argparse config, and keeps the checkpoint
    with the best corpus BLEU on the validation set.
    """
    model_prefix = '{}_{}'.format(args.model_type, args.train_id)

    log_path = args.LOG_DIR + model_prefix + '/'
    checkpoint_path = args.CHK_DIR + model_prefix + '/'
    result_path = args.RESULT_DIR + model_prefix + '/'
    cp_file = checkpoint_path + "best_model.pth.tar"
    init_epoch = 0

    if not os.path.exists(log_path):
        os.makedirs(log_path)
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)

    ## set up the logger
    set_logger(os.path.join(log_path, 'train.log'))

    ## save argparse parameters (one "key: value" per line)
    with open(log_path + 'args.yaml', 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}: {}\n'.format(k, v))

    logging.info('Training model: {}'.format(model_prefix))

    ## set up vocab txt
    # create txt here
    setup(args, clear=True)
    print(args.__dict__)

    # indicate src and tgt language
    src, tgt = 'en', 'zh'
    maps = {'en': args.TRAIN_VOCAB_EN, 'zh': args.TRAIN_VOCAB_ZH}
    vocab_src = read_vocab(maps[src])
    tok_src = Tokenizer(language=src, vocab=vocab_src,
                        encoding_length=args.MAX_INPUT_LENGTH, zh_tok='jieba')
    vocab_tgt = read_vocab(maps[tgt])
    tok_tgt = Tokenizer(language=tgt, vocab=vocab_tgt,
                        encoding_length=args.MAX_INPUT_LENGTH, zh_tok='jieba')
    logging.info('Vocab size src/tgt:{}/{}'.format(len(vocab_src),
                                                   len(vocab_tgt)))

    ## Setup the training, validation, and testing dataloaders
    train_loader, val_loader, test_loader = create_split_loaders(
        args.DATA_DIR, (tok_src, tok_tgt), args.batch_size,
        args.MAX_VID_LENGTH, (src, tgt), num_workers=4, pin_memory=True)
    logging.info('train/val/test size: {}/{}/{}'.format(
        len(train_loader), len(val_loader), len(test_loader)))

    ## init model (only the 's2s' model type is handled here)
    if args.model_type == 's2s':
        encoder = Encoder(vocab_size=len(vocab_src),
                          embed_size=args.wordembed_dim,
                          hidden_size=args.enc_hid_size).cuda()
        decoder = Decoder(embed_size=args.wordembed_dim,
                          hidden_size=args.dec_hid_size,
                          vocab_size=len(vocab_tgt)).cuda()

    encoder.train()
    decoder.train()

    ## define loss (padding tokens are excluded from the loss)
    criterion = nn.CrossEntropyLoss(ignore_index=padding_idx).cuda()
    ## init optimizer (separate optimizers/learning rates per module)
    dec_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, decoder.parameters()),
        lr=args.decoder_lr, weight_decay=args.weight_decay)
    enc_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, encoder.parameters()),
        lr=args.encoder_lr, weight_decay=args.weight_decay)

    count_paras(encoder, decoder, logging)

    ## track loss during training
    total_train_loss, total_val_loss = [], []
    best_val_bleu, best_epoch = 0, 0

    ## init time
    zero_time = time.time()

    # Begin training procedure
    earlystop_flag = False
    rising_count = 0

    for epoch in range(init_epoch, args.epochs):
        ## train for one epoch, then validate
        start_time = time.time()
        train_loss = train(train_loader, encoder, decoder, criterion,
                           enc_optimizer, dec_optimizer, epoch)

        val_loss, sentbleu, corpbleu = validate(val_loader, encoder, decoder,
                                                criterion, tok_tgt)
        end_time = time.time()

        epoch_time = end_time - start_time
        total_time = end_time - zero_time
        logging.info('Total time used: %s Epoch %d time uesd: %s train loss: %.4f val loss: %.4f sentbleu: %.4f corpbleu: %.4f' % (
            str(datetime.timedelta(seconds=int(total_time))), epoch,
            str(datetime.timedelta(seconds=int(epoch_time))),
            train_loss, val_loss, sentbleu, corpbleu))

        # Checkpoint whenever corpus BLEU improves.
        if corpbleu > best_val_bleu:
            best_val_bleu = corpbleu
            save_checkpoint({'epoch': epoch,
                             'enc_state_dict': encoder.state_dict(),
                             'dec_state_dict': decoder.state_dict(),
                             'enc_optimizer': enc_optimizer.state_dict(),
                             'dec_optimizer': dec_optimizer.state_dict(), },
                            cp_file)
            best_epoch = epoch

        logging.info("Finished {0} epochs of training".format(epoch + 1))

        total_train_loss.append(train_loss)
        total_val_loss.append(val_loss)

    logging.info('Best corpus bleu score {:.4f} at epoch {}'.format(
        best_val_bleu, best_epoch))

    ### the best model is the last model saved in our implementation
    logging.info('************ Start eval... ************')
    # Evaluate on validation dataset
    eval(test_loader, encoder, decoder, cp_file, tok_tgt, result_path)
def train(opt):
    """Train a hierarchical-attention document classifier.

    Splits the training CSV into train/valid (stratified 80/20), trains with
    Adam, periodically evaluates on both the validation and test sets, saves
    the whole model whenever validation accuracy improves, and stops early
    after `opt.es_patience` evaluation rounds without improvement.
    """
    device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")
    # Fixed seed for reproducibility (CUDA vs CPU use separate RNGs).
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True,
                       "drop_last": True}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False}
    # max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    # NOTE(review): fixed document dimensions (13 words/sentence, 24
    # sentences/doc) replace the commented-out data-driven computation.
    max_word_length, max_sent_length = 13, 24
    vocab = read_vocab('data/yelp_review_full_csv/train.csv.txt')
    emb, word_to_ix = get_pretrained_word_embedding(opt.word2vec_path, vocab)

    df = pd.read_csv(opt.train_set, names=['label', 'text'])
    texts = np.array(df['text'])
    labels = np.array(df['label'])
    # Stratified 80/20 train/valid split (single split, fixed seed).
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
    for train_index, test_index in sss.split(texts, labels):
        X_train, X_valid = texts[train_index], texts[test_index]
        y_train, y_valid = labels[train_index], labels[test_index]

    training_set = Custom_Dataset(X_train, y_train, word_to_ix,
                                  max_sent_length, max_word_length)
    valid_set = Custom_Dataset(X_valid, y_valid, word_to_ix,
                               max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, num_workers=32,
                                    **training_params)
    valid_generator = DataLoader(valid_set, num_workers=32, **training_params)

    df_test = pd.read_csv(opt.test_set, names=['label', 'text'])
    test_texts = np.array(df_test['text'])
    test_labels = np.array(df_test['label'])
    test_set = Custom_Dataset(test_texts, test_labels, word_to_ix,
                              max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, num_workers=32, **test_params)

    model = nn.DataParallel(
        HierarchicalAttention(opt.word_hidden_size, opt.sent_hidden_size,
                              opt.batch_size, training_set.num_classes, emb,
                              max_sent_length, max_word_length))

    # Start TensorBoard-style logging from a clean directory.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if torch.cuda.is_available():
        model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr)
    # LR decays when validation accuracy (mode='max') plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.1, patience=5, verbose=True,
        min_lr=1e-8)
    best_acc = 0.
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        print("Epoch " + str(epoch))
        for feature, label, doc_len, sent_len in training_generator:
            if torch.cuda.is_available():
                sent_len = torch.stack(sent_len, dim=1).to(device)
                doc_len = doc_len.to(device)
                feature = feature.to(device)
                label = label.to(device)
            optimizer.zero_grad()
            predictions = model(feature, sent_len, doc_len)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(
                label.cpu().numpy(), predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                loss, training_metrics["accuracy"]))
        # Periodic evaluation on the test and validation sets.
        if epoch % opt.test_interval == 0:
            model.eval()
            # --- test set ---
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label, te_doc_len, te_sent_len in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_sent_len = torch.stack(te_sent_len, dim=1).to(device)
                    te_doc_len = te_doc_len.to(device)
                    te_feature = te_feature.to(device)
                    te_label = te_label.to(device)
                with torch.no_grad():
                    te_predictions = model(te_feature, te_sent_len, te_doc_len)
                te_loss = criterion(te_predictions, te_label)
                # Weight each batch loss by its size (last batch may be short).
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label, te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            # --- validation set ---
            vl_loss_ls = []
            vl_label_ls = []
            vl_pred_ls = []
            for vl_feature, vl_label, vl_doc_len, vl_sent_len in valid_generator:
                num_sample = len(vl_label)
                if torch.cuda.is_available():
                    vl_sent_len = torch.stack(vl_sent_len, dim=1).to(device)
                    vl_doc_len = vl_doc_len.to(device)
                    vl_feature = vl_feature.to(device)
                    vl_label = vl_label.to(device)
                with torch.no_grad():
                    vl_predictions = model(vl_feature, vl_sent_len, vl_doc_len)
                vl_loss = criterion(vl_predictions, vl_label)
                vl_loss_ls.append(vl_loss * num_sample)
                vl_label_ls.extend(vl_label.clone().cpu())
                vl_pred_ls.append(vl_predictions.clone().cpu())
            vl_loss = sum(vl_loss_ls) / valid_set.__len__()
            vl_pred = torch.cat(vl_pred_ls, 0)
            vl_label = np.array(vl_label_ls)
            vl_metrics = get_evaluation(
                vl_label, vl_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nValid loss: {} Valid accuracy: {} \nValid confusion matrix: \n{}\nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, vl_loss,
                        vl_metrics["accuracy"],
                        vl_metrics["confusion_matrix"], te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print(
                "Epoch: {}/{}, Lr: {},Valid Loss: {}, Valid Accuracy: {}, Test Loss: {}, Test Accuracy: {}"
                .format(epoch + 1, opt.num_epoches,
                        optimizer.param_groups[0]['lr'], vl_loss,
                        vl_metrics["accuracy"], te_loss,
                        test_metrics["accuracy"]))
            scheduler.step(vl_metrics["accuracy"])
            model.train()
            # Keep the model with the best validation accuracy.
            if vl_metrics["accuracy"] > best_acc:
                best_acc = vl_metrics["accuracy"]
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")
            # Early stopping (only active when opt.es_patience > 0)
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
def read_words(filename, vocab_filename):
    """Return all whitespace-separated tokens of *filename* that are in the vocab.

    Args:
        filename: Path of the text file to tokenize (read via ``read_file``).
        vocab_filename: Path of the vocabulary file (read via ``read_vocab``).

    Returns:
        list: Tokens of the file, in order of appearance, restricted to
        those present in the vocabulary.
    """
    # Materialize the vocabulary as a set once: testing `word in vocab`
    # against the sequence returned by read_vocab() would be a linear scan
    # for every single token of the input file.
    vocab = set(read_vocab(vocab_filename))
    return [
        word
        for line in read_file(filename)
        for word in line.split()
        if word in vocab
    ]
def main(args):
    """Train or evaluate a span-based RST discourse parser on RST-DT.

    Args:
        args: Parsed command-line namespace providing ``gpu``, ``model``,
            ``initial_tree_sampling``, ``config``, ``data_augmentation``,
            ``name``, ``actiontype`` ("train" or "evaluate"), ``max_epoch``
            and ``dev_size``.

    Side effects:
        Writes logs, model snapshots, predicted constituency trees, and an
        evaluation JSON under the configured "results" directory.
    """
    ####################
    # Arguments
    gpu = args.gpu
    model_name = args.model
    initial_tree_sampling = args.initial_tree_sampling
    path_config = args.config
    data_augmentation = args.data_augmentation
    trial_name = args.name
    actiontype = args.actiontype
    max_epoch = args.max_epoch
    dev_size = args.dev_size

    # Check: the sampling spec must be three underscore-separated method
    # codes (e.g. "X_RB_TD"); positions 2 and 3 carry extra restrictions.
    assert actiontype in ["train", "evaluate"]
    if actiontype == "train":
        assert max_epoch > 0
    assert len(initial_tree_sampling.split("_")) == 3
    for type_ in initial_tree_sampling.split("_"):
        assert type_ in ["X", "BU", "TD", "RB", "LB", "RB2"]
    assert initial_tree_sampling.split("_")[2] != "X"
    assert initial_tree_sampling.split("_")[1] != "RB2"
    assert initial_tree_sampling.split("_")[2] != "RB2"

    # A missing trial name falls back to a timestamp; the name also seeds
    # the RNGs below, so naming a trial makes it reproducible.
    if trial_name is None or trial_name == "None":
        trial_name = utils.get_current_time()

    ####################
    # Path setting
    config = utils.Config(path_config)

    # All output artifacts share one basename that encodes the experiment.
    basename = "%s.%s.%s.aug_%s.%s" \
        % (model_name, initial_tree_sampling,
           utils.get_basename_without_ext(path_config),
           data_augmentation, trial_name)

    if actiontype == "train":
        path_log = os.path.join(config.getpath("results"), basename + ".training.log")
    elif actiontype == "evaluate":
        path_log = os.path.join(config.getpath("results"), basename + ".evaluation.log")
    path_train = os.path.join(config.getpath("results"), basename + ".training.jsonl")
    path_valid = os.path.join(config.getpath("results"), basename + ".validation.jsonl")
    path_snapshot = os.path.join(config.getpath("results"), basename + ".model")
    path_pred = os.path.join(config.getpath("results"), basename + ".evaluation.ctrees")
    path_eval = os.path.join(config.getpath("results"), basename + ".evaluation.json")

    utils.set_logger(path_log)

    ####################
    # Random seed
    # Deterministically derived from the trial name so that reruns with the
    # same name reproduce the same Python/NumPy/CuPy random streams.
    random_seed = trial_name
    random_seed = utils.hash_string(random_seed)
    random.seed(random_seed)
    np.random.seed(random_seed)
    cuda.cupy.random.seed(random_seed)

    ####################
    # Log so far
    utils.writelog("gpu=%d" % gpu)
    utils.writelog("model_name=%s" % model_name)
    utils.writelog("initial_tree_sampling=%s" % initial_tree_sampling)
    utils.writelog("path_config=%s" % path_config)
    utils.writelog("data_augmentation=%s" % data_augmentation)
    utils.writelog("trial_name=%s" % trial_name)
    utils.writelog("actiontype=%s" % actiontype)
    utils.writelog("max_epoch=%s" % max_epoch)
    utils.writelog("dev_size=%s" % dev_size)
    utils.writelog("path_log=%s" % path_log)
    utils.writelog("path_train=%s" % path_train)
    utils.writelog("path_valid=%s" % path_valid)
    utils.writelog("path_snapshot=%s" % path_snapshot)
    utils.writelog("path_pred=%s" % path_pred)
    utils.writelog("path_eval=%s" % path_eval)
    utils.writelog("random_seed=%d" % random_seed)

    ####################
    # Data preparation
    begin_time = time.time()

    train_databatch = dataloader.read_rstdt("train",
                                            relation_level="coarse-grained",
                                            with_root=False)
    test_databatch = dataloader.read_rstdt("test",
                                           relation_level="coarse-grained",
                                           with_root=False)
    vocab_word = utils.read_vocab(
        os.path.join(config.getpath("data"), "rstdt-vocab", "words.vocab.txt"))
    vocab_postag = utils.read_vocab(
        os.path.join(config.getpath("data"), "rstdt-vocab", "postags.vocab.txt"))
    vocab_deprel = utils.read_vocab(
        os.path.join(config.getpath("data"), "rstdt-vocab", "deprels.vocab.txt"))

    if data_augmentation:
        external_train_databatch = dataloader.read_ptbwsj_wo_rstdt(
            with_root=False)
        # Remove documents with only one leaf node.
        # NOTE(review): the predicate *marks* single-EDU documents;
        # presumably utils.filter_databatch drops the marked items — confirm.
        filtering_function = lambda d, i: len(d.batch_edu_ids[i]) == 1
        external_train_databatch = utils.filter_databatch(
            external_train_databatch, filtering_function)

    end_time = time.time()
    utils.writelog("Loaded the corpus. %f [sec.]" % (end_time - begin_time))

    ####################
    # Hyper parameters
    word_dim = config.getint("word_dim")
    postag_dim = config.getint("postag_dim")
    deprel_dim = config.getint("deprel_dim")
    lstm_dim = config.getint("lstm_dim")
    mlp_dim = config.getint("mlp_dim")
    n_init_epochs = config.getint("n_init_epochs")
    negative_size = config.getint("negative_size")
    batch_size = config.getint("batch_size")
    weight_decay = config.getfloat("weight_decay")
    gradient_clipping = config.getfloat("gradient_clipping")
    optimizer_name = config.getstr("optimizer_name")

    utils.writelog("word_dim=%d" % word_dim)
    utils.writelog("postag_dim=%d" % postag_dim)
    utils.writelog("deprel_dim=%d" % deprel_dim)
    utils.writelog("lstm_dim=%d" % lstm_dim)
    utils.writelog("mlp_dim=%d" % mlp_dim)
    utils.writelog("n_init_epochs=%d" % n_init_epochs)
    utils.writelog("negative_size=%d" % negative_size)
    utils.writelog("batch_size=%d" % batch_size)
    utils.writelog("weight_decay=%f" % weight_decay)
    utils.writelog("gradient_clipping=%f" % gradient_clipping)
    utils.writelog("optimizer_name=%s" % optimizer_name)

    ####################
    # Model preparation
    cuda.get_device(gpu).use()

    # Initialize a model.
    # The pretrained word-embedding matrix is cached as .npy so repeated
    # runs skip the slow text-format parse.
    utils.mkdir(os.path.join(config.getpath("data"), "caches"))
    path_embed = config.getpath("pretrained_word_embeddings")
    path_caches = os.path.join(
        config.getpath("data"), "caches",
        "cached." + os.path.basename(path_embed) + ".npy")
    if os.path.exists(path_caches):
        utils.writelog("Loading cached word embeddings ...")
        initialW = np.load(path_caches)
    else:
        initialW = utils.read_word_embedding_matrix(path=path_embed,
                                                    dim=word_dim,
                                                    vocab=vocab_word,
                                                    scale=0.0)
        np.save(path_caches, initialW)

    if model_name == "spanbasedmodel":
        # Span-based model w/ template features
        template_feature_extractor = models.TemplateFeatureExtractor(
            databatch=train_databatch)
        utils.writelog("Template feature size=%d"
                       % template_feature_extractor.feature_size)
        if actiontype == "train":
            for template in template_feature_extractor.templates:
                dim = template_feature_extractor.template2dim[template]
                utils.writelog("Template feature #%s %s" % (dim, template))
        model = models.SpanBasedModel(
            vocab_word=vocab_word,
            vocab_postag=vocab_postag,
            vocab_deprel=vocab_deprel,
            word_dim=word_dim,
            postag_dim=postag_dim,
            deprel_dim=deprel_dim,
            lstm_dim=lstm_dim,
            mlp_dim=mlp_dim,
            initialW=initialW,
            template_feature_extractor=template_feature_extractor)
    elif model_name == "spanbasedmodel2":
        # Span-based model w/o template features
        model = models.SpanBasedModel2(vocab_word=vocab_word,
                                       vocab_postag=vocab_postag,
                                       vocab_deprel=vocab_deprel,
                                       word_dim=word_dim,
                                       postag_dim=postag_dim,
                                       deprel_dim=deprel_dim,
                                       lstm_dim=lstm_dim,
                                       mlp_dim=mlp_dim,
                                       initialW=initialW)
    else:
        raise ValueError("Invalid model_name=%s" % model_name)
    utils.writelog("Initialized the model ``%s''" % model_name)

    # Load pre-trained parameters when evaluating (training starts fresh).
    if actiontype != "train":
        serializers.load_npz(path_snapshot, model)
        utils.writelog("Loaded trained parameters from %s" % path_snapshot)

    model.to_gpu(gpu)

    ####################
    # Decoder preparation
    decoder = decoders.IncrementalCKYDecoder()

    ####################
    # Initializer preparation
    sampler = treesamplers.TreeSampler(initial_tree_sampling.split("_"))

    ####################
    # Training / evaluation
    if actiontype == "train":
        with chainer.using_config("train", True):
            if dev_size > 0:
                # Training with a held-out development split; the gold dev
                # trees are dumped once so training can score against them.
                train_databatch, dev_databatch = dataloader.randomsplit(
                    n_dev=dev_size, databatch=train_databatch)
                with open(
                        os.path.join(config.getpath("results"),
                                     basename + ".valid_gold.ctrees"),
                        "w") as f:
                    for sexp in dev_databatch.batch_nary_sexp:
                        f.write("%s\n" % " ".join(sexp))
            else:
                # Training with the full training set
                dev_databatch = None
            if data_augmentation:
                train_databatch = utils.concat_databatch(
                    train_databatch, external_train_databatch)
            training.train(
                model=model,
                decoder=decoder,
                sampler=sampler,
                max_epoch=max_epoch,
                n_init_epochs=n_init_epochs,
                negative_size=negative_size,
                batch_size=batch_size,
                weight_decay=weight_decay,
                gradient_clipping=gradient_clipping,
                optimizer_name=optimizer_name,
                train_databatch=train_databatch,
                dev_databatch=dev_databatch,
                path_train=path_train,
                path_valid=path_valid,
                path_snapshot=path_snapshot,
                path_pred=os.path.join(config.getpath("results"),
                                       basename + ".valid_pred.ctrees"),
                path_gold=os.path.join(config.getpath("results"),
                                       basename + ".valid_gold.ctrees"))
    elif actiontype == "evaluate":
        with chainer.using_config("train", False), chainer.no_backprop_mode():
            # Test: parse the test set, then score with both the
            # Morey et al. (2018) and the older Marcu-style PARSEVAL variants.
            parsing.parse(model=model,
                          decoder=decoder,
                          databatch=test_databatch,
                          path_pred=path_pred)
            scores = rst_parseval.evaluate(
                pred_path=path_pred,
                gold_path=os.path.join(config.getpath("data"), "rstdt",
                                       "renamed", "test.labeled.nary.ctrees"))
            old_scores = old_rst_parseval.evaluate(
                pred_path=path_pred,
                gold_path=os.path.join(config.getpath("data"), "rstdt",
                                       "renamed", "test.labeled.nary.ctrees"))
            out = {
                "Morey2018": {
                    "Unlabeled Precision": scores["S"]["Precision"] * 100.0,
                    "Precision_info": scores["S"]["Precision_info"],
                    "Unlabeled Recall": scores["S"]["Recall"] * 100.0,
                    "Recall_info": scores["S"]["Recall_info"],
                    "Micro F1": scores["S"]["Micro F1"] * 100.0
                },
                "Marcu2000": {
                    "Unlabeled Precision": old_scores["S"]["Precision"] * 100.0,
                    "Precision_info": old_scores["S"]["Precision_info"],
                    "Unlabeled Recall": old_scores["S"]["Recall"] * 100.0,
                    "Recall_info": old_scores["S"]["Recall_info"],
                    "Micro F1": old_scores["S"]["Micro F1"] * 100.0
                }
            }
            utils.write_json(path_eval, out)
            utils.writelog(utils.pretty_format_dict(out))

    utils.writelog("Done: %s" % basename)
msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \ + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}' print( msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str)) model.session.run(model.optim, feed_dict=feed_dict) # 运行优化 total_batch += 1 if total_batch - last_improved > require_improvement: # 验证集正确率长期不提升,提前结束训练 print("No optimization for a long time, auto-stopping...") flag = True break # 跳出循环 if flag: # 同上 break print('Configuring CNN model...') config = TCNNConfig() if not os.path.exists(vocab_dir): # 如果不存在词汇表,重建 build_vocab(train_dir, vocab_dir, config.vocab_size) categories, cat_to_id = read_sentiment_category() # 训练情感 # categories, cat_to_id = read_type_category() # 训练新闻类型 print(cat_to_id) words, word_to_id = read_vocab(vocab_dir) config.vocab_size = len(words) model = TextCNN(config) train()
def train_val(seed=None):
    ''' Train on the training set, and validate on seen and unseen splits.

    Args:
        seed: Optional RNG seed forwarded to setup().

    Returns:
        Whatever the module-level train() returns.
    '''
    # Which GPU to use.
    device = torch.device('cuda', hparams.device_id)

    # Resume from latest checkpoint (if any).
    if os.path.exists(hparams.load_path):
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']  # iteration at which the checkpoint stopped
    else:
        # In eval-only mode a missing checkpoint is fatal; for training we
        # simply start from scratch.
        if hasattr(args, 'load_path') and hasattr(args, 'eval_only') and args.eval_only:
            sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters  # from config

    # Setup seed and read vocab.
    setup(seed=seed)
    train_vocab_path = os.path.join(hparams.data_path, 'train_vocab.txt')
    if hasattr(hparams, 'external_main_vocab') and hparams.external_main_vocab:
        # An externally supplied vocab overrides the default training vocab.
        train_vocab_path = hparams.external_main_vocab

    # A "verbal" advisor adds a subgoal vocabulary (navigation-instruction
    # tokens) on top of the main training vocab.
    if 'verbal' in hparams.advisor:
        subgoal_vocab_path = os.path.join(hparams.data_path,
                                          hparams.subgoal_vocab)
        vocab = read_vocab([train_vocab_path, subgoal_vocab_path])
    else:
        vocab = read_vocab([train_vocab_path])
    tok = Tokenizer(vocab=vocab, encoding_length=hparams.max_input_length)

    # Create a training environment.
    train_env = VNLABatch(hparams, split='train', tokenizer=tok)

    # Create validation environments.
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        # In eval mode the split is inferred from the checkpoint filename,
        # and only a single logging window of iterations is run.
        if '_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        if '_seen' in hparams.load_path:
            val_splits = ['test_seen']
        end_iter = start_iter + hparams.log_every

    # Map each split name to its (environment, evaluator) pair.
    val_envs = {
        split: (VNLABatch(hparams, split=split, tokenizer=tok,
                          from_train_env=train_env,
                          traj_len_estimates=train_env.traj_len_estimates),
                Evaluation(hparams, [split], hparams.data_path))
        for split in val_splits}

    # Build models.
    model = AttentionSeq2SeqModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)

    # Best validation scores seen so far (overwritten from the checkpoint).
    best_metrics = {'val_seen': -1, 'val_unseen': -1, 'combined': -1}

    # Load model parameters from a checkpoint (if any).
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']  # resume data iteration position

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)

    # Initialize agent: the agent class depends on the advisor type.
    # NOTE(review): if hparams.advisor is neither 'verbal' nor 'direct',
    # `agent` is never bound and the return below raises NameError.
    if 'verbal' in hparams.advisor:
        agent = VerbalAskAgent(model, hparams, device)
    elif hparams.advisor == 'direct':
        agent = AskAgent(model, hparams, device)

    # Train (or, in eval mode, run evaluation over the selected splits).
    return train(train_env, val_envs, agent, model, optimizer, start_iter,
                 end_iter, best_metrics, eval_mode)
def train_val_augment(test_only=False):
    """ Train the listener with the augmented data.

    Args:
        test_only: When True, only the 'val_train_seen' split is validated
            and scan filtering is skipped.
    """
    setup()

    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        # Scan ids are the prefix of each "<scan>_<viewpoint>" feature key.
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    # Optionally load panoramic object-class features.
    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('img_features/objects/pano_object_class.pkl', 'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    aug_path = args.aug  # split name/path of the augmentation data

    # Create the training environment and a second env over augmented paths.
    train_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    aug_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                       splits=[aug_path], tokenizer=tok, name='aug')

    # Log dataset statistics for both environments.
    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))

    # One (environment, evaluator) pair per validation split.
    val_envs = {
        split: (R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in val_env_names
    }

    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
def main(_):
    """Train and evaluate the attention model on the Yelp-2013 dataset.

    Runs one TF1 session: per epoch it trains over the training set, then
    reports validation and test accuracy, checkpointing on improvement.
    Relies on module-level train_writer/valid_writer/test_writer summary
    writers and the loss_fn/train_fn/eval_fn graph builders.
    """
    vocab = read_vocab('data/yelp-2013-w2i.pkl')
    glove_embs = load_glove('glove.6B.{}d.txt'.format(FLAGS.emb_size),
                            FLAGS.emb_size, vocab)
    data_reader = DataReader(train_file='data/yelp-2013-train.pkl',
                             dev_file='data/yelp-2013-dev.pkl',
                             test_file='data/yelp-2013-test.pkl')

    config = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement)
    with tf.Session(config=config) as sess:
        model = Model(cell_dim=FLAGS.cell_dim,
                      att_dim=FLAGS.att_dim,
                      vocab_size=len(vocab),
                      emb_size=FLAGS.emb_size,
                      num_classes=FLAGS.num_classes,
                      dropout_rate=FLAGS.dropout_rate,
                      pretrained_embs=glove_embs)

        # Build loss/optimizer/metric ops on top of the model's graph.
        loss = loss_fn(model.labels, model.logits)
        train_op, global_step = train_fn(loss)
        batch_acc, total_acc, acc_update, metrics_init = eval_fn(
            model.labels, model.logits)
        summary_op = tf.summary.merge_all()

        sess.run(tf.global_variables_initializer())
        train_writer.add_graph(sess.graph)
        saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoints)

        print('\n{}> Start training'.format(datetime.now()))

        epoch = 0
        valid_step = 0
        test_step = 0
        # Scale the eval batch size so that eval takes a comparable number
        # of steps to one training epoch.
        train_test_prop = len(data_reader.train_data) / len(
            data_reader.test_data)
        test_batch_size = int(FLAGS.batch_size / train_test_prop)
        best_acc = float('-inf')

        while epoch < FLAGS.num_epochs:
            epoch += 1
            print('\n{}> Epoch: {}'.format(datetime.now(), epoch))

            # --- Training pass (streaming accuracy reset each epoch) ---
            sess.run(metrics_init)
            for batch_docs, batch_labels in data_reader.read_train_set(
                    FLAGS.batch_size, shuffle=True):
                _step, _, _loss, _acc, _ = sess.run(
                    [global_step, train_op, loss, batch_acc, acc_update],
                    feed_dict=model.get_feed_dict(batch_docs, batch_labels,
                                                  training=True))
                if _step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    train_writer.add_summary(_summary, global_step=_step)
            print('Training accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))

            # --- Validation pass ---
            sess.run(metrics_init)
            for batch_docs, batch_labels in data_reader.read_valid_set(
                    test_batch_size):
                _loss, _acc, _ = sess.run([loss, batch_acc, acc_update],
                                          feed_dict=model.get_feed_dict(
                                              batch_docs, batch_labels))
                valid_step += 1
                if valid_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    valid_writer.add_summary(_summary, global_step=valid_step)
            print('Validation accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))

            # --- Test pass ---
            sess.run(metrics_init)
            for batch_docs, batch_labels in data_reader.read_test_set(
                    test_batch_size):
                _loss, _acc, _ = sess.run([loss, batch_acc, acc_update],
                                          feed_dict=model.get_feed_dict(
                                              batch_docs, batch_labels))
                test_step += 1
                if test_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    test_writer.add_summary(_summary, global_step=test_step)
            test_acc = sess.run(total_acc) * 100
            print('Testing accuracy = {:.2f}'.format(test_acc))

            # NOTE(review): checkpoint selection uses TEST accuracy, not
            # validation accuracy — methodologically questionable; confirm
            # this is intentional before relying on reported numbers.
            if test_acc > best_acc:
                best_acc = test_acc
                saver.save(sess, './' + FLAGS.checkpoint_dir)
                print('Best testing accuracy = {:.2f}'.format(test_acc))

    print("{} Optimization Finished!".format(datetime.now()))
    print('Best testing accuracy = {:.2f}'.format(best_acc))
# Evaluation script for a backoff (smoothed) bigram language model:
# trains on train.txt and reports log-likelihood / perplexity statistics
# on test.txt at line and document granularity.
import math
from lm import UnigramLM, BigramLM, InterpolatedBigramModel
from utils import read_sentence, read_vocab, calculate_bigram_perplexity, calc_average_log_likelihood, calculate_bigram_document_perplexity, calc_average_document_log_likelihood
import numpy as np

# Reading train/valid/test data.
train_data = read_sentence('train.txt')
valid_data = read_sentence('valid.txt')
test_data = read_sentence('test.txt')
vocab = read_vocab('vocab.txt')

# Run: fit the smoothed bigram model on the training sentences.
print("--------------------")
print("Backoff Bigram Model")
print("--------------------")
bigram = BigramLM(train_data, vocab, smoothing=True)

# Single-line sanity check on the first test sentence.
print("Average log likelihood of first line in test: %s" %
      (calc_average_log_likelihood(bigram, test_data[:1])))
print("Average ppl of first line in test: %s" %
      (calculate_bigram_perplexity(bigram, test_data[:1])))

# Statistics over the first 100 test lines.
loglikelihood = calc_average_log_likelihood(bigram, test_data[:100])
ppl = calculate_bigram_perplexity(bigram, test_data[:100])
print("Mean of loglikelihood of first 100 line: %s" % np.mean(loglikelihood))
print("Variance of loglikelihood of first 100 line: %s" % np.var(loglikelihood))
print("Mean of ppl of first 100 line: %s" % np.mean(ppl))
print("Variance of ppl of first 100 line: %s" % np.var(ppl))

# Document-level metrics over the whole test set.
print("Average ppl of document: %s" %
      (calculate_bigram_document_perplexity(bigram, test_data)))
print("Average log likelihood of document: %s" %
      (calc_average_document_log_likelihood(bigram, test_data)))
print("\n")
# Select the visual feature set (ImageNet or Places365 ResNet-152 features),
# load image/candidate features and vocabulary, then instantiate a listener
# agent from a pretrained checkpoint.
PLACE365_CANDIDATE_FEATURES = 'img_features/ResNet-152-places365-candidate.tsv'

if args.place365:
    features = PLACE365_FEATURES
    CANDIDATE_FEATURES = PLACE365_CANDIDATE_FEATURES
else:
    features = IMAGENET_FEATURES
    CANDIDATE_FEATURES = IMAGENET_CANDIDATE_FEATURES

# Load features and feature_candidates.
feature_dict = read_img_features(features)
candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)

# Load GloVe embeddings and vocab.
glove_path = 'tasks/R2R/data/train_glove.npy'
glove = np.load(glove_path)
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)

# Instantiate the listener and load its pretrained weights.
# (No environment is bound yet — env is None; variable name 'listner' is a
# historical typo kept for compatibility with the rest of the file.)
listner = Seq2SeqAgent(None, "", tok, feat=feature_dict,
                       candidates=candidate_dict,
                       episode_len=args.maxAction)
listner.load(
    'snap/long/ablation_cand_0208_accuGrad_envdrop_ty/state_dict/best_val_unseen'
)

# nav graph loader from env.py
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
              feedback_method, n_iters, model_prefix, blind, args):
    ''' Train on the training set, and validate on seen and unseen splits.

    Builds the tokenizer/encoder pair according to args.encoder_type
    ('vlbert', 'bert', or LSTM otherwise), optionally initializing the BERT
    weights from a pretrained LM checkpoint, then runs train().

    Fix: removed a leftover `pdb.set_trace()` that unconditionally dropped
    into the debugger whenever encoder_type == 'bert' with a pretrained
    model, and deleted stale commented-out code.
    '''
    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text.
    use_bert = (args.encoder_type in ['bert', 'vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)

    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store, nav_graphs, args.panoramic,
                         args.action_space, batch_size=args.batch_size,
                         splits=['train'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)

    # Create validation environments (one env + evaluator per split).
    val_envs = {
        split: (R2RBatch(feature_store, nav_graphs, args.panoramic,
                         args.action_space, batch_size=args.batch_size,
                         splits=[split], tokenizer=tok, path_type=path_type,
                         history=history, blind=blind),
                Evaluation([split], path_type=path_type))
        for split in ['val_seen', 'val_unseen']
    }

    # Build the instruction encoder.
    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers,
                                 args.la_layers, args.bert_type)
            # Swap in the pretrained BERT body and refresh the dropout layer.
            premodel = DicAddActionPreTrain.from_pretrained(
                args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            # Propagate the fine-tuning flags onto the loaded BERT.
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers,
                                 args.la_layers, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(
                args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            encoder = encoder.cuda()
            # BUGFIX: removed stray `pdb.set_trace()` debugger breakpoint here.
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        # Plain LSTM encoder path (uses module-level hyperparameters).
        enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab), word_embedding_size,
                              enc_hidden_size, padding_idx, dropout_ratio,
                              bidirectional=bidirectional).cuda()

    # Context width fed to the decoder: doubled for bidirectional LSTMs;
    # raw BERT hidden size (768) when no LSTM sits on top of BERT.
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768

    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                                 Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 FEATURE_SIZE, args.panoramic,
                                 args.action_space,
                                 args.dec_h_type).cuda()

    train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          val_envs=val_envs, args=args)
def make_env_and_models(args, train_vocab_path, train_splits, test_splits,
                        test_instruction_limit=None,
                        instructions_per_path=None):
    """Build the training env, test envs, and speaker encoder/decoder models.

    Args:
        args: Parsed experiment arguments (env type, feature flags, sizes).
        train_vocab_path: Path to the training vocabulary file.
        train_splits: Split names for the training environment.
        test_splits: Split names, each getting its own (env, evaluator) pair.
        test_instruction_limit: Currently unused (kept for API compatibility;
            see the commented-out TODO block below).
        instructions_per_path: Forwarded to SpeakerEvaluation.

    Returns:
        (train_env, test_envs, encoder, decoder) where test_envs maps
        split name -> (env, SpeakerEvaluation).
    """
    setup()
    # Pick the environment/feature classes for the chosen dataset.
    if args.env == 'r2r':
        EnvBatch = R2RBatch
        ImgFeatures = ImageFeatures
    elif args.env == 'refer360':
        EnvBatch = Refer360Batch
        ImgFeatures = Refer360ImageFeatures
    else:
        raise NotImplementedError(
            'this {} environment is not implemented.'.format(args.env))

    image_features_list = ImgFeatures.from_args(args)
    # Total visual feature width: sum of featurizer dims plus a fixed 128
    # (presumably an orientation/angle embedding — TODO confirm), plus
    # optional 64-dim visited/oracle embeddings.
    feature_size = sum(
        [featurizer.feature_dim for featurizer in image_features_list]) + 128
    if args.use_visited_embeddings:
        feature_size += 64
    if args.use_oracle_embeddings:
        feature_size += 64
    action_embedding_size = feature_size

    vocab = read_vocab(train_vocab_path, args.language)
    tok = Tokenizer(vocab=vocab)
    train_env = EnvBatch(image_features_list, splits=train_splits,
                         tokenizer=tok, args=args)

    enc_hidden_size = args.hidden_size // 2 if args.bidirectional else args.hidden_size
    wordvec = np.load(args.wordvec_path)
    word_embedding_size = get_word_embedding_size(args)

    # NOTE(review): the hard-coded per-dataset overrides below clobber the
    # args-derived sizes computed above (the last live assignment wins);
    # confirm which setting is actually intended before changing anything.
    enc_hidden_size = 600  # refer360 >>>
    enc_hidden_size = 512  # refer360 >>>
    # enc_hidden_size = 512 # r2r >>>
    encoder = try_cuda(
        SpeakerEncoderLSTM(action_embedding_size, feature_size,
                           enc_hidden_size, args.dropout_ratio,
                           bidirectional=args.bidirectional))

    word_embedding_size = 300  # refer360 >>>>
    word_embedding_size = 300  # r2r >>>>
    hidden_size = 600  # refer360 >>>
    hidden_size = 512  # refer360 >>>
    # hidden_size = 512 # >>> r2r
    #hidden_size = args.hidden_size
    decoder = try_cuda(
        SpeakerDecoderLSTM(len(vocab), word_embedding_size, hidden_size,
                           args.dropout_ratio, wordvec=wordvec,
                           wordvec_finetune=args.wordvec_finetune))

    # One (environment, evaluator) pair per test split.
    test_envs = {}
    for split in test_splits:
        b = EnvBatch(image_features_list, splits=[split], tokenizer=tok,
                     args=args)
        e = eval_speaker.SpeakerEvaluation(
            [split], instructions_per_path=instructions_per_path, args=args)
        test_envs[split] = (b, e)

    # TODO
    # test_envs = {
    #     split: (BatchEnv(image_features_list, batch_size=batch_size,
    #                      splits=[split],
    #                      tokenizer=tok,
    #                      instruction_limit=test_instruction_limit,
    #                      prefix=args.prefix),
    #             eval_speaker.SpeakerEvaluation(
    #                 [split], instructions_per_path=instructions_per_path, ))
    #     for split in test_splits}

    return train_env, test_envs, encoder, decoder
def test():
    """Evaluate a pretrained SSM listener checkpoint on train/val splits.

    Overrides the module-level `args` with a fixed SSM configuration, builds
    evaluation environments, loads the checkpoint, and prints per-split
    metric summaries. Assumes it is launched one directory below the repo
    root (it chdirs to '..').
    """
    print('current directory', os.getcwd())
    os.chdir('..')  # move to repo root so relative data paths resolve
    print('current directory', os.getcwd())
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    visible_gpu = "0"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

    # Fixed experiment configuration (overrides command-line values).
    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.3
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'adam'
    args.lr = 3e-4
    args.iters = 80000
    args.maxAction = 15
    args.batchSize = 4
    args.target_batch_size = 4
    args.pe_dim = 128
    args.self_train = True
    args.aug = 'tasks/R2R/data/aug_paths.json'
    # NOTE(review): these two re-assignments silently override the values
    # set a few lines above (featdropout 0.3 -> 0.4, iters 80000 -> 200000).
    args.featdropout = 0.4
    args.iters = 200000

    # Map the optimizer name to its torch class.
    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: sgd")
        args.optimizer = torch.optim.SGD

    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'
    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'
    if args.features == 'imagenet':
        features = IMAGENET_FEATURES
    if args.fast_train:
        # Switch to the pre-shrunk "-fast" feature file for quick runs.
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext

    print(args)

    def setup():
        # Seed torch RNGs and build vocab files if they are missing.
        # NOTE(review): defined but never called (the call below is
        # commented out), so the vocab files are assumed to already exist.
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    # setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)

    print('start extract keys...')
    # Scan ids are the prefix of each "<scan>_<viewpoint>" feature key.
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    print('keys extracted...')

    # One environment and one evaluator per split.
    val_envs = {
        split: R2RBatch(feat_dict, batch_size=args.batchSize,
                        splits=[split], tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }
    evaluators = {
        split: Evaluation([split], featurized_scans, tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    # Load the checkpoint and run evaluation.
    learner = Learner(val_envs, "", tok, args.maxAction, process_num=4,
                      max_node=17, visible_gpu=visible_gpu)
    learner.eval_init()
    ckpt = 'snap/%s/state_dict/ssm_ckpt' % args.name
    learner.load_eval(ckpt)
    results = learner.eval()

    # Format per-split metric summaries (score_details is unused here).
    loss_str = ''
    for key in results:
        evaluator = evaluators[key]
        result = results[key]
        score_summary, score_details = evaluator.score(result)
        loss_str += ", %s \n" % key
        for metric, val in score_summary.items():
            loss_str += ', %s: %.3f' % (metric, val)
        loss_str += '\n'
    print(loss_str)
from env import R2RBatch from refer360_env import Refer360Batch from utils import Tokenizer, read_vocab from vocab import TRAIN_VOCAB from train import make_arg_parser from utils import get_arguments from pprint import pprint import os arg_parser = make_arg_parser() arg_parser.add_argument('--cache_path', type=str, required=True) args = get_arguments(arg_parser) vocab = read_vocab(TRAIN_VOCAB, args.language) tok = Tokenizer(vocab) if args.env == 'r2r': EnvBatch = R2RBatch elif args.env in ['refer360']: EnvBatch = Refer360Batch if args.prefix in ['refer360', 'r2r', 'R2R', 'REVERIE', 'r360tiny', 'RxR_en-ALL']: val_splits = ['val_unseen', 'val_seen'] target = 'val_unseen' elif args.prefix in ['touchdown', 'td']: val_splits = ['dev'] target = 'dev' env = EnvBatch(['none'], splits=['train'] + val_splits, tokenizer=tok, args=args) if args.env == 'r2r':
def main(opts):
    """Train (or evaluate) a panoramic R2R seq2seq agent.

    Builds the tokenizer, instruction encoder and policy model from ``opts``,
    optionally resumes from a checkpoint, then runs one of three modes:
    a test-submission evaluation, an eval-only pass over the validation
    splits, or the full train/validate loop that checkpoints the best
    val_unseen success rate.
    """
    # set manual_seed and build vocab
    setup(opts, opts.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(opts.remove_punctuation == 1, opts.reversed == 1,
                    vocab=vocab, encoding_length=opts.max_cap_length)

    # create language instruction encoder
    # NOTE(review): padding_idx is a module-level name, not part of opts.
    encoder_kwargs = {
        'opts': opts,
        'vocab_size': len(vocab),
        'embedding_size': opts.word_embedding_size,
        'hidden_size': opts.rnn_hidden_size,
        'padding_idx': padding_idx,
        'dropout_ratio': opts.rnn_dropout,
        'bidirectional': opts.bidirectional == 1,
        'num_layers': opts.rnn_num_layers
    }
    print('Using {} as encoder ...'.format(opts.lang_embed))
    if 'lstm' in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError('Unknown {} language embedding'.format(opts.lang_embed))
    print(encoder)

    # create policy model
    policy_model_kwargs = {
        'opts': opts,
        'img_fc_dim': opts.img_fc_dim,
        'img_fc_use_batchnorm': opts.img_fc_use_batchnorm == 1,
        'img_dropout': opts.img_dropout,
        'img_feat_input_dim': opts.img_feat_input_dim,
        'rnn_hidden_size': opts.rnn_hidden_size,
        'rnn_dropout': opts.rnn_dropout,
        'max_len': opts.max_cap_length,
        'max_navigable': opts.max_navigable
    }
    if opts.arch == 'self-monitoring':
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == 'speaker-baseline':
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError('Unknown {} model for seq2seq agent'.format(opts.arch))
    print(model)

    encoder = encoder.to(device)
    model = model.to(device)
    # One optimizer over both encoder and policy parameters.
    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)

    # optionally resume from a checkpoint; best_success_rate is only bound here
    # when resuming (see the `best_success_rate if opts.resume else 0.0` below)
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(
            opts, model, encoder, optimizer)

    # if a secondary exp name is specified, this is useful when resuming from
    # a previous saved experiment and save to another experiment, e.g.,
    # pre-trained on synthetic data and fine-tune on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary

    feature, img_spec = load_features(opts.img_feat_dir)

    if opts.test_submission:
        assert opts.resume, 'The model was not resumed before running for submission.'
        # ('split name', (env, evaluator)) — same shape as val_envs items below.
        test_env = ('test', (R2RPanoBatch(opts, feature, img_spec,
                                          batch_size=opts.batch_size,
                                          splits=['test'], tokenizer=tok),
                             Evaluation(['test'])))
        agent_kwargs = {
            'opts': opts,
            'env': test_env[1][0],
            'results_path': "",
            'encoder': encoder,
            'model': model,
            'feedback': opts.feedback
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return

    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec,
                                 batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['train'], tokenizer=tok)
    else:
        # Start on synthetic data; swapped to 'train' later (see end of loop).
        train_env = R2RPanoBatch(opts, feature, img_spec,
                                 batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['synthetic'], tokenizer=tok)

    val_envs = {split: (R2RPanoBatch(opts, feature, img_spec,
                                     batch_size=opts.batch_size,
                                     splits=[split], tokenizer=tok),
                        Evaluation([split]))
                for split in ['val_seen', 'val_unseen']}

    # create agent
    agent_kwargs = {
        'opts': opts,
        'env': train_env,
        'results_path': "",
        'encoder': encoder,
        'model': model,
        'feedback': opts.feedback
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)

    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer, opts.train_iters_epoch)

    if opts.eval_beam or opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(
                trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return

    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)

    # When resuming, keep the checkpointed best; otherwise start from 0.
    best_success_rate = best_success_rate if opts.resume else 0.0

    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)

        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))
            # index 1 is val_unseen (dict insertion order: val_seen, val_unseen)
            success_rate_compare = success_rate[1]

            if is_experiment():
                # remember best val_unseen success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare, best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(best_success_rate))

                # save the model if it is the best so far
                save_checkpoint({
                    'opts': opts,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'encoder_state_dict': encoder.state_dict(),
                    'best_success_rate': best_success_rate,
                    'optimizer': optimizer.state_dict(),
                    'max_episode_len': opts.max_episode_len,
                }, is_best, checkpoint_dir=opts.checkpoint_dir, name=opts.exp_name)

        # Switch from synthetic to real training data after the warm-up epochs.
        if opts.train_data_augmentation and epoch == opts.epochs_data_augmentation:
            train_env = R2RPanoBatch(opts, feature, img_spec,
                                     batch_size=opts.batch_size, seed=opts.seed,
                                     splits=['train'], tokenizer=tok)

    print("--> Finished training")
import numpy as np def check(ar): ar = ar.cpu().detach().numpy() return np.any(np.isnan(ar)) def check2(ar): # ar = ar.cpu().numpy() return np.any(np.isnan(ar)) import utils TRAIN_VOCAB = '../tasks/R2R/data/train_vocab.txt' vocab = utils.read_vocab(TRAIN_VOCAB) tok = utils.Tokenizer(vocab=vocab, encoding_length=args.maxInput) # class EncoderLSTM(nn.Module): ''' Encodes navigation instructions, returning hidden state context (for attention methods) and a decoder initial state. ''' def __init__(self, vocab_size, embedding_size, hidden_size, padding_idx, dropout_ratio, bidirectional=False, num_layers=1):
def get_dataloaders(args): model_prefix = '{}_{}'.format(args.model_type, args.train_id) log_path = args.LOG_DIR + model_prefix + '/' checkpoint_path = args.CHK_DIR + model_prefix + '/' result_path = args.RESULT_DIR + model_prefix + '/' cp_file = checkpoint_path + "best_model.pth.tar" init_epoch = 0 if not os.path.exists(log_path): os.makedirs(log_path) if not os.path.exists(checkpoint_path): os.makedirs(checkpoint_path) ## set up the logger set_logger(os.path.join(log_path, 'train.log')) ## save argparse parameters with open(log_path + 'args.yaml', 'w') as f: for k, v in args.__dict__.items(): f.write('{}: {}\n'.format(k, v)) logging.info('Training model: {}'.format(model_prefix)) ## set up vocab txt # create txt here print('running setup') setup(args, clear=False) print(args.__dict__) # indicate src and tgt language if args.source_language == 'en': src, tgt = 'en', 'zh' else: src, tgt = 'zh', 'en' maps = {'en': args.TRAIN_VOCAB_EN, 'zh': args.TRAIN_VOCAB_ZH} vocab_src = read_vocab(maps[src]) tok_src = Tokenizer(language=src, vocab=vocab_src, encoding_length=args.MAX_INPUT_LENGTH, zh_tok='jieba') vocab_tgt = read_vocab(maps[tgt]) tok_tgt = Tokenizer(language=tgt, vocab=vocab_tgt, encoding_length=args.MAX_INPUT_LENGTH, zh_tok='jieba') logging.info('Vocab size src/tgt:{}/{}'.format(len(vocab_src), len(vocab_tgt))) ## Setup the training, validation, and testing dataloaders train_loader, val_loader, test_loader = create_split_loaders( args.DATA_DIR, (tok_src, tok_tgt), args.batch_size, args.MAX_VID_LENGTH, (src, tgt), num_workers=4, pin_memory=True) logging.info('train/val/test size: {}/{}/{}'.format( len(train_loader), len(val_loader), len(test_loader))) return train_loader, val_loader, test_loader, tok_src, tok_tgt, len( vocab_src), len(vocab_tgt)
def main(_):
    """Train and evaluate the review-score model on the ICLR review data.

    For each epoch: one pass over the training set, one over the validation
    set and one over the test set, saving per-phase predictions/metrics to a
    timestamped output folder and checkpointing whenever the test accuracy
    improves.

    NOTE(review): train_writer / valid_writer / test_writer and the helper
    functions (loss_fn, train_fn, eval_fn, save_*) are module-level names
    defined outside this chunk.
    """
    vocab = read_vocab('data/ICLR_Review_all-w2i.pkl')
    glove_embs = load_glove('glove.6B.{}d.txt'.format(FLAGS.emb_size),
                            FLAGS.emb_size, vocab)
    data_reader = DataReader(train_file='data/ICLR_Review_all-train.pkl',
                             dev_file='data/ICLR_Review_all-dev.pkl',
                             test_file='data/ICLR_Review_all-test.pkl')

    config = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement)
    with tf.Session(config=config) as sess:
        model = Model(cell_dim=FLAGS.cell_dim,
                      att_dim=FLAGS.att_dim,
                      vocab_size=len(vocab),
                      emb_size=FLAGS.emb_size,
                      num_classes=FLAGS.num_classes,
                      dropout_rate=FLAGS.dropout_rate,
                      pretrained_embs=glove_embs)

        loss = loss_fn(model.labels, model.logits)
        train_op, global_step = train_fn(loss)
        # Streaming accuracy metrics; reset via metrics_init before each phase.
        batch_acc, total_acc, acc_update, metrics_init, predictions = eval_fn(
            model.labels, model.logits)
        summary_op = tf.summary.merge_all()

        sess.run(tf.global_variables_initializer())
        train_writer.add_graph(sess.graph)
        saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoints)

        print('\n{}> Start training'.format(datetime.now()))

        # One output folder per run, keyed by start time.
        result_save_folder = str(datetime.now())
        output_folder = os.path.join('.', 'output')
        create_folder_if_not_exists(output_folder)
        stats_graph_folder = os.path.join(
            output_folder, result_save_folder)  # Folder where to save graphs
        create_folder_if_not_exists(stats_graph_folder)

        epoch = 0
        valid_step = 0
        test_step = 0
        # Scale the eval batch size by the train/test size ratio so both
        # phases take a comparable number of steps.
        train_test_prop = len(data_reader.train_data) / len(
            data_reader.test_data)
        test_batch_size = int(FLAGS.batch_size / train_test_prop)
        best_acc = float('-inf')

        while epoch < FLAGS.num_epochs:
            epoch += 1
            print('\n{}> Epoch: {}'.format(datetime.now(), epoch))

            # --- training pass ---
            sess.run(metrics_init)
            all_labels = []
            all_y_pred = []
            for batch_docs, batch_labels in data_reader.read_train_set(
                    FLAGS.batch_size, shuffle=True):
                _step, _, _loss, _acc, _, y_pred_batch = sess.run(
                    [
                        global_step, train_op, loss, batch_acc, acc_update,
                        predictions
                    ],
                    feed_dict=model.get_feed_dict(batch_docs, batch_labels,
                                                  training=True))
                all_labels += batch_labels
                # y_pred_batch_array = y_pred_batch.eval(session=sess)
                y_pred_batch_list = y_pred_batch.tolist()
                all_y_pred += y_pred_batch_list
                if _step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    train_writer.add_summary(_summary, global_step=_step)
            print('Training accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))
            save_results(all_labels, all_y_pred, stats_graph_folder, 'train',
                         epoch)

            # --- validation pass ---
            sess.run(metrics_init)
            all_valid_labels = []
            all_valid_y_pred = []
            for batch_docs, batch_labels in data_reader.read_valid_set(
                    test_batch_size):
                _loss, _acc, _, valid_y_pred_batch = sess.run(
                    [loss, batch_acc, acc_update, predictions],
                    feed_dict=model.get_feed_dict(batch_docs, batch_labels))
                all_valid_labels += batch_labels
                valid_y_pred_batch_list = valid_y_pred_batch.tolist()
                all_valid_y_pred += valid_y_pred_batch_list
                valid_step += 1
                if valid_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    valid_writer.add_summary(_summary,
                                             global_step=valid_step)
            print('Validation accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))
            save_optimized_presicion(all_valid_labels, all_valid_y_pred,
                                     stats_graph_folder, 'valid', epoch)
            save_distance_measure(all_valid_labels, all_valid_y_pred,
                                  stats_graph_folder, 'valid', epoch)
            save_results(all_valid_labels, all_valid_y_pred,
                         stats_graph_folder, 'valid', epoch)

            # --- test pass (model selection uses test accuracy) ---
            sess.run(metrics_init)
            all_test_labels = []
            all_test_y_pred = []
            for batch_docs, batch_labels in data_reader.read_test_set(
                    test_batch_size):
                _loss, _acc, _, test_y_pred_batch = sess.run(
                    [loss, batch_acc, acc_update, predictions],
                    feed_dict=model.get_feed_dict(batch_docs, batch_labels))
                all_test_labels += batch_labels
                test_y_pred_batch_list = test_y_pred_batch.tolist()
                all_test_y_pred += test_y_pred_batch_list
                test_step += 1
                if test_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    test_writer.add_summary(_summary, global_step=test_step)
            test_acc = sess.run(total_acc) * 100
            print('Testing accuracy = {:.2f}'.format(test_acc))
            save_optimized_presicion(all_test_labels, all_test_y_pred,
                                     stats_graph_folder, 'test', epoch)
            save_distance_measure(all_test_labels, all_test_y_pred,
                                  stats_graph_folder, 'test', epoch)
            save_results(all_test_labels, all_test_y_pred,
                         stats_graph_folder, 'test', epoch)

            # Checkpoint on best test accuracy.
            if test_acc > best_acc:
                best_acc = test_acc
                saver.save(sess, FLAGS.checkpoint_dir)
                print('Best testing accuracy = {:.2f}'.format(best_acc))

        print("{} Optimization Finished!".format(datetime.now()))
        print('Best testing accuracy = {:.2f}'.format(best_acc))
raise ValueError( """usage: python run_cnn.py [train / test] [cnn/rnn]""") if sys.argv[2] not in ['cnn', 'rnn']: raise ValueError( """usage: python run_cnn.py [train / test] [cnn/rnn]""") print('Configuring {0} model...'.format(sys.argv[2])) model_name = sys.argv[2] if model_name == 'cnn': config = TCNNconfig() elif model_name == 'rnn': config = TRNNconfig() word2id, id2word = read_vocab( "/search/odin/wts/my-pytorch-try/text_cnn_rnn/data/cnews/cnews.vocab.txt" ) config.word2index = word2id config.index2word = id2word config.vocab_size = len(word2id) # 词汇表大小 config.category2index = { "财经": 0, "房产": 1, "家居": 2, "教育": 3, "科技": 4, "时尚": 5, "时政": 6, "体育": 7, "游戏": 8, "娱乐": 9
import utils as utils import numpy as np path = '20news-bydate-matlab/matlab' features = utils.read_features("expanded.txt") label_array = utils.read_label(path, 'train.label') print len(label_array) answer_label_array = utils.read_label(path, 'test.label') test_features = utils.read_features("test_expanded.txt") vocab = utils.read_vocab("vocabulary.txt") #remove stop words from stop_words import get_stop_words stop_words = get_stop_words('en') # vocab = set(vocab) # example_count=11269# total examples in training set #nb.train(features[0:example_count,],label_array[0:example_count,]) clf.train(features, label_array, vocab=vocab) # # Y=[9,6] # print "\nTrying to predict: "+str(Y) correct_count = 0
def train():
    """Train the hierarchical attention network (HAN) document classifier.

    Builds the TF1 graph, streams minibatches from the train/valid datasets,
    writes loss/accuracy summaries per phase, and checkpoints the model
    whenever the validation accuracy improves.
    """
    vocab = read_vocab(FLAGS.vocab_data)
    glove = load_glove("glove.6B.{}d.txt".format(FLAGS.emb_size),
                       FLAGS.emb_size, vocab)
    train = Dataset(filepath=FLAGS.train_data)
    valid = Dataset(filepath=FLAGS.valid_data)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            han = HieAttNet(cell_type=FLAGS.cell_type,
                            hid_size=FLAGS.hid_size,
                            att_size=FLAGS.att_size,
                            vocab_size=len(vocab),
                            emb_size=FLAGS.emb_size,
                            num_classes=FLAGS.num_classes,
                            pretrained_embs=glove,
                            l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                han.loss, global_step=global_step)

            # Streaming accuracy; reset via metrics_init_op before each phase.
            acc, acc_op = tf.metrics.accuracy(labels=han.labels,
                                              predictions=han.predictions,
                                              name="metrics/acc")
            metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="metrics")
            metrics_init_op = tf.variables_initializer(var_list=metrics_vars)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", han.loss)
            acc_summary = tf.summary.scalar("accuracy", han.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Valid summaries
            valid_step = 0
            valid_summary_op = tf.summary.merge([loss_summary, acc_summary])
            valid_summary_dir = os.path.join(out_dir, "summaries", "valid")
            valid_summary_writer = tf.summary.FileWriter(
                valid_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # initialize all variables
            best_valid_acc = 0.0
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            # training and validating loop
            for epoch in range(FLAGS.num_epochs):
                print('-' * 100)
                print('\n{}> epoch: {}\n'.format(
                    datetime.datetime.now().isoformat(), epoch))
                sess.run(metrics_init_op)

                # Training process
                # NOTE(review): `bacth_iter` is the Dataset API's (misspelled)
                # method name, defined outside this chunk.
                for batch in train.bacth_iter(FLAGS.batch_size,
                                              desc="Training",
                                              shuffle=True):
                    labels, docs = zip(*batch)
                    padded_docs, sent_length, max_sent_length, word_length, max_word_length = normalize(
                        docs)
                    feed_dict = {
                        han.docs: padded_docs,
                        han.labels: labels,
                        han.sent_length: sent_length,
                        han.word_length: word_length,
                        han.max_sent_length: max_sent_length,
                        han.max_word_length: max_word_length,
                        han.is_training: True,
                        han.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, summaries, loss, accuracy, _ = sess.run([
                        train_op, global_step, train_summary_op, han.loss,
                        han.accuracy, acc_op
                    ], feed_dict)
                    train_summary_writer.add_summary(summaries, step)

                    # training log display
                    # if step % FLAGS.display_every == 0:
                    #     time_str = datetime.datetime.now().isoformat()
                    #     print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

                    # Model checkpoint
                    # if step % FLAGS.checkpoint_every == 0:
                    #     path = saver.save(sess, checkpoint_prefix, global_step=step)
                    #     print("saved model checkpoint to {}\n".format(path))

                print("\ntraining accuracy = {:.2f}\n".format(
                    sess.run(acc) * 100))

                sess.run(metrics_init_op)

                # Validating process (no dropout, is_training=False)
                for batch in valid.bacth_iter(FLAGS.batch_size,
                                              desc="Validating",
                                              shuffle=False):
                    valid_step += 1
                    labels, docs = zip(*batch)
                    padded_docs, sent_length, max_sent_length, word_length, max_word_length = normalize(
                        docs)
                    feed_dict = {
                        han.docs: padded_docs,
                        han.labels: labels,
                        han.sent_length: sent_length,
                        han.max_sent_length: max_sent_length,
                        han.word_length: word_length,
                        han.max_word_length: max_word_length,
                        han.is_training: False,
                        han.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, _ = sess.run(
                        [valid_summary_op, han.loss, han.accuracy, acc_op],
                        feed_dict)
                    valid_summary_writer.add_summary(summaries,
                                                     global_step=valid_step)

                valid_acc = sess.run(acc) * 100
                print("\nvalidating accuracy = {:.2f}\n".format(valid_acc))
                print("previous best validating accuracy = {:.2f}\n".format(
                    best_valid_acc))

                # model checkpoint
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    path = saver.save(sess, checkpoint_prefix)
                    print("saved model checkpoint to {}\n".format(path))

            print("{} optimization finished!\n".format(
                datetime.datetime.now()))
            print("best validating accuracy = {:.2f}\n".format(best_valid_acc))
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    # load object feature
    # Sparse object features (optional, controlled by args.sparseObj).
    obj_s_feat = None
    if args.sparseObj:
        print("Start loading the object sparse feature")
        start = time.time()
        obj_s_feat = np.load(sparse_obj_feat, allow_pickle=True).item()
        print(
            "Finish Loading the object sparse feature from %s in %0.4f seconds"
            % (sparse_obj_feat, time.time() - start))

    # Dense object features are split across two files; merge the dicts.
    obj_d_feat = None
    if args.denseObj:
        print("Start loading the object dense feature")
        start = time.time()
        obj_d_feat1 = np.load(dense_obj_feat1, allow_pickle=True).item()
        obj_d_feat2 = np.load(dense_obj_feat2, allow_pickle=True).item()
        obj_d_feat = {**obj_d_feat1, **obj_d_feat2}
        print(
            "Finish Loading the dense object dense feature from %s and %s in %0.4f seconds"
            % (dense_obj_feat1, dense_obj_feat2, time.time() - start))

    # Feature keys look like "<scan>_<viewpoint>"; keep the scan ids.
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict,
                         obj_d_feat=obj_d_feat,
                         obj_s_feat=obj_s_feat,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)

    from collections import OrderedDict

    # Evaluation environments; 'test' only for submissions, 'train' only when
    # not beam-searching.
    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # val_env_names.append('train')

    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict,
                           obj_d_feat=obj_d_feat,
                           obj_s_feat=obj_s_feat,
                           batch_size=args.batchSize,
                           splits=[split],
                           tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))

    # Dispatch on the requested mode.
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        # NOTE(review): unknown args.train; assert is stripped under -O —
        # consider raising ValueError instead.
        assert False
def train():
    """Train a character-level seq2seq "dream decoder" on data/data.csv.

    Each line of the file is a JSON object with "dream" (input) and "decode"
    (target) strings; characters are space-joined so the vocabulary is
    per-character. Trains for args.epoch epochs, checkpointing and printing
    sample decodings every 100 global steps.
    """
    # Load (dream, decode) pairs; each line is parsed twice, once per field.
    lines = [line.strip() for line in open("data/data.csv", "r").readlines()]
    lines = [(json.loads(line)["dream"], json.loads(line)["decode"])
             for line in lines]
    # Space-separate individual characters for char-level tokenization.
    inputs = [" ".join(list(q)) for q, a in lines]
    outputs = [" ".join(list(a)) for q, a in lines]
    all_info = ' '.join(inputs + outputs).split()

    # Input and output share one vocabulary (built on first run, then cached).
    if os.path.exists(args.vocab_file):
        dictionary_input, rev_dictionary_input = read_vocab(args.vocab_file)
    else:
        dictionary_input, rev_dictionary_input = build_vocab(
            all_info, args.vocab_file)
    dictionary_output, rev_dictionary_output = dictionary_input, rev_dictionary_input

    # Keep only pairs whose (tokenized) lengths fall within [2, 100].
    min_line_length = 2
    max_line_length = 100
    data_filter = [(q, a) for q, a in zip(inputs, outputs)
                   if len_check(q, min_line_length, max_line_length)
                   and len_check(a, min_line_length, max_line_length)]
    random.shuffle(data_filter)
    inputs = [q for q, a in data_filter]
    outputs = [a + ' EOS' for q, a in data_filter]
    tf.logging.info("sample size: %s", len(inputs))

    # First 100 shuffled samples form the dev set; the rest train.
    inputs_dev = inputs[0:100]
    outputs_dev = outputs[0:100]
    inputs_train = inputs[100:]
    outputs_train = outputs[100:]
    inputs_train = str_idx(inputs_train, dictionary_input,
                           dictionary_input['UNK'])
    print(inputs_train[:2])
    outputs_train = str_idx(outputs_train, dictionary_output,
                            dictionary_output['UNK'])
    print(outputs_train[:2])
    inputs_dev = str_idx(inputs_dev, dictionary_input,
                         dictionary_input['UNK'])
    outputs_dev = str_idx(outputs_dev, dictionary_output,
                          dictionary_output['UNK'])

    model = Seq2Seq(args.size_layer, args.num_layers, args.embedded_size,
                    len(dictionary_input), len(dictionary_output),
                    args.learning_rate, dictionary_input)

    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            # Resume from the latest checkpoint when one exists.
            ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                tf.logging.info("restore model from patch: %s",
                                ckpt.model_checkpoint_path)
                # load the pre-trained model
                saver = tf.train.Saver(max_to_keep=4)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                saver = tf.train.Saver(max_to_keep=4)
                sess.run(tf.global_variables_initializer())

            global_step = 0
            for epoch_index in range(args.epoch):
                total_loss, total_accuracy = 0, 0
                batch_num = 0
                for k in range(0, len(inputs_train), args.batch_size):
                    batch_num = batch_num + 1
                    index = min(k + args.batch_size, len(inputs_train))
                    batch_x, seq_x = pad_sentence_batch(
                        inputs_train[k:index], dictionary_input["PAD"])
                    batch_y, seq_y = pad_sentence_batch(
                        outputs_train[k:index], dictionary_input["PAD"])
                    predicted, accuracy, loss, _, global_step = sess.run(
                        fetches=[
                            model.predicting_ids, model.accuracy, model.cost,
                            model.optimizer, model.global_step
                        ],
                        feed_dict={
                            model.X: batch_x,
                            model.Y: batch_y
                        })
                    total_loss += loss
                    total_accuracy += accuracy

                    # Every 100 steps: log, checkpoint and print decodings.
                    # NOTE(review): ids 0-3 filtered below are presumably the
                    # special PAD/EOS/UNK/GO tokens — verify against build_vocab.
                    if global_step % 100 == 0:
                        print(
                            '%s epoch: %d, global_step: %d, loss: %f, accuracy: %f'
                            % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                               epoch_index + 1, global_step, loss, accuracy))
                        saver.save(sess,
                                   os.path.join(args.checkpoint_dir,
                                                "seq2seq.ckpt"),
                                   global_step=global_step)
                        # Sample decodings from the current training batch.
                        print("+" * 20)
                        for i in range(4):
                            print('row %d' % (i + 1))
                            print(
                                'dream:', ''.join([
                                    rev_dictionary_input[n] for n in batch_x[i]
                                    if n not in [0, 1, 2, 3]
                                ]))
                            print(
                                'real meaning:', ''.join([
                                    rev_dictionary_output[n]
                                    for n in batch_y[i]
                                    if n not in [0, 1, 2, 3]
                                ]))
                            print(
                                'dream decoding:', ''.join([
                                    rev_dictionary_output[n]
                                    for n in predicted[i]
                                    if n not in [0, 1, 2, 3]
                                ]), '')

                        # Decode a random dev-set batch for comparison.
                        index = list(range(len((inputs_dev))))
                        random.shuffle(index)
                        batch_x, _ = pad_sentence_batch([
                            inputs_dev[i] for i in index
                        ][:args.batch_size], dictionary_input["PAD"])
                        batch_y, _ = pad_sentence_batch([
                            outputs_dev[i] for i in index
                        ][:args.batch_size], dictionary_input["PAD"])
                        predicted = sess.run(model.predicting_ids,
                                             feed_dict={model.X: batch_x})
                        print("-" * 20)
                        for i in range(4):
                            print('row %d' % (i + 1))
                            # print(batch_x[i])
                            # print(predicted[i])
                            print(
                                'dream:', ''.join([
                                    rev_dictionary_input[n] for n in batch_x[i]
                                    if n not in [0, 1, 2, 3]
                                ]))
                            print(
                                'real meaning:', ''.join([
                                    rev_dictionary_output[n]
                                    for n in batch_y[i]
                                    if n not in [0, 1, 2, 3]
                                ]))
                            print(
                                'dream decoding:', ''.join([
                                    rev_dictionary_output[n]
                                    for n in predicted[i]
                                    if n not in [0, 1, 2, 3]
                                ]), '')

                total_loss /= batch_num
                total_accuracy /= batch_num
                print(
                    '***%s epoch: %d, global_step: %d, avg loss: %f, avg accuracy: %f'
                    % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                       epoch_index + 1, global_step, total_loss,
                       total_accuracy))