def main(unused_args):
    ''' Trains model from data '''
    if not FLAGS.input_data:
        raise ValueError("Must set --input_data to the filename of input dataset")
    if not FLAGS.train_dir:
        raise ValueError("Must set --train_dir to the directory where training files will be saved")
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    with tf.Graph().as_default(), tf.Session() as session:
        # To make tf.train.Saver write parameters as part of the saved file,
        # add params to the graph as variables (hackish? - MK)
        with tf.variable_scope("params", reuse=None):
            num_layers_var = tf.Variable(FLAGS.num_layers, trainable=False, name='num_layers')
            hidden_size_var = tf.Variable(FLAGS.hidden_size, trainable=False, name='hidden_size')

            # If a pre-trained model is loaded from file, use the loaded vocabulary and NN geometry.
            # Otherwise, compute the vocabulary and use command-line params for num_layers and hidden_size.
            if FLAGS.load_model:
                vocab_size_var = tf.Variable(0, trainable=False, name='vocab_size')
                tf.train.Saver([num_layers_var, hidden_size_var, vocab_size_var]).restore(session, FLAGS.load_model)
                vocab_var = tf.Variable([0] * vocab_size_var.eval(), trainable=False, name='vocab')
                tf.train.Saver([vocab_var]).restore(session, FLAGS.load_model)

                FLAGS.num_layers = np.asscalar(num_layers_var.eval())  # np.asscalar converts np.int32 to a Python int
                FLAGS.hidden_size = np.asscalar(hidden_size_var.eval())
                vocab = Vocab.from_array(vocab_var.eval())

                train_data, valid_data, test_data, vocab = reader.read_datasets(
                    FLAGS.input_data, FLAGS.train_fraction, FLAGS.valid_fraction, vocab=vocab)
            else:
                train_data, valid_data, test_data, vocab = reader.read_datasets(
                    FLAGS.input_data, FLAGS.train_fraction, FLAGS.valid_fraction, vocab_size=FLAGS.vocab_size)
                vocab_size_var = tf.Variable(vocab.size, trainable=False, name='vocab_size')
                vocab_var = tf.Variable(vocab.to_array(), trainable=False, name='vocab')

        # build training graph
        initializer = tf.random_uniform_initializer(-FLAGS.init_scale, FLAGS.init_scale)
        with tf.variable_scope("model", initializer=initializer):
            m = graph.inference_graph(vocab.size, FLAGS.num_layers, FLAGS.hidden_size,
                                      FLAGS.batch_size, FLAGS.num_steps, FLAGS.dropout_rate)
            m.update(graph.cost_graph(m.logits, FLAGS.batch_size, FLAGS.num_steps, vocab.size))
            m.update(graph.training_graph(m.cost, FLAGS.grad_clip))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        # build graph for validation and testing (shares parameters with the training graph!)
        with tf.variable_scope("model", reuse=True):
            mvalid = graph.inference_graph(vocab.size, FLAGS.num_layers, FLAGS.hidden_size,
                                           FLAGS.batch_size, FLAGS.num_steps)
            mvalid.update(graph.cost_graph(mvalid.logits, FLAGS.batch_size, FLAGS.num_steps, vocab.size))

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model)
        else:
            print('Created model')
        print('\tnum_layers:', FLAGS.num_layers)
        print('\thidden_size:', FLAGS.hidden_size)
        print('\tvocab_size:', vocab.size)
        print()
        print('Training parameters')
        print('\tbatch_size:', FLAGS.batch_size)
        print('\tnum_steps:', FLAGS.num_steps)
        print('\tlearning_rate:', FLAGS.learning_rate)
        print('\tbeta1:', FLAGS.beta1)
        print('\tbeta2:', FLAGS.beta2)
        print()
        print('Datasets')
        print('\ttraining dataset size:', len(train_data))
        print('\tvalidation dataset size:', len(valid_data))
        print('\ttest dataset size:', len(test_data))
        print()

        # create two summaries: training cost and validation cost
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph=session.graph)
        summary_train = summary_graph('Training cost', ema_decay=0.95)
        summary_valid = summary_graph('Validation cost')

        session.run([
            m.lr.initializer,
            m.beta1.initializer,
            m.beta2.initializer,
        ])
        tf.initialize_all_variables().run()
        session.run([
            tf.assign(m.lr, FLAGS.learning_rate),
            tf.assign(m.beta1, FLAGS.beta1),
            tf.assign(m.beta2, FLAGS.beta2),
        ])

        state = session.run(m.initial_state)
        iterations = len(train_data) // FLAGS.batch_size // FLAGS.num_steps * FLAGS.max_epochs
        for i, (x, y) in enumerate(reader.next_batch(train_data, FLAGS.batch_size, FLAGS.num_steps)):
            if i >= iterations:
                break
            start_time = time.time()

            cost, state, _ = session.run([m.cost, m.final_state, m.train_op], {
                m.input_data: x,
                m.targets: y,
                m.initial_state: state
            })

            epoch = float(i) / (len(train_data) // FLAGS.batch_size // FLAGS.num_steps)
            time_elapsed = time.time() - start_time
            print('%d/%d (epoch %.3f), train_loss = %6.8f, time/batch = %.4fs' %
                  (i + 1, iterations, epoch, cost, time_elapsed))
            session.run([summary_train.update], {summary_train.x: cost})

            if (i + 1) % FLAGS.eval_val_every == 0 or i == iterations - 1:
                # evaluate loss on validation data
                cost = run_test(session, mvalid, valid_data, FLAGS.batch_size, FLAGS.num_steps)
                print("validation cost = %6.8f" % cost)
                save_as = '%s/epoch%.2f_%.4f.model' % (FLAGS.train_dir, epoch, cost)
                saver.save(session, save_as)

                # write out summary events
                buffer, = session.run([summary_train.summary])
                summary_writer.add_summary(buffer, i)
                session.run([summary_valid.update], {summary_valid.x: cost})
                buffer, = session.run([summary_valid.summary])
                summary_writer.add_summary(buffer, i)
                summary_writer.flush()

        if len(test_data) > FLAGS.batch_size * FLAGS.num_steps:
            cost = run_test(session, mvalid, test_data, FLAGS.batch_size, FLAGS.num_steps)
            print("Test cost: %.3f" % cost)  # was `test_loss`, which is undefined here
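
# `run_test` is called above but not defined in this fragment. Below is a minimal sketch of
# what it might look like, assuming the same `reader.next_batch` iterator and the model
# attributes (cost, initial_state, final_state, input_data, targets) used in the training
# loop; it is an illustration, not the original implementation.
def run_test(session, m, data, batch_size, num_steps):
    """Runs the cost-only graph over `data` and returns the average per-batch cost."""
    costs = 0.0
    num_batches = 0
    state = session.run(m.initial_state)
    for x, y in reader.next_batch(data, batch_size, num_steps):
        cost, state = session.run([m.cost, m.final_state], {
            m.input_data: x,
            m.targets: y,
            m.initial_state: state
        })
        costs += cost
        num_batches += 1
    return costs / num_batches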
parser.add_argument('--target', help='predict age, gender, both of them, or the joint cross-product',
                    choices=('age', 'gender', 'both', 'joint'), default='both')
# parser.add_argument('--trainer', help='which training algorithm to use',
#                     choices=('adagrad', 'sgd', 'adam', 'adadelta', 'momentum'), default='adam')
parser.add_argument('--num-epochs', help='Number of epochs', default=50, type=int)
# parser.add_argument('--status', help='number of processed instances between status updates', default=10000, type=int)
# parser.add_argument('--noise', help='amount of noise added to embeddings', default=0.1, type=float)
parser.add_argument('--dim-rnn', help='dimensionality of hidden RNN layer', default=50, type=int)
parser.add_argument('--dim-emb', help='dimensionality of word embeddings', default=100, type=int)
parser.add_argument('--dim-out', help='dimensionality of output layer', default=32, type=int)
# parser.add_argument('--dropout', help='dropout probability for final sentence representation', default=0.0, type=float)
parser.add_argument('--batch-size', help='batch size', default=1, type=int)
parser.add_argument('--max-len', help='Maximum length of input', default=100, type=int)
args = parser.parse_args()

# Read in data, mapping words to integers
datasets = read_datasets(args.train, args.test, args.dev, args.target)
train_dataset = datasets['train']
dev_dataset = datasets['dev']
test_dataset = datasets['test']
word_mapper = datasets['word_mapper']
label_mapper = datasets['label_mapper']

word_embs = tf.Variable(tf.random_uniform([len(word_mapper), args.dim_emb], -1, 1, tf.float32),
                        name='word_embs')

# Create dense representations of datasets
X_train, y_train = make_dense(train_dataset['sentences'], args.max_len,
                              train_dataset['labels'], len(label_mapper))
train_sent_lens = np.array([len(sent) for sent in train_dataset['sentences']])
X_dev, y_dev = make_dense(dev_dataset['sentences'], args.max_len,
                          dev_dataset['labels'], len(label_mapper))
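
# The fragment stops before the model graph is built. A hedged sketch (not the original code)
# of how `word_embs`, the padded id matrices and the sentence lengths could feed an LSTM
# encoder, assuming TF 1.x APIs (tf.nn.embedding_lookup, tf.nn.dynamic_rnn, tf.layers.dense);
# the placeholder names below are illustrative only.
x_ids = tf.placeholder(tf.int32, [None, args.max_len], name='x_ids')    # padded word ids
seq_lens = tf.placeholder(tf.int32, [None], name='seq_lens')            # true sentence lengths
embedded = tf.nn.embedding_lookup(word_embs, x_ids)                     # [batch, max_len, dim_emb]
cell = tf.nn.rnn_cell.LSTMCell(args.dim_rnn)
_, final_state = tf.nn.dynamic_rnn(cell, embedded, sequence_length=seq_lens, dtype=tf.float32)
logits = tf.layers.dense(final_state.h, len(label_mapper))              # [batch, num_labels]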
def pretrain_mdnet(datasets, init_model_path, result_dir, load_path=None,
                   shuffle=True, norm=False, dropout=True, regularization=True):
    config = Config()

    # print parameters
    print('shuffle', shuffle)
    print('norm', norm)
    print('dropout', dropout)
    print('regularization', regularization)
    print('init_model_path', init_model_path)
    print('result_dir', result_dir)

    # create directory
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    # load sequences
    train_data = reader.read_datasets(datasets)
    K = len(train_data.data)

    # create session and saver
    gpu_config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.InteractiveSession(config=gpu_config)

    # load model, weights
    model = MDNet(config)
    model.build_trainer(K, config.batch_size, dropout=dropout, regularization=regularization)
    tf.global_variables_initializer().run()
    model.load(init_model_path, sess)
    sess.run(model.lr_rate.assign(config.lr_rate))

    # create saver
    saver = tf.train.Saver([v for v in tf.global_variables() if 'fc6' not in v.name])

    # restore from model
    if load_path is not None:
        saver.restore(sess, load_path)

    # prepare roidb and frame list
    train_loss_file = open(os.path.join(result_dir, 'train_loss.txt'), 'w')
    n_frames = config.batch_frames * config.num_cycle
    for i in range(config.num_cycle):
        loss_total = 0
        print('###### training cycle ' + str(i) + '/' + str(config.num_cycle) + '...')
        seq_i = 0
        for seq, seq_data in train_data.data.items():  # iteritems() is Python 2 only
            print('### training video "' + seq + '"...')
            seq_n_frames = len(seq_data.frames)

            ## prepare roidb
            print('- preparing roidb...')
            seq_data.rois = proc.seq2roidb(seq_data, config)

            ## prepare frame list
            print('- shuffle frames...')
            seq_data.frame_lists = []
            while len(seq_data.frame_lists) < n_frames:
                seq_data.frame_lists = np.r_[
                    seq_data.frame_lists, np.random.permutation(seq_n_frames)]
            seq_data.frame_lists = seq_data.frame_lists[:n_frames]

            ## start training
            # extract batch_size frames
            frame_inds = seq_data.frame_lists[
                config.batch_frames * i:config.batch_frames * (i + 1)].astype(int)

            # sample boxes
            pos_boxes = np.concatenate([
                seq_data.rois[frame_ind].pos_boxes for frame_ind in frame_inds
            ], axis=0)
            neg_boxes = np.concatenate([
                seq_data.rois[frame_ind].neg_boxes for frame_ind in frame_inds
            ], axis=0)
            pos_inds = np.random.permutation(
                config.posPerFrame * config.batch_frames)[:config.batch_pos]
            neg_inds = np.random.permutation(
                config.negPerFrame * config.batch_frames)[:config.batch_neg]

            # pack as boxes, paths
            pos_boxes = pos_boxes[pos_inds]
            neg_boxes = neg_boxes[neg_inds]
            boxes = np.r_[pos_boxes, neg_boxes]
            box_relinds = np.r_[pos_inds // config.posPerFrame,
                                neg_inds // config.negPerFrame]
            paths = [seq_data.frames[ind] for ind in frame_inds[box_relinds]]
            gts = np.repeat(np.identity(2), [config.batch_pos, config.batch_neg], axis=0)
            patches = proc.load_patch(paths, boxes, norm=norm)  # was norm=False, which ignored the `norm` argument

            # shuffle
            if shuffle:
                inds = np.random.permutation(config.batch_size)
                patches = patches[inds]
                gts = gts[inds]

            # training
            _, loss, score, weight, bias = sess.run(
                [
                    model.trainable[seq_i],
                    model.losses['loss-' + str(seq_i)],
                    model.layers['fc6-' + str(seq_i)],
                    model.weights['fc6-' + str(seq_i)],
                    model.biases['fc6-' + str(seq_i)]
                ],
                feed_dict={
                    model.layers['input']: patches,
                    model.layers['y-' + str(seq_i)]: gts
                })
            print(seq_i)
            print(score.reshape(-1, 2)[:5])
            print(gts[:5])
            print(np.mean(loss))
            print(weight)
            print(bias)
            loss_total += np.mean(loss)

            # update seq_i
            seq_i += 1

        ## save the model
        # write the average loss over the K sequences for this cycle (the original wrote
        # only the last batch's loss and omitted the newline)
        train_loss_file.write('Epoch ' + str(i) + ', Loss: ' + str(loss_total / K) + '\n')
        saver.save(sess, os.path.join(result_dir, 'model_e' + str(i) + '.ckpt'),
                   global_step=i + 1)
    train_loss_file.close()
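
# Example invocation (a sketch only; the dataset list and paths below are hypothetical
# placeholders, not taken from the original code):
if __name__ == '__main__':
    pretrain_mdnet(datasets='data/seq_list.txt',                  # hypothetical sequence list
                   init_model_path='models/init_vggm.ckpt',       # hypothetical initial weights
                   result_dir='results/pretrain',
                   load_path=None,
                   shuffle=True, norm=False, dropout=True, regularization=True)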
parser.add_argument('--dev', help='dev files', required=True)
parser.add_argument('--test', help='test files', required=True)
parser.add_argument('--architecture', '-a', help="Model architecture", choices=['lstm', 'cnn'])
parser.add_argument('--target', help='predict age, gender, both of them, or the joint cross-product',
                    choices=('age', 'gender', 'both', 'joint'), default='both')
parser.add_argument('--num-epochs', help='Number of epochs', default=50, type=int)
parser.add_argument('--dim-rnn', help='dimensionality of hidden RNN layer', default=50, type=int)
parser.add_argument('--dim-emb', help='dimensionality of word embeddings', default=100, type=int)
parser.add_argument('--dim-out', help='dimensionality of output layer', default=32, type=int)
# parser.add_argument('--dropout', help='dropout probability for final sentence representation', default=0.0, type=float)
parser.add_argument('--batch-size', help='batch size', default=1, type=int)
parser.add_argument('--max-len', help='Maximum length of input', default=100, type=int)
args = parser.parse_args()

# Read in data, mapping words to integers
datasets = read_datasets(args.train, args.test, args.dev, "gender")  # for now just gender
train_dataset = datasets['train']
dev_dataset = datasets['dev']
word_mapper = datasets['word_mapper']
label_mapper = datasets['label_mapper']

# Create dense representations of datasets
X_train, y_train = make_dense(train_dataset['sentences'], args.max_len,
                              train_dataset['labels'], len(label_mapper))
train_sent_lens = np.array([len(sent) for sent in train_dataset['sentences']])
X_dev, y_dev = make_dense(dev_dataset['sentences'], args.max_len,
                          dev_dataset['labels'], len(label_mapper))
dev_sent_lens = np.array([len(sent) for sent in dev_dataset['sentences']])
assert len(X_train) == len(y_train)
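
# `make_dense` is used above but not defined in this fragment. A minimal sketch, assuming
# each sentence is a list of integer word ids and each label is an integer class id; it
# pads/truncates to max_len and one-hot encodes the labels, matching the call sites above.
def make_dense(sentences, max_len, labels, num_labels):
    """Returns (X, y): padded id matrix [n, max_len] and one-hot labels [n, num_labels]."""
    X = np.zeros((len(sentences), max_len), dtype=np.int32)
    y = np.zeros((len(labels), num_labels), dtype=np.float32)
    for i, sent in enumerate(sentences):
        length = min(len(sent), max_len)
        X[i, :length] = sent[:length]
    for i, label in enumerate(labels):
        y[i, label] = 1.0
    return X, y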