def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  # Create the training directory if it does not exist.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = show_and_tell_model.ShowAndTellModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()

    # Set up the learning rate and its decay schedule.
    learning_rate_decay_fn = None
    if FLAGS.train_inception:
      learning_rate = tf.constant(training_config.train_inception_learning_rate)
    else:
      learning_rate = tf.constant(training_config.initial_learning_rate)
      if training_config.learning_rate_decay_factor > 0:
        num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                 model_config.batch_size)
        decay_steps = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)

        def _learning_rate_decay_fn(learning_rate, global_step):
          return tf.train.exponential_decay(
              learning_rate,
              global_step,
              decay_steps=decay_steps,
              decay_rate=training_config.learning_rate_decay_factor,
              staircase=True)

        learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver)
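Several of the training functions in this file derive decay_steps the same way; the minimal standalone sketch below isolates that arithmetic. All numbers are illustrative assumptions, not values taken from any configuration above.

import tensorflow as tf

num_examples_per_epoch = 586363   # assumed corpus size
batch_size = 32                   # assumed batch size
num_epochs_per_decay = 8.0        # assumed epochs between decays
decay_steps = int((num_examples_per_epoch / batch_size) * num_epochs_per_decay)

global_step = tf.train.get_or_create_global_step()
decayed_lr = tf.train.exponential_decay(
    learning_rate=2.0,            # assumed initial rate
    global_step=global_step,
    decay_steps=decay_steps,      # ~146,590 steps per decay with these numbers
    decay_rate=0.5,               # assumed decay factor
    staircase=True)               # decay in discrete jumps, not continuously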
def model_fn(features, labels, mode, params):
  im_mode = MODEKEY_TO_MODE[mode]
  model_config = configuration.ModelConfig()
  training_config = configuration.TrainingConfig()
  model = show_and_tell_model.ShowAndTellModel(
      model_config, mode=im_mode, train_inception=FLAGS.train_inception)
  model.build_model_for_tpu(
      images=features["images"],
      input_seqs=features["input_seqs"],
      target_seqs=features["target_seqs"],
      input_mask=features["input_mask"])

  optimizer = tf.train.GradientDescentOptimizer(
      learning_rate=training_config.initial_learning_rate)
  optimizer = tf.contrib.estimator.clip_gradients_by_norm(
      optimizer, training_config.clip_gradients)
  if FLAGS.use_tpu:
    # Aggregate gradients across TPU shards.
    optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
  train_op = optimizer.minimize(
      model.total_loss, global_step=tf.train.get_or_create_global_step())

  def scaffold_fn():
    """Load pretrained Inception checkpoint at initialization time."""
    return tf.train.Scaffold(init_fn=model.init_fn)

  return tf.contrib.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=model.total_loss,
      train_op=train_op,
      scaffold_fn=scaffold_fn)
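model_fn above relies on a MODEKEY_TO_MODE lookup that is not defined in this section. A plausible minimal definition, assuming the model accepts the same string modes ("train", "eval", "inference") used elsewhere in this file, would sit at module level:

import tensorflow as tf

# Hypothetical mapping from Estimator mode keys to the string modes used by
# ShowAndTellModel; the exact dict is an assumption, not shown in the original.
MODEKEY_TO_MODE = {
    tf.estimator.ModeKeys.TRAIN: "train",
    tf.estimator.ModeKeys.EVAL: "eval",
    tf.estimator.ModeKeys.PREDICT: "inference",
}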
def main(unused_argv):
  assert FLAGS.checkpoint_dir, "--checkpoint_dir is required"

  model_config = configuration.ModelConfig()
  training_config = configuration.TrainingConfig()
  model_config.batch_size = 612

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model; mind batch norm and the scope prefix.
    with tf.variable_scope("train"):
      model = sim_model.SimModel(model_config, mode="inference")
      model.build()
    # Set up the Saver.
    restore = tf.train.Saver()

  with g.as_default():
    init = tf.global_variables_initializer()

  # Start the session.
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
  sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          allow_soft_placement=True))
  sess.run(init)

  # Restore from the checkpoint.
  ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
  if ckpt and ckpt.model_checkpoint_path:
    restore.restore(sess, ckpt.model_checkpoint_path)
    print('Successfully loaded model from %s' % ckpt.model_checkpoint_path)
  else:
    print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
    return

  # TODO: handle batch_size for the final partial batch.
  preds_all = []
  for feats in tqdm(reader.batch_inputs()):
    start_time = time.time()
    feed_dict = {
        model.input_seqs: feats[0],
        model.input_mask: feats[1],
        model.labels: feats[2],
    }
    score_value = sess.run(model.preds, feed_dict)
    preds_all.append(np.squeeze(score_value))

  # Generate the submission file.
  test_ids = np.load('data/test/test_ids.npy')
  preds = np.hstack(preds_all)  # * .75
  assert len(test_ids) == len(preds)
  submission = pd.DataFrame({
      'is_duplicate': preds.ravel(),
      'test_id': test_ids
  })
  submission.to_csv('submit_logs/' + FLAGS.submit_name + '.csv', index=False)
  print("done!")
def main(_):
  assert FLAGS.model_type in _MODEL_LIST, 'Invalid model specified.'
  if not tf.gfile.Exists(FLAGS.train_log_dir):
    tf.gfile.MakeDirs(FLAGS.train_log_dir)

  config = configuration.get_configuration()
  config.batch_size = FLAGS.batch_size
  training_config = configuration.TrainingConfig()

  g = tf.Graph()
  with g.as_default():
    # If ps_tasks is zero, the local device is used. When using multiple
    # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
    # across the different devices.
    if FLAGS.model_type == 'multi':
      vae = convolutional_multi_vae.ConvolutionalMultiVae(
          config, mode='train', split_name='train')
    elif FLAGS.model_type == 'single' or FLAGS.model_type == 'kronecker':
      raise NotImplementedError("%s not implemented" % (FLAGS.model_type))
    vae.build_model()

    optimizer = training_config.optimizer(training_config.learning_rate)
    tf.losses.add_loss(vae.loss)
    total_loss = tf.losses.get_total_loss()

    # Set up training.
    train_op = slim.learning.create_train_op(total_loss, optimizer,
                                             check_numerics=False)
    saver = vae.setup_saver()
    if config.loss_type == 'fwkl':
      init_fn = vae.get_forward_kl_init_fn(FLAGS.fwkl_init_dir)
    else:
      init_fn = None

    # Run training.
    slim.learning.train(
        train_op=train_op,
        init_fn=init_fn,
        logdir=FLAGS.train_log_dir,
        graph=g,
        number_of_steps=FLAGS.max_number_of_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        saver=saver)
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = show_and_tell_model.ShowAndTellModel(model_config, mode="train")
    model.build()

    # Set up the training ops. The Adam optimizer instance carries its own
    # learning rate, so none is passed to optimize_loss.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=None,
        optimizer=tf.train.AdamOptimizer())

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver,
      save_interval_secs=300)
def main(unused_argv):
  # Parse arguments.
  parser = argparse.ArgumentParser()
  args = parse_arguments(parser)

  # Model configuration.
  model_config = configuration.ModelConfig()
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = args.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Load MNIST data.
  mnist = input_data.read_data_sets('MNIST')

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    the_model = model.DAE(model_config)
    the_model.build()

    # Set up the learning rate.
    learning_rate = tf.constant(training_config.learning_rate)

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=the_model.total_loss,
        global_step=the_model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver()

    # Run training.
    print("Training")
    with tf.Session() as sess:
      print("Initializing parameters")
      sess.run(tf.global_variables_initializer())

      for step in range(1, args.number_of_steps):
        # Read a batch and create a noisy version of it.
        batch = mnist.train.next_batch(model_config.batch_size)[0]
        noisy_batch = utils.add_noise(batch)

        # Prepare the dictionary that feeds the data to the graph.
        feed_dict = {
            "images:0": batch,
            "noisy_images:0": noisy_batch,
            "phase_train:0": True
        }

        # Run one training step.
        _, loss = sess.run([train_op, the_model.total_loss],
                           feed_dict=feed_dict)

        if step % 50 == 0:
          # Save a checkpoint and print the loss.
          save_path = saver.save(sess, train_dir + '/model.ckpt')
          print("Step:", '%06d' % (step), "cost=", "{:.9f}".format(loss))

      print('Finished training ...')
      print('Start testing ...')

      # Load the test set and create a noisy version of it.
      testing_data = mnist.test.images
      corrupted_testing = utils.add_noise(testing_data)

      # Plot the first ten corrupted (original input) images.
      ori_plot = corrupted_testing[:10]
      count = 1
      for img in ori_plot:
        name = 'ori_img' + str(count)
        path = 'img/' + name
        count += 1
        plot_image(img.reshape((28, 28)), name, path)

      # Prepare the dictionary that feeds the data to the graph.
      feed_dict = {
          "images:0": testing_data,
          "noisy_images:0": corrupted_testing,
          "phase_train:0": False
      }

      # Compute the reconstructions and the loss.
      reconstruc, loss = sess.run(
          [the_model.reconstructed_images, the_model.total_loss],
          feed_dict=feed_dict)

      # Plot the first ten denoised images.
      ori_plot = reconstruc[:10]
      count = 1
      for img in ori_plot:
        name = 'de_img' + str(count)
        path = 'img/' + name
        count += 1
        plot_image(img.reshape((28, 28)), name, path)

      print("Testing loss= ", loss)
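The loop above feeds tensors by graph name ("images:0" and so on), which only works if the DAE graph declares placeholders with those names. A minimal sketch of what model.DAE.build() is assumed to declare; the shapes are illustrative (784 = flattened 28x28 MNIST images):

import tensorflow as tf

# Assumed placeholder declarations matching the "name:0" feed keys above.
images = tf.placeholder(tf.float32, shape=[None, 784], name="images")
noisy_images = tf.placeholder(tf.float32, shape=[None, 784], name="noisy_images")
phase_train = tf.placeholder(tf.bool, shape=[], name="phase_train")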
def main(unused_argv):
  '''assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"'''

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = (
      base_folder + "/mscoco/train-?????-of-00256")  # FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = (
      base_folder + "/model/inception_v3.ckpt")  # FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = base_folder + "/model/3m_wa_n_all/train"  # FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = show_and_tell_model.ShowAndTellModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()

    # Set up the learning rate.
    learning_rate_decay_fn = None
    if FLAGS.train_inception:
      learning_rate = tf.constant(training_config.train_inception_learning_rate)
    else:
      learning_rate = tf.constant(training_config.initial_learning_rate)
      if training_config.learning_rate_decay_factor > 0:
        num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                 model_config.batch_size)
        decay_steps = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)

        def _learning_rate_decay_fn(learning_rate, global_step):
          return tf.train.exponential_decay(
              learning_rate,
              global_step,
              decay_steps=decay_steps,
              decay_rate=training_config.learning_rate_decay_factor,
              staircase=True)

        learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)  # , scope=the_scope)
    print("VL ", var_list)
    saver = tf.train.Saver(
        var_list=var_list,
        max_to_keep=training_config.max_checkpoints_to_keep,
        save_relative_paths=True)

    # A second Saver over everything except the attention variables, used to
    # restore the original (pre-attention) weights.
    var_list2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope='(?!attention_var)')
    print("test", var_list2)
    saver2 = tf.train.Saver(var_list=var_list2)

    # The attention variables are freshly initialized instead of restored.
    attVar = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                               scope="attention_var")
    local_init_op = tf.variables_initializer(attVar)

    def init_fn_2(sess):
      tf.logging.info("Restoring the original weights %s",
                      model_config.inception_checkpoint_file)
      checkpoint_path2 = base_folder + "/model/3m/train/model.ckpt-3003677"
      saver2.restore(sess, checkpoint_path2)
      sess.run(local_init_op)

    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = "0"
    gpu_fract = .75
    config.gpu_options.per_process_gpu_memory_fraction = gpu_fract

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=init_fn_2,
      saver=saver,
      # local_init_op=local_init_op,
      session_config=config)
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  # model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  with tf.Graph().as_default():
    dataset = input_ops.process_pickles_and_augment(
        "/Users/hanshiyi/workspace/MatchingNetworks-OSL/data/omniglot/processed-data",
        0.02, 'train')
    eval_dataset = input_ops.process_pickles_and_augment(
        "/Users/hanshiyi/workspace/MatchingNetworks-OSL/data/omniglot/processed-data",
        0.02, 'validation')
    model = matching_networks_model.MatchingNetworks(
        model_config, mode="train", dataset=dataset,
        train_model=FLAGS.train_model)
    model.build()

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = train_op_fun(model.loss, model.global_step)
    # test_acc = tf.reduce_mean(tf.to_float(model.top_k))

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Start running operations on the Graph. allow_soft_placement must be set
    # to True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Build the summary operation from the last tower summaries.
    summary_op = tf.contrib.deprecated.merge_all_summaries()
    avg_train_acc = 0.0
    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.number_of_steps):
      start_time = time.time()
      shuffled_label_list = input_ops.label_iterator(dataset,
                                                     model_config.num_classes)
      batch_s_sounds, batch_s_labels = input_ops.data_iterator(
          dataset, model.config.batch_size_s, shuffled_label_list, sess)
      batch_s_labels = np.expand_dims(batch_s_labels, 1)
      test_sound, test_label = input_ops.data_iterator(
          dataset, model.config.batch_size_b, shuffled_label_list, sess)
      test_sound = np.expand_dims(test_sound[0], 0)
      test_label = np.expand_dims(test_label[0], 0).reshape((1, 1))

      # Prepare the dictionary to feed the session with.
      feed_dict = {
          model.support_set_sounds: batch_s_sounds,
          model.support_set_labels: batch_s_labels,
          model.test_sound: test_sound,
          model.test_sound_labels: test_label
      }
      pred, _, train_acc, loss_val, summary = sess.run(
          [
              model.prediction, train_op, model.train_accuracy, model.loss,
              summary_op
          ],
          feed_dict=feed_dict)
      # print(pred)
      duration = time.time() - start_time
      avg_train_acc += train_acc

      assert not np.isnan(loss_val), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        number_of_shot = model.config.batch_size_s / float(duration)
        format_str = ('%s: episode %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/shot) train_acc = %.4f')
        print(format_str % (datetime.now(), step, loss_val, number_of_shot,
                            duration, avg_train_acc / 10.0))
        avg_train_acc = 0
        summary_writer.add_summary(summary, step)

      if step % 100 == 0:
        batch_s_sounds, batch_s_labels, batch_test, batch_test_label = \
            input_ops.eval_data_iterator(eval_dataset,
                                         model.config.batch_size_b,
                                         model_config.num_classes, sess)
        batch_s_labels = np.expand_dims(batch_s_labels, 1)
        batch_test = np.expand_dims(batch_test[0], 0)
        batch_test_label = np.expand_dims(batch_test_label[0],
                                          0).reshape((1, 1))

        # Prepare the dictionary to feed the session with.
        feed_dict = {
            model.support_set_sounds: batch_s_sounds,
            model.support_set_labels: batch_s_labels,
            model.test_sound: batch_test,
            model.test_sound_labels: batch_test_label
        }
        _, test_summary = sess.run([model.test_acc, model.test_summ],
                                   feed_dict=feed_dict)
        summary_writer.add_summary(test_summary, step)
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        summary_writer.add_run_metadata(run_metadata, 'step%d' % step)

      # Save the model checkpoint periodically.
      if step % 500 == 0 or (step + 1) == FLAGS.number_of_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
def evaluate(self):
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    training_config = configuration.TrainingConfig()

    # Initializer method.
    initializer = tf.random_uniform_initializer(
        minval=-model_config.initializer_scale,
        maxval=model_config.initializer_scale)

    seq_embeddings = None
    image_feed = tf.placeholder(dtype=tf.float32, shape=[2048],
                                name="image_feed")
    input_feed = tf.placeholder(dtype=tf.int32,
                                shape=[None],  # batch_size
                                name="input_feed")

    # Process image and insert batch dimensions.
    image_fea = tf.expand_dims(image_feed, 0)
    # input_seqs = tf.expand_dims(input_feed, 1)
    input_seqs = input_feed
    # image_fea = image_feed

    with tf.variable_scope("seq_embedding"), tf.device("/gpu:0"):
      embedding_map = tf.get_variable(
          name="map",
          shape=[model_config.vocab_size, model_config.embedding_size],
          initializer=initializer)
      seq_embeddings = tf.nn.embedding_lookup(embedding_map, input_seqs)

    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=image_fea,
          num_outputs=model_config.embedding_size,
          activation_fn=None,
          weights_initializer=initializer,
          biases_initializer=None,
          scope=scope)

    W = tf.get_variable('W',
                        shape=[4, model_config.num_lstm_units,
                               model_config.num_lstm_units],
                        initializer=initializer)
    U = tf.get_variable('U',
                        shape=[4, model_config.num_lstm_units,
                               model_config.num_lstm_units],
                        initializer=initializer)

    def step(prev, x):
      # Gather previous internal state and output state.
      st_1, ct_1 = tf.unstack(prev)
      # GATES
      # input gate
      i = tf.sigmoid(tf.matmul(x, U[0]) + tf.matmul(st_1, W[0]))
      # forget gate
      f = tf.sigmoid(tf.matmul(x, U[1]) + tf.matmul(st_1, W[1]))
      # output gate
      o = tf.sigmoid(tf.matmul(x, U[2]) + tf.matmul(st_1, W[2]))
      # gate weights
      g = tf.tanh(tf.matmul(x, U[3]) + tf.matmul(st_1, W[3]))
      # New internal cell state, then output state.
      ct = ct_1 * f + g * i
      st = tf.tanh(ct) * o
      return tf.stack([st, ct])

    image_embeddings = tf.stack([image_embeddings, image_embeddings])
    # image_embeddings = tf.expand_dims(image_embeddings, 0)
    image_embeddings = tf.transpose(image_embeddings, [1, 0, 2],
                                    name='initial_state')

    state_feed = tf.placeholder(dtype=tf.float32,
                                shape=[None, 2, model_config.num_lstm_units],
                                name="state_feed")
    # state_tuple = tf.split(value=state_feed, num_or_size_splits=2, axis=1)
    state_feed = tf.transpose(state_feed, [1, 0, 2])
    seq_embeddings = tf.reshape(seq_embeddings,
                                [-1, model_config.num_lstm_units])
    states = step(state_feed, seq_embeddings)
    # states = tf.scan(step,
    #                  tf.transpose(seq_embeddings, [1, 0, 2]),
    #                  initializer=state_feed)
    # The result is fetched later by its name "state:0", so the return value
    # is intentionally unused here.
    tf.transpose(states, [1, 0, 2], name='state')
    # states = tf.Print(states, ["lstm states shape:", tf.shape(states)])
    states = states[0]
    # states = tf.Print(states, ["lstm states REshape:", tf.shape(states)])
    lstm_outputs = tf.reshape(states, [-1, model_config.num_lstm_units])
    # lstm_outputs = tf.Print(lstm_outputs, [tf.shape(lstm_outputs), "lstm_outputs"])

    with tf.variable_scope("logits") as logits_scope:
      logits = tf.contrib.layers.fully_connected(
          inputs=lstm_outputs,
          num_outputs=model_config.vocab_size,
          activation_fn=None,
          weights_initializer=initializer,
          scope=logits_scope)
    tf.nn.softmax(logits, name="softmax")

    global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

  g.as_default()
  sess = tf.InteractiveSession(graph=g)

  # Load the trained model.
  with sess.as_default():
    saver.restore(sess, "log/model.ckpt-19")
    print("finish initialization")

    x = self.val[0]
    lengths = [len(s) for s in x]
    n_samples = len(x)
    maxlen = np.max(lengths)

    # Remove duplicates, because one image has many captions.
    val_re = []
    for i in range(n_samples):
      if self.val[1][i] not in val_re:
        val_re.append(self.val[1][i])
    n_samples = len(val_re)
    print("n_samples:" + str(n_samples) + " maxlen:" + str(maxlen))

    z = np.array([self.img_feats[:, val_re[t]] for t in range(n_samples)])
    cap = np.zeros((n_samples, maxlen))
    generator = caption_generator_rawlstm.CaptionGenerator()

    # Generate captions, feeding one word at a time to the model.
    # Start with 6800 ('#') and stop on 0 ('.').
    for num in range(n_samples):
      if num % 100 == 0:
        print(num)
      if 1:
        captions = generator.beam_search(sess, z[num])
        for s in range(len(captions[0].sentence) - 1):
          cap[num][s] = captions[0].sentence[s + 1]
      else:
        initial_state = sess.run(fetches="initial_state:0",
                                 feed_dict={"image_feed:0": z[num]})
        input_feed = np.array([6800])
        state_feed = initial_state
        for s in range(maxlen):
          softmax_output, state_output = sess.run(
              fetches=["softmax:0", "state:0"],
              feed_dict={
                  "input_feed:0": input_feed,
                  "state_feed:0": state_feed,
              })
          softmax_output = softmax_output.reshape(softmax_output.shape[1])
          # Greedily pick the most likely next word and feed it back in.
          input_feed = [np.argsort(-softmax_output)[0]]
          state_feed = state_output
          cap[num][s] = input_feed[0]
          if input_feed[0] == 0:
            break

    # Map the word indices back to real words.
    precaptext = []
    for i in range(n_samples):
      temcap = []
      for j in range(maxlen):
        if cap[i][j] != 0:
          temcap.append(self.ixtoword[cap[i][j]])
        else:
          break
      precaptext.append(" ".join(temcap))

    # Save the results to 'coco_5k_test.txt'.
    print('write generated captions into a text file...')
    open('./coco_5k_test.txt', 'w').write('\n'.join(precaptext))
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()
  # May use a different learning rate.
  training_config.initial_learning_rate = FLAGS.learning_rate

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = polyvore_model.PolyvoreModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()

    # Set up the learning rate.
    learning_rate = tf.constant(training_config.initial_learning_rate)
    learning_rate_decay_fn = None
    if training_config.learning_rate_decay_factor > 0:
      num_batches_per_epoch = (training_config.num_examples_per_epoch /
                               model_config.batch_size)
      decay_steps = int(num_batches_per_epoch *
                        training_config.num_epochs_per_decay)

      def _learning_rate_decay_fn(learning_rate, global_step):
        return tf.train.exponential_decay(
            learning_rate,
            global_step,
            decay_steps=decay_steps,
            decay_rate=training_config.learning_rate_decay_factor,
            staircase=True)

      learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)
    # saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.1)

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver)
def main(_):
  if not FLAGS.input_train_file_pattern:
    raise ValueError("--input_train_file_pattern is required.")
  if not FLAGS.input_valid_file_pattern:
    raise ValueError("--input_valid_file_pattern is required.")
  if not FLAGS.vocab_file:
    raise ValueError("--vocab_file is required.")
  if not FLAGS.train_dir:
    raise ValueError("--train_dir is required.")

  if not tf.gfile.IsDirectory(FLAGS.train_dir):
    tf.gfile.MakeDirs(FLAGS.train_dir)

  model_config = configuration.ModelConfig()
  training_config = configuration.TrainingConfig()

  vocab = data_utils.load_vocab(FLAGS.vocab_file)
  model_config.vocab_size = len(vocab)

  pre_emb = []
  if model_config.static_embedding:
    if not FLAGS.w2v_file:
      raise ValueError("--w2v_file is required.")
    tf.logging.info("Loading pre-trained word embeddings.")
    word_vecs = data_utils.load_bin_vec(FLAGS.w2v_file, vocab)
    pre_emb = data_utils.load_vocab_embeddings(
        word_vecs, vocab, model_config.word_embedding_dim)

  g = tf.Graph()
  with g.as_default():
    training_dataset = data_utils.create_input_data(
        FLAGS.input_train_file_pattern, model_config.shuffle,
        model_config.batch_size)
    validation_dataset = data_utils.create_input_data(
        FLAGS.input_valid_file_pattern, model_config.shuffle,
        model_config.batch_size)

    # One reinitializable iterator serves both datasets.
    iterator = tf.data.Iterator.from_structure(
        training_dataset.output_types, training_dataset.output_shapes)
    next_sents, next_labels = iterator.get_next()
    training_init_op = iterator.make_initializer(training_dataset)
    validation_init_op = iterator.make_initializer(validation_dataset)

    tf.logging.info("Building training graph.")
    with tf.variable_scope("model"):
      training_model = text_cnn.TextCNN(model_config)
      training_model.build(next_sents, next_labels)

    # optimizer = tf.train.AdadeltaOptimizer(
    #     learning_rate=training_config.learning_rate,
    #     rho=training_config.learning_rate_decay_rate,
    #     epsilon=training_config.learning_rate_epsilon)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=training_config.learning_rate)
    grads, vars = zip(*optimizer.compute_gradients(training_model.loss))
    if training_config.clip_gradients is not None:
      grads, _ = tf.clip_by_global_norm(grads, training_config.clip_gradients)
    train_op = optimizer.apply_gradients(
        zip(grads, vars), global_step=training_model.global_step)

    with tf.variable_scope("model", reuse=True):
      validation_model = text_cnn.TextCNN(model_config)
      validation_model.build(next_sents, next_labels)

    global_init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()

  with tf.Session(graph=g) as sess:
    sess.run(global_init_op)
    max_accuracy = 0.0
    epoch = 0
    while epoch < training_config.num_epochs:
      sess.run(training_init_op)
      tf.logging.info("Epoch %d" % epoch)

      total_training_loss = 0.0
      total_training_accuracy = 0.0
      training_batch = 0
      feed_dict = {}
      if model_config.static_embedding:
        feed_dict = {training_model.word_emb_placeholder: pre_emb}
      while True:
        try:
          _, training_loss, training_accuracy = sess.run(
              [train_op, training_model.loss, training_model.accuracy],
              feed_dict=feed_dict)
          tf.logging.info("Batch %d, loss: %f" % (training_batch,
                                                  training_loss))
          total_training_loss += training_loss
          total_training_accuracy += training_accuracy
          training_batch += 1
        except tf.errors.OutOfRangeError:
          break
      training_loss = total_training_loss / training_batch
      training_accuracy = total_training_accuracy / training_batch
      tf.logging.info("Training loss: %f, accuracy: %f" %
                      (training_loss, training_accuracy))

      sess.run(validation_init_op)
      total_validation_loss = 0.0
      total_validation_accuracy = 0.0
      validation_batch = 0
      if model_config.static_embedding:
        feed_dict = {validation_model.word_emb_placeholder: pre_emb}
      while True:
        try:
          validation_loss, validation_accuracy = sess.run(
              [validation_model.loss, validation_model.accuracy],
              feed_dict=feed_dict)
          total_validation_loss += validation_loss
          total_validation_accuracy += validation_accuracy
          validation_batch += 1
        except tf.errors.OutOfRangeError:
          break
      validation_loss = total_validation_loss / validation_batch
      validation_accuracy = total_validation_accuracy / validation_batch
      tf.logging.info("Validation loss: %f, accuracy: %f" %
                      (validation_loss, validation_accuracy))

      # Keep only the best checkpoint by validation accuracy.
      if validation_accuracy > max_accuracy:
        max_accuracy = validation_accuracy
        saver.save(sess,
                   os.path.join(FLAGS.train_dir, "model.ckpt"),
                   global_step=training_model.global_step)
      epoch += 1
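The train/validation switching above (and in the InferModel script below) follows the standard TF 1.x reinitializable-iterator pattern. A minimal standalone sketch of just that pattern, with toy datasets standing in for the real input pipelines:

import tensorflow as tf

# Toy datasets standing in for the real training/validation pipelines.
train_ds = tf.data.Dataset.from_tensor_slices(tf.range(10)).batch(2)
valid_ds = tf.data.Dataset.from_tensor_slices(tf.range(100, 106)).batch(2)

# One iterator, reinitialized against whichever dataset is active.
iterator = tf.data.Iterator.from_structure(train_ds.output_types,
                                           train_ds.output_shapes)
next_batch = iterator.get_next()
train_init = iterator.make_initializer(train_ds)
valid_init = iterator.make_initializer(valid_ds)

with tf.Session() as sess:
  for init_op in (train_init, valid_init):
    sess.run(init_op)
    while True:
      try:
        print(sess.run(next_batch))
      except tf.errors.OutOfRangeError:
        break  # end of one pass over the active dataset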
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  start_time = time.time()

  g = tf.Graph()
  with g.as_default():
    model = img2txt.Model(
        config=model_config,
        mode="train",
        rnn_type=FLAGS.rnn_type,
        train_inception=FLAGS.train_inception)
    model.build()

    # Set up the learning rate.
    learning_rate_decay_fn = None
    if FLAGS.train_inception:
      learning_rate = tf.constant(training_config.train_inception_learning_rate)
    else:
      learning_rate = tf.constant(training_config.initial_learning_rate)
      if training_config.learning_rate_decay_factor > 0:
        num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                 model_config.batch_size)
        decay_steps = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)

        def _learning_rate_decay_fn(learning_rate, global_step):
          return tf.train.exponential_decay(
              learning_rate,
              global_step,
              decay_steps=decay_steps,
              decay_rate=training_config.learning_rate_decay_factor,
              staircase=True)

        learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver)

  # Report wall-clock training time.
  end_time = time.time()
  duration = end_time - start_time
  print(time.strftime('Start time :%Y-%m-%d %H:%M:%S',
                      time.localtime(start_time)))
  print(time.strftime('End time :%Y-%m-%d %H:%M:%S',
                      time.localtime(end_time)))
  m, s = divmod(duration, 60)
  h, m = divmod(m, 60)
  print("Total time %d:%02d:%02d" % (h, m, s))
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.model_dir, "--model_dir is required"

  if FLAGS.use_tpu:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    tpu_grpc_url = tpu_cluster_resolver.get_master()
  else:
    tpu_grpc_url = ''

  run_config = tf.contrib.tpu.RunConfig(
      master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=1000,
      keep_checkpoint_max=None,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,))

  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      params={
          "input_file_pattern": FLAGS.input_file_pattern,
          "use_tpu": FLAGS.use_tpu,
          "mode": FLAGS.mode,
      })

  training_config = configuration.TrainingConfig()

  if FLAGS.mode == "train":
    estimator.train(input_fn=input_fn, max_steps=FLAGS.train_steps)
  else:
    # Run evaluation whenever there is a new checkpoint.
    for ckpt in tf.contrib.training.checkpoints_iterator(FLAGS.model_dir):
      tf.logging.info("Starting to evaluate.")
      try:
        eval_results = estimator.evaluate(
            input_fn=input_fn,
            steps=(training_config.num_examples_per_epoch //
                   FLAGS.eval_batch_size),
            checkpoint_path=ckpt)
        tf.logging.info("Eval results: %s", eval_results)
        current_step = int(os.path.basename(ckpt).split("-")[1])
        if current_step >= FLAGS.train_steps:
          tf.logging.info("Evaluation finished after training step %d" %
                          current_step)
          break
      except tf.errors.NotFoundError:
        tf.logging.info("Checkpoint %s no longer exists, skipping checkpoint" %
                        ckpt)
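The main above references an input_fn that is not defined in this section. TPUEstimator calls input_fn with a params dict into which it injects the per-shard "batch_size" derived from train_batch_size/eval_batch_size; a hedged sketch of the expected shape of that function (the file-pattern handling and parsing step are assumptions, not the original implementation):

import tensorflow as tf

def input_fn(params):
  # TPUEstimator injects the per-shard batch size into params.
  batch_size = params["batch_size"]
  files = tf.data.Dataset.list_files(params["input_file_pattern"])
  dataset = files.apply(
      tf.contrib.data.parallel_interleave(
          tf.data.TFRecordDataset, cycle_length=4))
  # Parsing each record into the {"images", "input_seqs", "target_seqs",
  # "input_mask"} features consumed by model_fn would happen here.
  return dataset.batch(batch_size, drop_remainder=True)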
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = show_and_tell_model.ShowAndTellModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()
    # model.images: [batch_size, 299, 299, 3] images scaled to [-1, 1].
    # model.input_seqs: [batch_size, 20] int64, padded to length 20 with 0s.
    # Call visualize_input(model) to visualize the input for debug purposes.

    # Set up the learning rate.
    learning_rate_decay_fn = None
    if FLAGS.train_inception:
      learning_rate = tf.constant(training_config.train_inception_learning_rate)
    else:
      learning_rate = tf.constant(training_config.initial_learning_rate)
      if training_config.learning_rate_decay_factor > 0:
        num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                 model_config.batch_size)
        decay_steps = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)

        def _learning_rate_decay_fn(learning_rate, global_step):
          return tf.train.exponential_decay(
              learning_rate,
              global_step,
              decay_steps=decay_steps,
              decay_rate=training_config.learning_rate_decay_factor,
              staircase=True)

        learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver)
def main(_):
  if not FLAGS.input_train_file_pattern:
    raise ValueError("--input_train_file_pattern is required.")
  if not FLAGS.input_valid_file_pattern:
    raise ValueError("--input_valid_file_pattern is required.")
  if not FLAGS.glove_file:
    raise ValueError("--glove_file is required.")
  if not FLAGS.train_dir:
    raise ValueError("--train_dir is required.")

  if not tf.gfile.IsDirectory(FLAGS.train_dir):
    tf.gfile.MakeDirs(FLAGS.train_dir)

  model_config = configuration.ModelConfig()
  train_config = configuration.TrainingConfig()

  tf.logging.info("Load pre-trained Glove embeddings.")
  pretrained_emb = data_utils.load_pretrained_embeddings(
      FLAGS.glove_file, model_config.vocab_size)

  g = tf.Graph()
  with g.as_default():
    # Build training and valid datasets.
    training_dataset = data_utils.create_input_data(
        FLAGS.input_train_file_pattern, model_config.shuffle,
        model_config.batch_size)
    valid_dataset = data_utils.create_input_data(
        FLAGS.input_valid_file_pattern, model_config.shuffle,
        model_config.batch_size)

    iterator = tf.data.Iterator.from_structure(
        training_dataset.output_types, training_dataset.output_shapes)
    (next_text_ids, next_text_mask, next_hypothesis_ids, next_hypothesis_mask,
     next_label) = iterator.get_next()
    training_iterator_init = iterator.make_initializer(training_dataset)
    valid_iterator_init = iterator.make_initializer(valid_dataset)

    tf.logging.info("Building training graph.")
    learning_rate_placeholder = tf.placeholder(tf.float32, [],
                                               name="learning_rate")
    with tf.variable_scope("model"):
      # model_config.encoder_dropout = 0.0
      # model_config.classifier_dropout = 0.0
      model_train = infer_model.InferModel(model_config, mode="train")
      model_train.build(next_text_ids, next_text_mask, next_hypothesis_ids,
                        next_hypothesis_mask, next_label)

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate_placeholder)
    grads, vars = zip(*optimizer.compute_gradients(
        model_train.target_cross_entropy_loss))
    if train_config.clip_gradients is not None:
      grads, _ = tf.clip_by_global_norm(grads, train_config.clip_gradients)
    train_op = optimizer.apply_gradients(
        zip(grads, vars), global_step=model_train.global_step)

    with tf.variable_scope("model", reuse=True):
      # model_config.encoder_dropout = 0.0
      # model_config.classifier_dropout = 0.0
      model_valid = infer_model.InferModel(model_config, mode="eval")
      model_valid.build(next_text_ids, next_text_mask, next_hypothesis_ids,
                        next_hypothesis_mask, next_label)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

  with tf.Session(graph=g) as sess:
    # Initialize global variables.
    sess.run(init)

    # Assign pre-trained word embeddings to the model.
    sess.run(model_train.word_emb_assign_op,
             feed_dict={model_train.word_emb_placeholder: pretrained_emb})

    lr = train_config.initial_learning_rate
    prev_accuracy = 0.0
    max_accuracy = 0.0
    epoch = 0
    while (lr > train_config.learning_rate_threshold and
           epoch <= train_config.num_epochs):
      # Initialize the iterator on the training dataset.
      sess.run(training_iterator_init)
      tf.logging.info("Epoch %d, learning rate: %f" % (epoch, lr))

      total_train_batch = 0
      total_train_loss = 0.0
      total_train_accuracy = 0.0
      while True:
        try:
          _, train_loss, train_accuracy = sess.run(
              [
                  train_op, model_train.target_cross_entropy_loss,
                  model_train.eval_accuracy
              ],
              feed_dict={learning_rate_placeholder: lr})
          total_train_batch += 1
          total_train_loss += train_loss
          total_train_accuracy += train_accuracy
          tf.logging.info("Batch %d, loss: %f" % (total_train_batch,
                                                  train_loss))
        except tf.errors.OutOfRangeError:
          break
      train_loss = total_train_loss / total_train_batch
      train_accuracy = total_train_accuracy / total_train_batch
      tf.logging.info("Train loss: %f, accuracy: %f" % (train_loss,
                                                        train_accuracy))

      # Initialize the iterator on the validation dataset.
      sess.run(valid_iterator_init)
      total_valid_batch = 0
      total_valid_loss = 0.0
      total_valid_accuracy = 0.0
      while True:
        try:
          valid_loss, valid_accuracy = sess.run([
              model_valid.target_cross_entropy_loss, model_valid.eval_accuracy
          ])
          total_valid_batch += 1
          total_valid_loss += valid_loss
          total_valid_accuracy += valid_accuracy
        except tf.errors.OutOfRangeError:
          break
      valid_loss = total_valid_loss / total_valid_batch
      valid_accuracy = total_valid_accuracy / total_valid_batch
      tf.logging.info("Validate loss: %f, accuracy: %f" % (valid_loss,
                                                           valid_accuracy))

      # Decay the learning rate gently while accuracy improves; cut it hard
      # after a regression.
      if valid_accuracy > prev_accuracy:
        lr *= train_config.learning_rate_decay_factor
      else:
        lr /= 5

      if valid_accuracy > max_accuracy:
        max_accuracy = valid_accuracy
        saver.save(sess,
                   os.path.join(FLAGS.train_dir, "model.ckpt"),
                   global_step=model_train.global_step)
      prev_accuracy = valid_accuracy
      epoch += 1
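The validation-driven schedule above, restated in isolation as plain Python (the function name and signature are illustrative, not part of the original): decay mildly while accuracy improves, cut the rate by 5x after a regression, and the outer loop stops once the rate falls below learning_rate_threshold.

def next_learning_rate(lr, valid_accuracy, prev_accuracy, decay_factor):
  if valid_accuracy > prev_accuracy:
    return lr * decay_factor  # mild multiplicative decay while improving
  return lr / 5.0             # sharp cut after a regression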
def main(unused_argv):
  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  model_config.vgg19_checkpoint_file = FLAGS.vgg19_checkpoint_file
  model_config.word_embedding_file = FLAGS.word_embedding_file
  training_config = configuration.TrainingConfig()

  if FLAGS.cnn_model == 'InceptionV3':
    trained_models_dir = inception_trained_models_dir
  elif FLAGS.cnn_model == 'VGG19':
    trained_models_dir = vgg_trained_models_dir
  else:
    print('Unknown cnn model {0}'.format(FLAGS.cnn_model))
    exit(0)

  if not tf.gfile.IsDirectory(trained_models_dir):
    tf.logging.info("Creating training directory: %s", trained_models_dir)
    tf.gfile.MakeDirs(trained_models_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model, training from scratch.
    model = show_and_tell_model.ShowAndTellModel(
        model_config,
        mode="train",
        cnn_model=FLAGS.cnn_model,
        train_cnn_model=FLAGS.train_cnn_model,
        custom_word_embedding=FLAGS.custom_word_embedding)
    model.build()

    # Set up the learning rate.
    learning_rate_decay_fn = None
    if FLAGS.train_cnn_model:
      learning_rate = tf.constant(training_config.train_inception_learning_rate)
    else:
      learning_rate = tf.constant(training_config.initial_learning_rate)
      if training_config.learning_rate_decay_factor > 0:
        num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                 model_config.batch_size)
        decay_steps = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)

        def _learning_rate_decay_fn(learning_rate, global_step):
          return tf.train.exponential_decay(
              learning_rate,
              global_step,
              decay_steps=decay_steps,
              decay_rate=training_config.learning_rate_decay_factor,
              staircase=True)

        learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      trained_models_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver)
def train_op():
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)
  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  logger = set_logger(logPath, time_str, os.path.basename(__file__))

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = captiongan_model.CaptionGAN(model_config, mode="train")
    model.build_graph()

    # Set up the learning rates.
    learning_rate_g = tf.constant(training_config.initial_learning_rate_g)
    learning_rate_d = tf.constant(training_config.initial_learning_rate_d)
    num_batches_per_epoch = (training_config.num_examples_per_epoch /
                             model_config.batch_size)
    # model.saveFreq = num_batches_per_epoch

    # Learning rate decay.
    if training_config.learning_rate_decay_factor > 0:
      decay_steps_g = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)
      decay_steps_d = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)
      learning_rate_g = tf.train.exponential_decay(
          learning_rate_g,
          model.global_step,
          decay_steps=decay_steps_g,
          decay_rate=training_config.learning_rate_decay_factor,
          staircase=True)
      learning_rate_d = tf.train.exponential_decay(
          learning_rate_d,
          model.global_step,
          decay_steps=decay_steps_d,
          decay_rate=training_config.learning_rate_decay_factor,
          staircase=True)

    # Alternatives: AdamOptimizer, AdagradOptimizer.
    g_solver = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate_g).minimize(
            loss=model.g_loss,
            global_step=model.global_step,
            var_list=model.generator_variables)
    d_solver = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate_d).minimize(
            loss=model.d_loss,
            global_step=model.global_step,
            var_list=model.discriminator_variables)

    saver = tf.train.Saver()

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      for step in range(FLAGS.number_of_steps):
        if training_config.debug:
          print('step ' + str(step) + ' begins')

        # Train the generator.
        if training_config.debug:
          logger.info('g_loss mmd before train: ')
          logger.info(sess.run(model.g_loss))
        _, gc = sess.run([g_solver, model.g_loss])
        if training_config.debug:
          logger.info('g_loss, mmd after train: ')
          logger.info(sess.run(model.g_loss))

        if np.mod(step, training_config.dispFreq) == 0:
          fake_caption_embedding_ids = sess.run(model.caption_embedding_ids)
          dc = sess.run(model.d_loss)
          print(' cost_g ' + str(gc) + ' cost_d ' + str(dc))
          print("Generated:" + " ".join(
              [vocab.id_to_word(x) for x in fake_caption_embedding_ids]))

        # Train the discriminator every dg_ratio steps.
        if np.mod(step, training_config.dg_ratio) == 0:
          if training_config.debug:
            logger.info('model.d_loss before train: ')
            logger.info(sess.run([model.d_loss]))
          _, dc = sess.run([d_solver, model.d_loss])
          if training_config.debug:
            logger.info('model.d_loss after train: ')
            logger.info(sess.run([model.d_loss]))
          if np.mod(step, training_config.dispFreq) == 0:
            logger.info('Cost D {}'.format(dc))

        if np.mod(step, training_config.saveFreq) == 0:
          logger.info('Saving model...')
          save_path = saver.save(sess, logPath + time_str + ".ckpt")
          logger.info('Model saved in file: %s' % save_path)
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = show_and_tell_model.ShowAndTellModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()

    # Set up the learning rate.
    learning_rate_decay_fn = None
    if FLAGS.train_inception:
      learning_rate = tf.constant(training_config.train_inception_learning_rate)
    else:
      learning_rate = tf.constant(training_config.initial_learning_rate)
      if training_config.learning_rate_decay_factor > 0:
        num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                 model_config.batch_size)
        decay_steps = int(num_batches_per_epoch *
                          training_config.num_epochs_per_decay)

        def _learning_rate_decay_fn(learning_rate, global_step):
          return tf.train.exponential_decay(
              learning_rate,
              global_step,
              decay_steps=decay_steps,
              decay_rate=training_config.learning_rate_decay_factor,
              staircase=True)

        learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

    # Set up summaries; slim.learning.train starts the input threads itself.
    summary_op = tf.summary.merge_all()
    save_summaries_secs = 10
    summary_writer = tf.summary.FileWriter('./log_train')

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver,
      summary_op=summary_op,
      save_summaries_secs=save_summaries_secs,
      summary_writer=summary_writer)
def main(unused_argv):
  assert FLAGS.train_dir, "--train_dir is required"
  np.random.seed(1)

  model_config = configuration.ModelConfig()
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    with tf.variable_scope("train"):
      model = sim_model.SimModel(model_config, mode="train")
      model.build()
    # TODO(error? placeholder)
    # with tf.variable_scope("train", reuse=True):
    #   eval_model = sim_model.SimModel(model_config, mode="eval")
    #   eval_model.build()

    # Set up the learning rate.
    learning_rate_decay_fn = None
    learning_rate = tf.constant(training_config.initial_learning_rate)
    if training_config.learning_rate_decay_factor > 0:
      num_batches_per_epoch = (training_config.num_examples_per_epoch /
                               model_config.batch_size)
      decay_steps = int(num_batches_per_epoch *
                        training_config.num_epochs_per_decay)

      def _learning_rate_decay_fn(learning_rate, global_step):
        return tf.train.exponential_decay(
            learning_rate,
            global_step,
            decay_steps=decay_steps,
            decay_rate=training_config.learning_rate_decay_factor,
            staircase=True)

      learning_rate_decay_fn = _learning_rate_decay_fn

    optimizer = tf.train.AdamOptimizer(learning_rate,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1.0)

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    #################
    # Summary
    #################
    for var in tf.trainable_variables():
      tf.summary.histogram("params/" + var.op.name, var)

    # Set up the Saver.
    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)
    # restore = tf.train.Saver()

  # Run training.
  with g.as_default():
    global_step = model.global_step
    init = tf.global_variables_initializer()
    dev_summary_op = tf.summary.merge_all()
    train_summary_op = dev_summary_op

    ##################
    # Session config
    ##################
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
    sess = tf.Session(config=tf.ConfigProto(
        device_count={"CPU": 4},  # limit to num_cpu_core CPU usage
        intra_op_parallelism_threads=2,
        inter_op_parallelism_threads=2,
        gpu_options=gpu_options,
        allow_soft_placement=True))
    sess.run(init)

    ###################
    # Debug
    # https://www.tensorflow.org/programmers_guide/debugger
    # `run -f has_inf_or_nan`
    ###################
    if FLAGS.debug == True:
      sess = tf_debug.LocalCLIDebugWrapperSession(sess)
      sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

    ####################
    # Restore checkpoint
    ####################
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)
      print('Successfully loaded model from %s' % ckpt.model_checkpoint_path)
    else:
      print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)

    summary_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.train_dir, "summaries", "train"), graph=sess.graph)
    dev_summary_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.train_dir, "summaries", "dev"), graph=sess.graph)

    # TODO: should read data
    test_reader = reader.Test_batch()
    step = 0
    for feats in reader.batch_inputs():
      step = step + 1
      if step > FLAGS.number_of_steps:
        break
      start_time = time.time()
      feed_dict = {
          model.input_seqs: feats[0],
          model.input_mask: feats[1],
          # model.feat: feats[2],
          model.labels: feats[2],
      }
      loss_value, acc_value = sess.run([train_op, model.acc], feed_dict)
      duration = time.time() - start_time
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 50 == 0:
        # examples_per_sec = model_config.batch_size / float(duration)
        format_str = ('%s: step %d, loss = %.2f ,acc = %.2f')
        print(format_str % (datetime.now(), step, np.mean(loss_value),
                            acc_value))

      if step % 200 == 0:
        summary_str = sess.run(train_summary_op, feed_dict)
        summary_writer.add_summary(summary_str, step)

      if step % 400 == 0:
        dev_data = test_reader.next()
        feed_dict = {
            model.input_seqs: dev_data[0],
            model.input_mask: dev_data[1],
            # model.feat: dev_data[2],
            model.labels: dev_data[2],
        }
        dev_summary_str = sess.run(dev_summary_op, feed_dict)
        dev_summary_writer.add_summary(dev_summary_str, step)

      if step % 5000 == 0 or (step + 1) == FLAGS.number_of_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
from __future__ import print_function

import argparse
import os
import sys
from datetime import datetime

import numpy as np
import tensorflow as tf

import configuration
from ShowAndTellModel import build_model
from coco_utils import load_coco_data, sample_coco_minibatch, decode_captions
from image_utils import image_from_url, write_text_on_image

model_config = configuration.ModelConfig()
training_config = configuration.TrainingConfig()
FLAGS = None
savedModelName = 'model1.0.ckpt'
mode = 'train'


def _run_validation(sess, data, batch_size, model, keep_prob):
  """Make a single gradient update for batch data."""
  # Make a minibatch of validation data.
  minibatch = sample_coco_minibatch(data, batch_size=batch_size, split='val')
  captions, features, urls = minibatch
  captions_in = captions[:, 0].reshape(-1, 1)
# Stdlib/third-party imports used below; the project helpers (save_flags,
# load, log, vocabulary, caption_generator, show_and_tell_model) are assumed
# to come from the surrounding repository.
import datetime
import math
import os
import time

import numpy as np
import tensorflow as tf


def main(unused_argv):
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.train_dir, "--train_dir is required"

    # Create the training directory.
    train_dir = FLAGS.train_dir
    filename_saved_model = os.path.join(FLAGS.train_dir, 'im2txt')
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    save_flags(os.path.join(FLAGS.train_dir, 'flags.txt'))

    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
    training_config = configuration.TrainingConfig()

    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model (teacher-forcing mode).
        model = show_and_tell_model.ShowAndTellModel(
            model_config, mode="train", train_inception=FLAGS.train_inception)
        model.build()

        # Build the model (free-running mode), sharing variables with the
        # teacher-forced model.
        model_free = show_and_tell_model.ShowAndTellModel(
            model_config, mode="free", train_inception=FLAGS.train_inception,
            vocab=vocab, reuse=True)
        model_free.build(
            [model.images, model.input_seqs, model.target_seqs, model.input_mask])

        # Build the model for validation, again with variable sharing.
        model_valid = show_and_tell_model.ShowAndTellModel(
            model_config, mode="inference", reuse=True)
        model_valid.build()

        # Get the teacher behavior.
        teacher_outputs, [teacher_state_c, teacher_state_h] = model.behavior
        teacher_state_c = tf.expand_dims(teacher_state_c, axis=1)
        teacher_state_h = tf.expand_dims(teacher_state_h, axis=1)

        # Get the free-running behavior.
        free_outputs, [free_state_c, free_state_h] = model_free.behavior
        free_state_c = tf.expand_dims(free_state_c, axis=1)
        free_state_h = tf.expand_dims(free_state_h, axis=1)

        # Get the free-running sentence.
        free_sentence = model_free.free_sentence

        # Prepare the behaviors to be the discriminator LSTM's input.
        teacher_behavior = tf.concat(
            [teacher_outputs, teacher_state_c, teacher_state_h], axis=1)
        free_behavior = tf.concat(
            [free_outputs, free_state_c, free_state_h], axis=1)

        d_lstm_cell = tf.contrib.rnn.BasicLSTMCell(model_config.num_lstm_units)
        d_lstm_cell = tf.contrib.rnn.DropoutWrapper(
            d_lstm_cell,
            input_keep_prob=model_config.lstm_dropout_keep_prob,
            output_keep_prob=model_config.lstm_dropout_keep_prob)

        with tf.variable_scope("discriminator") as scope_disc:
            teacher_lengths = tf.reduce_sum(model.input_mask, 1) + 2
            free_lengths = tf.ones_like(teacher_lengths) * (30 + 2)

            # Teacher behavior into the discriminator LSTM.
            d_outputs_teacher, _ = tf.nn.dynamic_rnn(
                cell=d_lstm_cell,
                inputs=teacher_behavior,
                sequence_length=teacher_lengths,
                dtype=tf.float32,
                scope=scope_disc)

            # Gather the last outputs (deals with variable caption lengths).
            teacher_lengths = tf.expand_dims(teacher_lengths, 1)
            batch_range = tf.expand_dims(
                tf.constant(np.array(range(model_config.batch_size)),
                            dtype=tf.int32), 1)
            gather_idx = tf.concat([batch_range, teacher_lengths - 1], axis=1)
            d_last_output_teacher = tf.gather_nd(d_outputs_teacher, gather_idx)

            # Concatenate the Inception feature.
            d_teacher_concat = tf.concat(
                [d_last_output_teacher, model.image_embeddings], 1)

            # Fully connected layer to get real/fake logits.
            # (Alternative: feed d_last_output_teacher alone instead of the concat.)
            d_logits_teacher = tf.contrib.layers.fully_connected(
                inputs=d_teacher_concat,
                num_outputs=2,
                activation_fn=None,
                weights_initializer=model.initializer,
                scope=scope_disc)
            scope_disc.reuse_variables()

            # Teacher behavior paired with a wrong (cyclically shifted) image.
            wrong_image_embeddings = tf.concat(
                [model.image_embeddings[1:], model.image_embeddings[0:1]], 0)
            d_teacher_wrong_concat = tf.concat(
                [d_last_output_teacher, wrong_image_embeddings], 1)
            d_logits_teacher_wrong = tf.contrib.layers.fully_connected(
                inputs=d_teacher_wrong_concat,
                num_outputs=2,
                activation_fn=None,
                weights_initializer=model.initializer,
                scope=scope_disc)

            # Free-running behavior into the discriminator LSTM.
            d_outputs_free, _ = tf.nn.dynamic_rnn(
                cell=d_lstm_cell,
                inputs=free_behavior,
                sequence_length=free_lengths,
                dtype=tf.float32,
                scope=scope_disc)
            d_last_output_free = d_outputs_free[:, -1, :]
            d_free_concat = tf.concat(
                [d_last_output_free, model.image_embeddings], 1)
            # (Alternative: feed d_last_output_free alone instead of the concat.)
            d_logits_free = tf.contrib.layers.fully_connected(
                inputs=d_free_concat,
                num_outputs=2,
                activation_fn=None,
                weights_initializer=model.initializer,
                scope=scope_disc)

            d_accuracy_teacher = tf.reduce_mean(
                tf.cast(tf.argmax(d_logits_teacher, axis=1), tf.float32))
            d_accuracy_teacher_wrong = tf.reduce_mean(
                tf.cast(1 - tf.argmax(d_logits_teacher_wrong, axis=1), tf.float32))
            d_accuracy_free = tf.reduce_mean(
                tf.cast(1 - tf.argmax(d_logits_free, axis=1), tf.float32))
            d_accuracy = (d_accuracy_teacher + d_accuracy_teacher_wrong +
                          d_accuracy_free) / 3

        NLL_loss = model.total_loss
        d_loss_teacher = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                name='d_loss_teacher',
                logits=d_logits_teacher,
                labels=tf.ones_like(d_logits_teacher)))
        d_loss_teacher_wrong = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                name='d_loss_teacher_wrong',
                logits=d_logits_teacher_wrong,
                labels=tf.zeros_like(d_logits_teacher_wrong)))
        d_loss_free = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                name='d_loss_free',
                logits=d_logits_free,
                labels=tf.zeros_like(d_logits_free)))
        d_loss = d_loss_teacher + d_loss_teacher_wrong + d_loss_free

        g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                name='g_loss',
                logits=d_logits_free,
                labels=tf.ones_like(d_logits_free)))
        g_and_NLL_loss = g_loss + NLL_loss

        # Summaries.
        summary_NLL_loss = tf.summary.scalar('NLL_loss', NLL_loss)
        summary_temp_list = [
            tf.summary.scalar('d_loss', d_loss),
            tf.summary.scalar('d_loss_teacher', d_loss_teacher),
            tf.summary.scalar('d_loss_teacher_wrong', d_loss_teacher_wrong),
            tf.summary.scalar('d_loss_free', d_loss_free),
            tf.summary.scalar('d_accuracy', d_accuracy),
            tf.summary.scalar('d_accuracy_teacher', d_accuracy_teacher),
            tf.summary.scalar('d_accuracy_teacher_wrong', d_accuracy_teacher_wrong),
            tf.summary.scalar('d_accuracy_free', d_accuracy_free),
        ]
        summary_disc = tf.summary.merge(summary_temp_list)
        summary_temp_list = [
            tf.summary.scalar('g_and_NLL_loss', g_and_NLL_loss),
            summary_NLL_loss,
            tf.summary.scalar('g_loss', g_loss),
            tf.summary.scalar('d_accuracy_free', d_accuracy_free),
        ]
        summary_gen = tf.summary.merge(summary_temp_list)

        # Set up the learning rate for the training ops.
        learning_rate_decay_fn = None
        if FLAGS.train_inception:
            learning_rate = tf.constant(
                training_config.train_inception_learning_rate)
        else:
            learning_rate = tf.constant(training_config.initial_learning_rate)
            if training_config.learning_rate_decay_factor > 0:
                num_batches_per_epoch = (training_config.num_examples_per_epoch //
                                         model_config.batch_size)
                decay_steps = int(num_batches_per_epoch *
                                  training_config.num_epochs_per_decay)

                def _learning_rate_decay_fn(learning_rate, global_step):
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=training_config.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _learning_rate_decay_fn

        # Collect the trainable variables.
        vars_all = [v for v in tf.trainable_variables()
                    if v not in model.inception_variables]
        d_vars = [v for v in vars_all if 'discr' in v.name]
        g_vars = [v for v in vars_all if 'discr' not in v.name]

        # Set up the training ops.
        train_op_NLL = tf.contrib.layers.optimize_loss(
            loss=NLL_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn,
            variables=g_vars,
            name='optimize_NLL_loss')
        train_op_disc = tf.contrib.layers.optimize_loss(
            loss=d_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn,
            variables=d_vars,
            name='optimize_disc_loss')
        train_op_gen = tf.contrib.layers.optimize_loss(
            loss=NLL_loss + g_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn,
            variables=g_vars,
            name='optimize_gen_loss')

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

        with tf.Session() as sess:
            # Initialize all variables.
            tf.global_variables_initializer().run()
            # Load the Inception variables.
            model.init_fn(sess)

            nBatches = num_batches_per_epoch
            summaryWriter = tf.summary.FileWriter(train_dir, sess.graph)

            # Start the input enqueue threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            counter = 0
            start_time = time.time()
            could_load, checkpoint_counter = load(sess, saver, train_dir)
            if could_load:
                counter = checkpoint_counter

            generator = caption_generator.CaptionGenerator(
                model_valid, vocab, beam_size=1)

            try:
                # Load the validation images.
                f_valid_text = open(os.path.join(train_dir, 'valid.txt'), 'a')
                filenames = os.listdir('testimgs')
                filenames.sort()
                valid_images = []
                print('validation image filenames')
                for filename in filenames:
                    # Read the image bytes ('rb', since JPEG data is binary).
                    with tf.gfile.GFile(os.path.join('testimgs', filename), 'rb') as f:
                        valid_images.append(f.read())
                    print(filename)

                # # Run inference with the untrained model:
                # for i, valid_image in enumerate(valid_images):
                #     captions = generator.beam_search(sess, valid_image)
                #     f_valid_text.write('initial caption (beam) {}\n'.format(
                #         str(datetime.datetime.now().time())[:-7]))
                #     for j, caption in enumerate(captions):
                #         sentence = [vocab.id_to_word(w)
                #                     for w in caption.sentence[1:-1]]
                #         sentence = " ".join(sentence)
                #         sentence = " {}-{}) {} (p={:.8f})".format(
                #             i + 1, j + 1, sentence, math.exp(caption.logprob))
                #         print(sentence)
                #         f_valid_text.write(sentence + '\n')
                # f_valid_text.flush()

                # Run the training loop.
                lossnames_to_print = ['NLL_loss', 'g_loss', 'd_loss', 'd_acc', 'g_acc']
                val_NLL_loss = float('Inf')
                val_g_loss = float('Inf')
                val_d_loss = float('Inf')
                val_d_acc = 0
                val_g_acc = 0
                for epoch in range(FLAGS.number_of_steps):
                    for batch_idx in range(nBatches):
                        counter += 1
                        is_disc_trained = False
                        is_gen_trained = False
                        # Warm up with the NLL loss alone until it drops below 3,
                        # then alternate discriminator / generator updates.
                        if val_NLL_loss > 3:
                            (_, val_NLL_loss, smry, val_free_sentence,
                             val_teacher_sentence) = sess.run(
                                 [train_op_NLL, NLL_loss, summary_NLL_loss,
                                  free_sentence, model.input_seqs])
                            summaryWriter.add_summary(smry, counter)
                        else:
                            # Train the discriminator.
                            _, val_d_loss, val_d_acc, smry = sess.run(
                                [train_op_disc, d_loss, d_accuracy, summary_disc])
                            summaryWriter.add_summary(smry, counter)
                            # Train the generator twice.
                            (_, val_g_loss, val_NLL_loss, val_g_acc, smry,
                             val_free_sentence, val_teacher_sentence) = sess.run(
                                 [train_op_gen, g_loss, NLL_loss, d_accuracy,
                                  summary_gen, free_sentence, model.input_seqs])
                            summaryWriter.add_summary(smry, counter)
                            (_, val_g_loss, val_NLL_loss, val_g_acc, smry,
                             val_free_sentence, val_teacher_sentence) = sess.run(
                                 [train_op_gen, g_loss, NLL_loss, d_accuracy,
                                  summary_gen, free_sentence, model.input_seqs])
                            summaryWriter.add_summary(smry, counter)

                        if counter % FLAGS.log_every_n_steps == 0:
                            elapsed = time.time() - start_time
                            log(epoch, batch_idx, nBatches, lossnames_to_print,
                                [val_NLL_loss, val_g_loss, val_d_loss,
                                 val_d_acc, val_g_acc],
                                elapsed, counter)

                        if counter % 500 == 1 or \
                                (epoch == FLAGS.number_of_steps - 1 and
                                 batch_idx == nBatches - 1):
                            saver.save(sess, filename_saved_model,
                                       global_step=counter)

                        if (batch_idx + 1) % (nBatches // 10) == 0 or \
                                batch_idx == nBatches - 1:
                            # Run validation roughly ten times per epoch.
                            f_valid_text.write(
                                'count {} epoch {} batch {}/{} ({})\n'.format(
                                    counter, epoch, batch_idx, nBatches,
                                    str(datetime.datetime.now().time())[:-7]))
                            for i, valid_image in enumerate(valid_images):
                                captions = generator.beam_search(sess, valid_image)
                                for j, caption in enumerate(captions):
                                    sentence = [vocab.id_to_word(w)
                                                for w in caption.sentence[1:-1]]
                                    sentence = " ".join(sentence)
                                    sentence = " {}-{}) {} (p={:.8f})".format(
                                        i + 1, j + 1, sentence,
                                        math.exp(caption.logprob))
                                    print(sentence)
                                    f_valid_text.write(sentence + '\n')
                            # Check a few free-running sentences against their
                            # teacher-forced references.
                            for i, caption in enumerate(val_free_sentence):
                                if i > 9:
                                    break
                                sentence = [vocab.id_to_word(w) for w in caption]
                                sentence = " ".join(sentence)
                                sentence = " free %d) %s" % (i + 1, sentence)
                                print(sentence)
                                f_valid_text.write(sentence + '\n')
                                sentence = [vocab.id_to_word(w)
                                            for w in val_teacher_sentence[i, 1:]]
                                sentence = " ".join(sentence)
                                sentence = " teacher %d) %s\n" % (i + 1, sentence)
                                print(sentence)
                                f_valid_text.write(sentence + '\n')
                            f_valid_text.flush()
            except tf.errors.OutOfRangeError:
                print('Finished training: epoch limit reached')
            finally:
                coord.request_stop()
                coord.join(threads)
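# A self-contained sketch (with made-up sizes) of the gather_nd trick used
# above to pull the last valid dynamic_rnn output of each variable-length
# sequence; runnable under TF 1.x.
import numpy as np
import tensorflow as tf

batch, time_steps, units = 4, 7, 8
outputs = tf.placeholder(tf.float32, [batch, time_steps, units])
lengths = tf.placeholder(tf.int32, [batch])

# Pair each batch index with its (length - 1) time index, then gather.
batch_range = tf.expand_dims(tf.range(batch, dtype=tf.int32), 1)
gather_idx = tf.concat([batch_range, tf.expand_dims(lengths, 1) - 1], axis=1)
last_outputs = tf.gather_nd(outputs, gather_idx)  # shape [batch, units]

with tf.Session() as sess:
    res = sess.run(last_outputs, {
        outputs: np.random.randn(batch, time_steps, units),
        lengths: [3, 7, 1, 5],
    })
    print(res.shape)  # (4, 8)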
# Python 2 script (cPickle, xrange); imports assumed from the original file.
import cPickle

import numpy as np
import scipy.io
import tensorflow as tf

import configuration


def main(unused_argv):
    # Load the data from disk.
    x = cPickle.load(open("./data/mscoco/data.p", "rb"))
    train, val, test = x[0], x[1], x[2]
    wordtoix, ixtoword = x[3], x[4]
    del x
    n_words = len(ixtoword)
    x = cPickle.load(open("./data/mscoco/word2vec.p", "rb"))
    # Renamed from W: the original name is shadowed by the LSTM weight below.
    W_word2vec = x[0]
    del x
    data = scipy.io.loadmat('./data/mscoco/resnet_feats.mat')
    img_feats = data['feats'].astype(float)
    print("finished loading data")

    g = tf.Graph()
    with g.as_default():
        # Create the config objects that hold the model and training configs.
        model_config = configuration.ModelConfig()
        training_config = configuration.TrainingConfig()

        # Initializer method.
        initializer = tf.random_uniform_initializer(
            minval=-model_config.initializer_scale,
            maxval=model_config.initializer_scale)

        batch_size = model_config.batch_size  # batch_size = 32
        image_fea = tf.placeholder(tf.float32, shape=[None, 2048])
        input_seqs = tf.placeholder(tf.int32, shape=[None, None])
        target_seqs = tf.placeholder(tf.int32, shape=[None, None])
        input_mask = tf.placeholder(tf.int32, shape=[None, None])

        # Create the sequence embedding map (randomly initialized).
        with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"):
            embedding_map = tf.get_variable(
                name="map",
                shape=[model_config.vocab_size, model_config.embedding_size],
                initializer=initializer)
            seq_embeddings = tf.nn.embedding_lookup(embedding_map, input_seqs)

        # Input dropout.
        seq_embeddings = tf.nn.dropout(
            seq_embeddings, keep_prob=model_config.lstm_dropout_keep_prob)

        # Create the image embedding layer (a plain fully connected layer).
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=image_fea,
                num_outputs=model_config.embedding_size,
                activation_fn=None,
                weights_initializer=initializer,
                biases_initializer=None,
                scope=scope)

        # The stock BasicLSTMCell / dynamic_rnn version that the hand-rolled
        # tf.scan LSTM below replaces:
        '''
        # Create the LSTM cell.
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(
            num_units=model_config.num_lstm_units, state_is_tuple=True)
        # Add dropout in training mode; it is removed in the testing model.
        lstm_cell = tf.contrib.rnn.DropoutWrapper(
            lstm_cell,
            input_keep_prob=model_config.lstm_dropout_keep_prob,
            output_keep_prob=model_config.lstm_dropout_keep_prob)
        with tf.variable_scope("lstm", initializer=initializer) as lstm_scope:
            # Feed the image embeddings to set the initial LSTM state.
            zero_state = lstm_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32)
            _, initial_state = lstm_cell(image_embeddings, zero_state)
            # Allow the LSTM variables to be reused.
            lstm_scope.reuse_variables()
            # Run the batch of sequence embeddings through the LSTM.
            sequence_length = tf.reduce_sum(input_mask, 1)
            lstm_outputs, _ = tf.nn.dynamic_rnn(
                cell=lstm_cell,
                inputs=seq_embeddings,
                sequence_length=sequence_length,
                initial_state=initial_state,
                dtype=tf.float32,
                scope=lstm_scope)
        # Stack batches vertically.
        lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])
        '''

        # Hand-rolled LSTM weights: U projects the input, W the recurrent
        # state (this assumes embedding_size == num_lstm_units).
        W = tf.get_variable(
            'W',
            shape=[4, model_config.num_lstm_units, model_config.num_lstm_units],
            initializer=initializer)
        U = tf.get_variable(
            'U',
            shape=[4, model_config.num_lstm_units, model_config.num_lstm_units],
            initializer=initializer)

        def step(prev, x):
            # Unpack the previous output state and internal cell state.
            st_1, ct_1 = tf.unstack(prev)
            # GATES
            i = tf.sigmoid(tf.matmul(x, U[0]) + tf.matmul(st_1, W[0]))  # input gate
            f = tf.sigmoid(tf.matmul(x, U[1]) + tf.matmul(st_1, W[1]))  # forget gate
            o = tf.sigmoid(tf.matmul(x, U[2]) + tf.matmul(st_1, W[2]))  # output gate
            g = tf.tanh(tf.matmul(x, U[3]) + tf.matmul(st_1, W[3]))     # candidate
            # New internal cell state.
            ct = ct_1 * f + g * i
            # Output state.
            st = tf.tanh(ct) * o
            return tf.stack([st, ct])

        # Initialize both (st, ct) with the image embedding and scan over time.
        image_embeddings = tf.stack([image_embeddings, image_embeddings])
        states = tf.scan(step,
                         tf.transpose(seq_embeddings, [1, 0, 2]),
                         initializer=image_embeddings)
        # states: [time, 2, batch, units] -> keep only the output state st.
        states = tf.transpose(states, [1, 2, 0, 3])[0]
        lstm_outputs = tf.reshape(states, [-1, model_config.num_lstm_units])

        # Output dropout.
        lstm_outputs = tf.nn.dropout(
            lstm_outputs, keep_prob=model_config.lstm_dropout_keep_prob)

        with tf.variable_scope("logits") as logits_scope:
            logits = tf.contrib.layers.fully_connected(
                inputs=lstm_outputs,
                num_outputs=model_config.vocab_size,
                activation_fn=None,
                weights_initializer=initializer,
                scope=logits_scope)

        targets = tf.reshape(target_seqs, [-1])
        weights = tf.to_float(tf.reshape(input_mask, [-1]))

        # Compute the losses.
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=targets, logits=logits)
        batch_loss = tf.div(tf.reduce_sum(tf.multiply(losses, weights)),
                            tf.reduce_sum(weights),
                            name="batch_loss")
        tf.losses.add_loss(batch_loss)
        total_loss = tf.losses.get_total_loss()

        # Add summaries.
        tf.summary.scalar("losses/batch_loss", batch_loss)
        tf.summary.scalar("losses/total_loss", total_loss)
        for var in tf.trainable_variables():
            tf.summary.histogram("parameters/" + var.op.name, var)

        # Global step.
        global_step = tf.Variable(
            initial_value=0,
            name="global_step",
            trainable=False,
            collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])

        # Learning rate.
        learning_rate_decay_fn = None
        learning_rate = tf.constant(training_config.initial_learning_rate)
        if training_config.learning_rate_decay_factor > 0:
            num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                     model_config.batch_size)
            decay_steps = int(num_batches_per_epoch *
                              training_config.num_epochs_per_decay)

            def _learning_rate_decay_fn(learning_rate, global_step):
                return tf.train.exponential_decay(
                    learning_rate,
                    global_step,
                    decay_steps=decay_steps,
                    decay_rate=training_config.learning_rate_decay_factor,
                    staircase=True)

            learning_r = _learning_rate_decay_fn(learning_rate, global_step)

        # Set up the training op. Note: the constant learning_rate is used
        # directly here rather than learning_r / learning_rate_decay_fn.
        train_op = tf.contrib.layers.optimize_loss(
            loss=total_loss,
            global_step=global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=None)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)
        print("finished building the network")

    # gpu_options = tf.GPUOptions(allow_growth=True)
    # sess = tf.Session(graph=g, config=tf.ConfigProto(gpu_options=gpu_options))
    sess = tf.InteractiveSession(graph=g)
    with sess.as_default():
        tf.global_variables_initializer().run()
        print("finished initialization")

        # Prepare the data: prepend the start token 6880 ('#') to the inputs.
        def prepare_data(seqs):
            # seqs: a list of sentences (lists of word indices).
            lengths = [len(s) for s in seqs]
            n_samples = len(seqs)
            maxlen = np.max(lengths)
            inputs = np.zeros((n_samples, maxlen)).astype('int64')
            outputs = np.zeros((n_samples, maxlen)).astype('int64')
            x_mask = np.zeros((n_samples, maxlen)).astype(float)
            for idx, s in enumerate(seqs):
                inputs[idx, 0] = 6880
                inputs[idx, 1:lengths[idx]] = s[:lengths[idx] - 1]
                outputs[idx, :lengths[idx]] = s[:lengths[idx]]
                x_mask[idx, :lengths[idx]] = 1.
            return inputs, x_mask, outputs

        # Generate batch indices, optionally shuffling the data.
        def get_minibatches_idx(n, minibatch_size, shuffle=False):
            idx_list = np.arange(n, dtype="int32")
            if shuffle:
                np.random.shuffle(idx_list)
            minibatches = []
            minibatch_start = 0
            for i in range(n // minibatch_size):
                minibatches.append(
                    idx_list[minibatch_start:minibatch_start + minibatch_size])
                minibatch_start += minibatch_size
            if minibatch_start != n:
                # Make a minibatch out of what is left.
                minibatches.append(idx_list[minibatch_start:])
            return zip(range(len(minibatches)), minibatches)

        max_epochs = 20  # ~57 epochs would correspond to 1,000,000 steps
        for eidx in xrange(max_epochs):
            print("epoch " + str(eidx))
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
            for steps, train_index in kf:
                x = [train[0][t] for t in train_index]
                z = np.array([img_feats[:, train[1][t]] for t in train_index])
                x, mask, y = prepare_data(x)
                if x.shape[0] == batch_size:
                    feed_dict = {image_fea: z, input_seqs: x,
                                 target_seqs: y, input_mask: mask}
                    _, loss_value = sess.run([train_op, total_loss],
                                             feed_dict=feed_dict)
                    if steps % 100 == 0:  # print the loss every 100 steps
                        # 17710 appears to be the number of batches per epoch.
                        print("steps:" + str(steps + eidx * 17710))
                        print("loss_value:" + str(loss_value))
            saver_path = saver.save(sess, "log/model.ckpt", global_step=eidx)
        print("Model saved in file:", saver_path)
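# A quick illustration (toy indices, not the MSCOCO vocabulary) of what
# prepare_data above produces, assuming it is lifted to module level: inputs
# shifted right behind the start token 6880, outputs equal to the raw
# sequence, and a 0/1 mask over the real tokens.
toy = [[11, 12, 13], [21, 22]]
inputs, mask, outputs = prepare_data(toy)
print(inputs)   # [[6880 11 12]
                #  [6880 21  0]]
print(outputs)  # [[11 12 13]
                #  [21 22  0]]
print(mask)     # [[1. 1. 1.]
                #  [1. 1. 0.]]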
def evaluate(self):
    g = tf.Graph()
    with g.as_default():
        model_config = configuration.ModelConfig()
        training_config = configuration.TrainingConfig()

        # Initializer method.
        initializer = tf.random_uniform_initializer(
            minval=-model_config.initializer_scale,
            maxval=model_config.initializer_scale)

        seq_embeddings = None
        image_feed = tf.placeholder(dtype=tf.float32, shape=[2048],
                                    name="image_feed")
        input_feed = tf.placeholder(dtype=tf.int32,
                                    shape=[None],  # batch_size
                                    name="input_feed")

        # Process the image and insert batch dimensions.
        image_fea = tf.expand_dims(image_feed, 0)
        input_seqs = tf.expand_dims(input_feed, 0)

        with tf.variable_scope("seq_embedding"), tf.device("/gpu:0"):
            embedding_map = tf.get_variable(
                name="map",
                shape=[model_config.vocab_size, model_config.embedding_size],
                initializer=initializer)
            seq_embeddings = tf.nn.embedding_lookup(embedding_map, input_seqs)

        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=image_fea,
                num_outputs=model_config.embedding_size,
                activation_fn=None,
                weights_initializer=initializer,
                biases_initializer=None,
                scope=scope)

        lstm_cell = tf.contrib.rnn.BasicLSTMCell(
            num_units=model_config.num_lstm_units, state_is_tuple=True)

        with tf.variable_scope("lstm", initializer=initializer) as lstm_scope:
            # Feed the image embeddings to set the initial LSTM state.
            zero_state = lstm_cell.zero_state(
                batch_size=image_embeddings.get_shape()[0], dtype=tf.float32)
            _, initial_state = lstm_cell(image_embeddings, zero_state)

            # Allow the LSTM variables to be reused.
            lstm_scope.reuse_variables()

            # In inference mode, use concatenated states for convenient
            # feeding and fetching.
            tf.concat(axis=1, values=initial_state, name="initial_state")

            # Placeholder for feeding a batch of concatenated states.
            state_feed = tf.placeholder(dtype=tf.float32,
                                        shape=[None, sum(lstm_cell.state_size)],
                                        name="state_feed")
            state_tuple = tf.split(value=state_feed, num_or_size_splits=2, axis=1)

            # Run a single LSTM step.
            lstm_outputs, state_tuple = lstm_cell(
                inputs=tf.squeeze(seq_embeddings, axis=[1]),
                state=state_tuple)

            # Concatenate the resulting state.
            tf.concat(axis=1, values=state_tuple, name="state")

        # Stack batches vertically.
        lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])

        with tf.variable_scope("logits") as logits_scope:
            logits = tf.contrib.layers.fully_connected(
                inputs=lstm_outputs,
                num_outputs=model_config.vocab_size,
                activation_fn=None,
                weights_initializer=initializer,
                scope=logits_scope)
        tf.nn.softmax(logits, name="softmax")

        global_step = tf.Variable(
            initial_value=0,
            name="global_step",
            trainable=False,
            collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

    sess = tf.Session(graph=g)
    # Load the trained model. (Note: training saved checkpoints with a
    # global_step suffix, so the path may need that suffix appended.)
    with sess.as_default():
        saver.restore(sess, "log/model.ckpt")
        print("finished initialization")

        x = self.val[0]
        lengths = [len(s) for s in x]
        n_samples = len(x)
        maxlen = np.max(lengths)

        # Remove duplicates, because one image has many captions.
        val_re = []
        for i in range(n_samples):
            if self.val[1][i] not in val_re:
                val_re.append(self.val[1][i])
        n_samples = len(val_re)
        print("n_samples:" + str(n_samples) + " maxlen:" + str(maxlen))
        z = np.array([self.img_feats[:, val_re[t]] for t in range(n_samples)])
        cap = np.zeros((n_samples, maxlen))

        # Generate captions, feeding words one by one to the model. Start
        # with the start token and stop when 0 ('.') is produced.
        for num in range(n_samples):
            if num % 1000 == 0:
                print(num)
            initial_state = sess.run(fetches="lstm/initial_state:0",
                                     feed_dict={"image_feed:0": z[num]})
            # NOTE: training prepended 6880 as the start token; 6800 is kept
            # from the original source.
            input_feed = np.array([6800])
            state_feed = initial_state
            for s in range(maxlen):
                softmax_output, state_output = sess.run(
                    fetches=["softmax:0", "lstm/state:0"],
                    feed_dict={
                        "input_feed:0": input_feed,
                        "lstm/state_feed:0": state_feed,
                    })
                softmax_output = softmax_output.reshape(softmax_output.shape[1])
                # Greedy decoding: pick the most probable word.
                input_feed = [np.argsort(-softmax_output)[0]]
                state_feed = state_output
                cap[num][s] = input_feed[0]
                if input_feed[0] == 0:
                    break

        # Map the indices back to real words (cap holds floats, so cast).
        precaptext = []
        for i in range(n_samples):
            temcap = []
            for j in range(maxlen):
                if cap[i][j] != 0:
                    temcap.append(self.ixtoword[int(cap[i][j])])
                else:
                    break
            precaptext.append(" ".join(temcap))

        # Save the results to 'coco_5k_test.txt'.
        print('writing generated captions into a text file...')
        open('./coco_5k_test.txt', 'w').write('\n'.join(precaptext))
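# Side note (sketch, not part of the original file): the decoding loop above
# takes the top index via np.argsort(-softmax_output)[0], which is equivalent
# to np.argmax; argsort only pays off if this is extended to top-k or beam
# search.
import numpy as np

p = np.array([0.1, 0.6, 0.3])
assert np.argsort(-p)[0] == np.argmax(p)  # both pick index 1
top_k = np.argsort(-p)[:2]                # argsort generalizes to top-k
print(top_k)  # [1 2]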
def main(unused_argv):
    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    if FLAGS.vgg_checkpoint_file != "":
        model_config.vgg_checkpoint_file = FLAGS.vgg_checkpoint_file
    if FLAGS.ctc_ocr_checkpoint_file != "":
        model_config.ctc_ocr_checkpoint_file = FLAGS.ctc_ocr_checkpoint_file
    training_config = configuration.TrainingConfig()

    # Create the training directory, removing any previous run.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    else:
        tf.logging.info("Removing the old training directory: %s", train_dir)
        tf.gfile.DeleteRecursively(train_dir)

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = ctc_ocr_model.CtcOcrModel(model_config, mode='train')
        model.build()

        # Set up the learning rate.
        learning_rate_decay_fn = None
        learning_rate = tf.constant(training_config.initial_learning_rate)
        if training_config.learning_rate_decay_factor > 0:
            num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                     model_config.batch_size)
            decay_steps = int(num_batches_per_epoch *
                              training_config.num_epochs_per_decay)

            def _learning_rate_decay_fn(learning_rate, global_step):
                return tf.train.exponential_decay(
                    learning_rate,
                    global_step,
                    decay_steps=decay_steps,
                    decay_rate=training_config.learning_rate_decay_factor,
                    staircase=True)

            learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

        # Create the initialization function, preferring a full checkpoint
        # restore over the pretrained-weights init.
        init_fn = None
        if model.restore_fn:
            init_fn = model.restore_fn
        elif model.init_fn:
            init_fn = model.init_fn

    # Run training.
    tf.contrib.slim.learning.train(
        train_op,
        train_dir,
        log_every_n_steps=FLAGS.log_every_n_steps,
        graph=g,
        global_step=model.global_step,
        number_of_steps=FLAGS.number_of_steps,
        init_fn=init_fn,
        save_summaries_secs=20,
        saver=saver)
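# Worked example (illustrative numbers, not necessarily this repo's
# configuration) of the decay schedule above: with staircase=True, the
# learning rate is multiplied by decay_rate once every decay_steps global
# steps, where decay_steps is derived from the epoch size.
num_examples_per_epoch = 586363
batch_size = 32
num_epochs_per_decay = 8
num_batches_per_epoch = num_examples_per_epoch / batch_size  # 18323.84...
decay_steps = int(num_batches_per_epoch * num_epochs_per_decay)
print(decay_steps)  # 146590 -> one decay roughly every 8 epochs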
def main(unused_argv):
    assert FLAGS.input_file_dir, "--input_file_dir is required"
    assert FLAGS.train_dir, "--train_dir is required"

    # Load the configuration.
    model_config = configuration.ModelConfig()
    train_config = configuration.TrainingConfig()
    model_config.train_dir = FLAGS.train_dir
    model_config.input_file_dir = FLAGS.input_file_dir

    # Create the training directory.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info('Creating training directory: %s', train_dir)
        tf.gfile.MakeDirs(train_dir)

    # Load the character embedding table.
    if train_config.embedding_random:
        shape = [len(pickle.load(open('data/vocab.pkl', 'rb'))._vocab),
                 model_config.embedding_size]
    else:
        chr_embedding = pickle.load(open('chr_embedding.pkl', 'rb'))
        shape = chr_embedding.shape

    # Build the graph.
    g = tf.Graph()
    with g.as_default():
        # Set up the embedding table.
        with tf.variable_scope('seq_embedding') as seq_embedding_scope:
            chr_embedding_var = tf.get_variable(
                name='chr_embedding',
                shape=(shape[0], shape[1]),
                trainable=True,
                # NOTE: the original passed (-0.1, 0.1) to
                # tf.initializers.orthogonal, whose arguments are (gain, seed);
                # a uniform initializer over [-0.1, 0.1] was likely intended.
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            if not train_config.embedding_random:
                # Assign op for the pretrained embedding; note it must still
                # be run in a session to take effect.
                embedding_assign_op = chr_embedding_var.assign(chr_embedding)

        # Build the model.
        model = LSTMCWS(model_config, 'train')
        print('Building model...')
        model.build()

        # Set up the learning rate and learning rate decay function.
        learning_rate_decay_fn = None
        learning_rate = tf.constant(train_config.initial_learning_rate)
        if train_config.learning_rate_decay_factor > 0:
            num_batches_per_epoch = (train_config.num_examples_per_epoch /
                                     model_config.batch_size)
            decay_steps = int(num_batches_per_epoch *
                              train_config.num_epochs_per_decay)

            def _learning_rate_decay_fn(learning_rate, global_step):
                return tf.train.exponential_decay(
                    learning_rate,
                    global_step,
                    decay_steps=decay_steps,
                    decay_rate=train_config.learning_rate_decay_factor,
                    staircase=True)

            learning_rate_decay_fn = _learning_rate_decay_fn

        print('Setting up training ops...')
        # Set up the training op.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.batch_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=train_config.optimizer,
            clip_gradients=train_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn,
            name='train_op')

        # Set up the Saver.
        saver = tf.train.Saver(max_to_keep=train_config.max_checkpoints_to_keep)

    gpu_options = tf.GPUOptions(visible_device_list="0",
                                per_process_gpu_memory_fraction=0.33)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)

    print('Start Training...')
    # Run training.
    tf.contrib.slim.learning.train(
        train_op,
        train_dir,
        log_every_n_steps=FLAGS.log_every_n_steps,
        graph=g,
        global_step=model.global_step,
        number_of_steps=train_config.training_step,
        saver=saver,
        save_summaries_secs=30,
        session_config=sess_config)
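# A sketch (assuming the names above) of how the pretrained embedding could
# actually reach the variable: slim's learning.train() accepts an init_fn
# callable that receives the session after variable initialization, so the
# assign op defined earlier can be run there instead of being left dangling.
def make_init_fn(embedding_assign_op):
    def _init_fn(sess):
        # Runs once inside slim.learning.train, after init_op.
        sess.run(embedding_assign_op)
    return _init_fn

# Usage:
# tf.contrib.slim.learning.train(..., init_fn=make_init_fn(embedding_assign_op))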