def main():
    # Do set-up.
    logger = utils.setup_logging(CURR_DIR)
    thread_names.monkey_patch()
    utils.make_lease_deterministic()

    # Get clients and resource paths.
    topic_name = 't-repro-{}'.format(int(1000 * time.time()))
    subscription_name = 's-repro-{}'.format(int(1000 * time.time()))
    client_info = utils.get_client_info(topic_name, subscription_name)
    publisher, topic_path, subscriber, subscription_path = client_info

    # Create a topic, though we won't publish any messages to it.
    publisher.create_topic(topic_path)

    # Subscribe to the topic, even though no messages will be published to it.
    subscriber.create_subscription(subscription_path, topic_path)
    logger.info('Listening for messages on %s', subscription_path)
    subscription = subscriber.subscribe(subscription_path)
    sub_future = subscription.open(utils.AckCallback(logger))

    # The subscriber is non-blocking, so we must keep the main thread from
    # exiting to allow it to process messages in the background.
    utils.heartbeats_block(logger, sub_future)

    # Do clean-up.
    publisher.delete_topic(topic_path)
    subscriber.delete_subscription(subscription_path)
    thread_names.save_tree(CURR_DIR, logger)
    thread_names.restore()
    utils.restore()
def main():
    # Do set-up.
    logger = utils.setup_logging(CURR_DIR)
    thread_names.monkey_patch()
    utils.make_lease_deterministic()

    # Get clients and resource paths.
    topic_name = 't-repro-{}'.format(int(1000 * time.time()))
    subscription_name = 's-repro-{}'.format(int(1000 * time.time()))
    client_info = utils.get_client_info(topic_name, subscription_name)
    publisher, topic_path, subscriber, subscription_path = client_info

    # Create a topic.
    publisher.create_topic(topic_path)

    # Subscribe to the topic. We do this before the messages are
    # published so that we'll receive them as they come in.
    subscriber.create_subscription(subscription_path, topic_path)
    logger.info('Listening for messages on %s', subscription_path)
    subscription = subscriber.subscribe(subscription_path)
    sub_future = subscription.open(utils.AckCallback(logger))

    # Set off async job to publish some messages.
    publish_async(publisher, topic_path, logger)

    # The subscriber is non-blocking, so we must keep the main thread from
    # exiting to allow it to process messages in the background.
    utils.heartbeats_block(logger, sub_future)

    # Do clean-up.
    publisher.delete_topic(topic_path)
    subscriber.delete_subscription(subscription_path)
    thread_names.save_tree(CURR_DIR, logger)
    thread_names.restore()
    utils.restore()
def main(args):
    # Load configuration.
    config = load_config(os.path.join(args.restore, 'config.json'))
    # Create autoencoder.
    ae = get_network(config['hiddens'], logger=g_logger)
    # Build graph.
    sess, saver, _ = build_graph(ae, input_shape=[None, 784])
    restore(sess, saver, args.restore)

    test_result = os.path.join(args.result, 'test')
    # Make the result directory if it does not exist.
    if not os.path.exists(test_result):
        os.makedirs(test_result)

    # Use MNIST for the test.
    mnist = tf.contrib.learn.datasets.load_dataset('mnist')
    row_col_size = 10
    cnt = 0
    for x, y in next_mnist_data(mnist, 'test', batch_size=row_col_size**2):
        x_ = sess.run(ae.x_, feed_dict={ae.x: x})
        save_mnist_images(x, test_result, cnt, suffix='original',
                          row_col_size=row_col_size)
        save_mnist_images(x_, test_result, cnt, suffix='reconstruct',
                          row_col_size=row_col_size)
        cnt += 1
def change_resolution(resolution):
    batch_size = 1
    graph = tf.Graph()
    store_dir = os.path.join(FLAGS.model_dir, 'resolution_' + str(resolution))
    restore_dir = os.path.join(FLAGS.model_dir,
                               'resolution_' + str(resolution // 2))
    tf.gfile.MakeDirs(store_dir)
    ckpt_file = store_dir + '/model.ckp'
    with graph.as_default():  # pylint: disable=E1129
        train_input = dataset.TrainInputFunction(FLAGS.noise_dim, resolution,
                                                 'NHWC')
        params = {
            'data_dir': FLAGS.data_dir,
            'batch_size': batch_size,
            'resolution': resolution
        }
        features, labels = train_input(params)
        optimizers = model_fn(features, labels, 'RESOLUTION_CHANGE', params)
        global_step = tf.train.get_or_create_global_step()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            utils.restore(sess, restore_dir)
            utils.reset_resolution_step()
            for opt in optimizers:
                sess.run(tf.variables_initializer(opt.variables()))
            saver = tf.train.Saver(name='main_saver')
            saver.save(sess, ckpt_file, global_step=global_step)
def train_step(cfg, resolution, restore_dir, store_dir):
    batch_size = cfg.resolution_to_batch_size[resolution]
    graph = tf.Graph()
    tf.gfile.MakeDirs(store_dir)
    ckpt_file = store_dir + '/model.ckp'
    global_step_value = 0
    with graph.as_default():  # pylint: disable=E1129
        train_input = input_pipelines.TrainInputFunction(
            True, cfg.noise_dim, resolution, cfg.data_format)
        params = {'data_dir': cfg.data_dir, 'batch_size': batch_size}
        features, labels = train_input(params)
        train_ops, [g_loss, d_loss], [g_optimizer, d_optimizer] = model_fn(
            features, labels, 'TRAIN', cfg)
        global_step = tf.train.get_or_create_global_step()
        summary = tf.summary.merge_all()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            utils.restore(sess, restore_dir)
            saver = tf.train.Saver(name='main_saver')
            global_step_value = global_step.eval()
            if global_step_value == 0:
                utils.print_layers('Generator')
                utils.print_layers('Discriminator')
            if restore_dir != store_dir and restore_dir is not None:
                utils.print_layers('Generator')
                utils.print_layers('Discriminator')
                utils.reset_resolution_step()
                sess.run(tf.variables_initializer(d_optimizer.variables()))
                sess.run(tf.variables_initializer(g_optimizer.variables()))
                saver.save(sess, ckpt_file, global_step=global_step)
            resolution_summary_writer = tf.summary.FileWriter(store_dir,
                                                              sess.graph)
            start_time = time.time()
            for _ in range(cfg.train_steps_before_eval //
                           cfg.iterations_per_loop):
                start_time = time.time()
                for _ in trange(cfg.iterations_per_loop, leave=False):
                    sess.run(train_ops)
                    # `global_step` is a tensor, so it must be evaluated
                    # before being used in a Python-level conditional.
                    if (global_step.eval() % cfg.resolution_steps == 0 and
                            resolution != cfg.maximum_resolution):
                        break
                elapsed_time = time.time() - start_time
                g_loss_value, d_loss_value, global_step_value = sess.run(
                    [g_loss, d_loss, global_step])
                tf.logging.info(
                    'Step %d - g_loss %f, d_loss %f, Sec/Step %f' %
                    (global_step_value, g_loss_value, d_loss_value,
                     elapsed_time / cfg.iterations_per_loop))
                summary_str = sess.run(summary)
                resolution_summary_writer.add_summary(summary_str,
                                                      global_step_value)
                resolution_summary_writer.flush()
                if (global_step_value % cfg.resolution_steps == 0 and
                        resolution != cfg.maximum_resolution):
                    break
            global_step_value = global_step.eval()
            tf.logging.info('Saving parameters to %s' % ckpt_file)
            saver.save(sess, ckpt_file, global_step=global_step)
    tf.reset_default_graph()
    return global_step_value
def main():
    # Do set-up.
    logger = utils.setup_logging(CURR_DIR)
    thread_names.monkey_patch()
    random_mod = NotRandom(0.75)
    utils.make_lease_deterministic(random_mod)

    # Get clients and resource paths.
    topic_name = 't-repro-{}'.format(int(1000 * time.time()))
    subscription_name = 's-repro-{}'.format(int(1000 * time.time()))
    client_info = utils.get_client_info(topic_name, subscription_name,
                                        policy_class=utils.FlowControlPolicy)
    publisher, topic_path, subscriber, subscription_path = client_info

    # Create a topic and subscription (the subscription must exist when
    # messages are published to the topic).
    publisher.create_topic(topic_path)
    subscriber.create_subscription(subscription_path, topic_path)

    # Set off sync job to publish some messages.
    publish_sync(publisher, topic_path, logger)

    # Sleep to let the backend have some time with its thoughts.
    logger.info('Sleeping for 10s after publishing.')
    time.sleep(10.0)

    # Subscribe to the topic. The messages have already been published,
    # so the subscriber will receive them as a backlog.
    logger.info('Listening for messages on %s', subscription_path)
    subscription = subscriber.subscribe(
        subscription_path,
        flow_control=types.FlowControl(max_messages=MAX_MESSAGES))
    callback = TrackingCallback(SLEEP_TIME, logger)
    sub_future = subscription.open(callback)

    # The subscriber is non-blocking, so we must keep the main thread from
    # exiting to allow it to process messages in the background.
    helper = HeartbeatHelper(callback, subscription)
    utils.heartbeats_block(logger, sub_future, max_time=500, helper=helper)

    # Do clean-up.
    subscription.close()
    subscription._executor.shutdown()  # Idempotent, but needed for 0.29.2.
    publisher.delete_topic(topic_path)
    subscriber.delete_subscription(subscription_path)
    teardown_summary(subscription, logger)
    thread_names.save_tree(CURR_DIR, logger)
    thread_names.restore()
    utils.restore()
def generate_step(cfg, resolution):
    graph = tf.Graph()
    restore_dir = os.path.join(cfg.model_dir, 'resolution_' + str(resolution))
    with graph.as_default():  # pylint: disable=E1129
        # Named `predict_input` to avoid shadowing the `input` builtin.
        predict_input = input_pipelines.PredictInputFunction(cfg.noise_dim,
                                                             resolution)
        params = {'data_dir': cfg.data_dir, 'batch_size': cfg.num_eval_images}
        features, labels = predict_input(params)
        model = model_fn(features, labels, 'PREDICT', cfg)
        global_step = tf.train.get_or_create_global_step()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            utils.restore(sess, restore_dir)
            images = sess.run(model)
            utils.write_images(
                images,
                cfg.model_dir + '/' + str(global_step.eval()).zfill(6) +
                '-' + str(resolution) + '.png',
                cfg.data_format)
    tf.reset_default_graph()
def main(ff_type, bi, input_size):
    print('======building model...======')
    if ff_type == 'magnitude':
        images, labels = regressor_placeholders(input_size * input_size, 1)
        if bi == 'true':
            logits = birnn(images, dim_out=1, partition=ff_type)
        else:
            logits = rnn(images, lstm_size=256, dim_out=1, num_layers=3,
                         partition=ff_type)
        loss = mean_squared_error(labels, logits)
    else:
        images, labels = regressor_placeholders(input_size * input_size * 2, 2)
        if bi == 'true':
            logits = birnn(images, dim_out=2, partition=ff_type)
        else:
            logits = rnn(images, lstm_size=256, dim_out=2, num_layers=2,
                         partition=ff_type)
        loss = mean_squared_error(labels, logits)
    acc = regressor_accuracy(labels, logits, partition=ff_type)
    train_op = unsupervised_optimizer(loss, lr=3e-4)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if bi == 'true':
            saver, save_path = utils.restore(
                sess, get('birnn.' + ff_type + '_checkpoint'))
            data = RNNDataSet(input_size=input_size, partition=ff_type,
                              bidirection=True)
        else:
            saver, save_path = utils.restore(
                sess, get('rnn.' + ff_type + '_checkpoint'))
            data = RNNDataSet(input_size=input_size, partition=ff_type)
        train_rnn(sess, saver, save_path, images, labels, loss, train_op,
                  acc, data)
        print('=======saving trained model...======\n')
        saver.save(sess, save_path)
    utils.hold_training_plot()
def main():
    # Do set-up.
    logger = utils.setup_logging(CURR_DIR)
    CustomBatch.LOGGER = logger
    thread_names.monkey_patch()

    # Get clients and resource paths.
    topic_name = 't-repro-{}'.format(int(1000 * time.time()))
    client_info = utils.get_client_info(topic_name, 's-unused',
                                        batch_class=CustomBatch)
    publisher, topic_path, _, _ = client_info

    # Create a topic.
    publisher.create_topic(topic_path)

    # Set off sync job to publish some messages (won't fail).
    futures_succeed = publish_sync(publisher, topic_path,
                                   NUM_PUBLISH_SUCCEED, logger)
    # The publisher is non-blocking, so we watch it from the main thread.
    helper_succeed = HeartbeatHelper(futures_succeed)
    sub_future = NotFuture()
    utils.heartbeats_block(logger, sub_future, max_time=10,
                           helper=helper_succeed)

    # Set off sync job to publish some messages (will fail, at least
    # in `0.29.4`).
    futures_fail = publish_sync(publisher, topic_path, NUM_PUBLISH_FAIL,
                                logger)
    # The publisher is non-blocking, so we watch it from the main thread.
    helper_fail = HeartbeatHelper(futures_fail)
    utils.heartbeats_block(logger, sub_future, max_time=20,
                           helper=helper_fail)

    # Do clean-up.
    publisher.delete_topic(topic_path)
    thread_names.save_tree(CURR_DIR, logger)
    thread_names.restore()
    utils.restore()
def predict_char(image):
    images, _, keep_prob = placeholders()
    logits = cnn(images, keep_prob)
    pred = predictions(logits)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver, save_path = utils.restore(sess, './checkpoints/cnn/')
        if not tf.train.get_checkpoint_state('./checkpoints/cnn/'):
            # `Error` was undefined; raise a standard exception instead.
            raise RuntimeError('No checkpoint found')
        guess = sess.run(pred, feed_dict={images: image, keep_prob: 1})
        return guess
def main():
    print('building model...')
    images, labels = supervised_placeholders()
    logits = cnn(images)
    acc = accuracy(labels, logits)
    loss = cross_entropy_loss(labels, logits)
    train_op = supervised_optimizer(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver, save_path = utils.restore(sess, get('cnn.checkpoint'))
        clothes = ClothesDataset(get('cnn.num_classes'))
        train_cnn(sess, saver, save_path, images, labels, loss, train_op,
                  acc, clothes)
        print('saving trained model...\n')
        saver.save(sess, save_path)
    utils.hold_training_plot()
def main():
    print('building model...')
    images, labels, keep_prob = placeholders()
    logits = cnn(images, keep_prob)
    acc = accuracy(labels, logits)
    loss = cross_entropy_loss(labels, logits)
    train_op = optimizer(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver, save_path = utils.restore(sess, './checkpoints/cnn/')
        dataset = Dataset()
        report_test_accuracy(sess, images, labels, keep_prob, acc, dataset)
        train_cnn(sess, saver, save_path, images, labels, keep_prob, loss,
                  train_op, acc, dataset)
        print('saving trained model...\n')
        saver.save(sess, save_path)
    utils.hold_training_plot()
def main():
    bleu_test = Bleu()
    vocab, reverse_vocab = utils.load_dict(conf.dictionary_path)
    pkl_path = os.path.join(conf.val_data_path, 'val_caption.pkl')
    with open(pkl_path, 'rb') as f:
        caption_data = pickle.load(f)
    # Materialize the keys so `random.sample` also works under Python 3.
    image_id_list = list(caption_data.keys())
    image_to_show = set(random.sample(image_id_list, 10))
    with tf.device('/cpu:0'):
        val_dataset = get_data.batch_val_data('val', conf.batch_size, 6,
                                              conf.val_data_path)
        val_id_batch, val_image_batch = get_data.make_val_iterator(val_dataset)
    logging.info("The input graph defined!")
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        train_model = ShowAttendTell(first_time=False,
                                     start_token_index=vocab[conf.start_token],
                                     pad_token_index=vocab[conf.pad_token],
                                     max_timestep=conf.sentence_length)
        caption_generator = InferenceWrapper(train_model,
                                             vocab[conf.start_token],
                                             vocab[conf.end_token],
                                             beam_size=3)
        caption_generator.build_inference_model()
    # saver = tf.train.Saver()
    result = {}
    counter = 0
    with tf.Session() as sess:
        newest_checkpoint = tf.train.latest_checkpoint(conf.ckpt_upper_path)
        utils.restore(sess, newest_checkpoint)
        while True:
            counter += 1
            logging.info("Batch %d " % counter)
            try:
                val_id_batch_data, val_image_batch_data = sess.run(
                    [val_id_batch, val_image_batch])
            except tf.errors.OutOfRangeError:
                break
            for index, image_id in enumerate(val_id_batch_data):
                caption = caption_generator.run_inference(
                    sess, val_image_batch_data[index])
                if len(caption) == 0:
                    sentence = ""
                else:
                    sentence = utils.get_sentence(caption[0][0], reverse_vocab)
                result[int(image_id)] = [sentence]
                if image_id in image_to_show:
                    scipy.misc.imsave(str(image_id) + ".png",
                                      val_image_batch_data[index])
                logging.info("%d : %s" % (image_id, sentence))
        score, _ = bleu_test.compute_score(caption_data, result)
        logging.info("Bleu1 %f " % score[0])
        logging.info("Bleu2 %f " % score[1])
        logging.info("Bleu3 %f " % score[2])
        logging.info("Bleu4 %f " % score[3])
def main(_):
    print(FLAGS.epsilon, FLAGS.top_bn)
    np.random.seed(seed=FLAGS.seed)
    tf.set_random_seed(np.random.randint(1234))
    with tf.Graph().as_default() as g:
        with tf.device("/cpu:0"):
            if FLAGS.data_set == 'CelebA':
                (images, labels), (_, _), (_, _) = d.get_data(
                    batch_size=FLAGS.batch_size, image_size=FLAGS.img_size)
                (images_eval_train, labels_eval_train), (_, _), \
                    (images_eval_test, labels_eval_test) = d.get_data(
                        batch_size=FLAGS.eval_batch_size,
                        image_size=FLAGS.img_size)
                ul_images = images
                ul_images_eval_train = images_eval_train
            else:
                images, labels = inputs(batch_size=FLAGS.batch_size,
                                        train=True,
                                        validation=FLAGS.validation,
                                        shuffle=True)
                ul_images = unlabeled_inputs(batch_size=FLAGS.ul_batch_size,
                                             validation=FLAGS.validation,
                                             shuffle=True)
                images_eval_train, labels_eval_train = inputs(
                    batch_size=FLAGS.eval_batch_size, train=True,
                    validation=FLAGS.validation, shuffle=True)
                ul_images_eval_train = unlabeled_inputs(
                    batch_size=FLAGS.eval_batch_size,
                    validation=FLAGS.validation, shuffle=True)
                images_eval_test, labels_eval_test = inputs(
                    batch_size=FLAGS.eval_batch_size, train=False,
                    validation=FLAGS.validation, shuffle=True)

        lr = tf.placeholder(tf.float32, shape=[], name="learning_rate")
        mom = tf.placeholder(tf.float32, shape=[], name="momentum")
        loss, train_op, x_adv, x_reconst = build_training_graph(
            images, labels, ul_images, lr, mom)

        # Build eval graph.
        if not FLAGS.draw_adv_img:
            losses_eval_train, _ = build_eval_graph(
                images_eval_train, labels_eval_train, ul_images_eval_train)
            losses_eval_test, results = build_eval_graph(
                images_eval_test, labels_eval_test, images_eval_test)

        saver = tf.train.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.allocator_type = 'BFC'
        sess = tf.Session(config=config)

        if FLAGS.method == 'lvat':
            print('-------------------------------------------')
            print("... restore the variables from frozen model.")
            u.restore(sess, SCOPE_ENCODER, CKPT_AE)
            print('-------------------------------------------')

        # if FLAGS.draw_adv_img:
        if False:
            print("... restore the variables of the classifier. log__dir:",
                  FLAGS.log__dir)
            ckpt = tf.train.get_checkpoint_state(FLAGS.log__dir)
            if ckpt and ckpt.model_checkpoint_path:
                u.restore(sess, SCOPE_CLASSIFIER, FLAGS.log__dir)
                op_init = u.init_uninitialized_vars(sess)
                sess.run(op_init, feed_dict={lr: FLAGS.learning_rate,
                                             mom: FLAGS.mom1})
            else:
                sys.exit('failed to restore')
        else:
            print("... init the variables for the classifier to be trained.")
            classifier_vars = tf.get_collection(tf.GraphKeys.VARIABLES,
                                                scope=SCOPE_CLASSIFIER)
            print('classifier_vars:', classifier_vars)
            op_init = tf.variables_initializer(classifier_vars)
            optimizer_vars = tf.get_collection(tf.GraphKeys.VARIABLES,
                                               scope='scope_optimizer')
            print('optimizer_vars:', optimizer_vars)
            op_init_optimiser = tf.variables_initializer(optimizer_vars)
            sess.run([op_init, op_init_optimiser],
                     feed_dict={lr: FLAGS.learning_rate, mom: FLAGS.mom1})

        tf.train.start_queue_runners(sess=sess)

        if FLAGS.draw_adv_img:
            print('... skip training')
            _x, _x_adv, _x_reconst = sess.run([ul_images, x_adv, x_reconst])
            _N = 7
            print(math.floor(FLAGS.ul_batch_size // _N))
            for i in range(math.floor(FLAGS.ul_batch_size // _N)):
                draw_x(_x, _x_reconst, _x_adv, n_x=_N, offset=i,
                       show_reconst=(FLAGS.method == 'lvat'),
                       filename='ep_%s_%.2f_%d' % (FLAGS.method,
                                                   FLAGS.epsilon, i))
            sys.exit('exit draw_adv_img')
        else:
            print('... start training')
            for ep in range(FLAGS.num_epochs):
                if ep < FLAGS.epoch_decay_start:
                    feed_dict = {lr: FLAGS.learning_rate, mom: FLAGS.mom1}
                else:
                    decayed_lr = ((FLAGS.num_epochs - ep) / float(
                        FLAGS.num_epochs - FLAGS.epoch_decay_start)) * \
                        FLAGS.learning_rate
                    feed_dict = {lr: decayed_lr, mom: FLAGS.mom2}
                sum_loss = 0
                start = time.time()
                for i in tqdm(range(FLAGS.num_iter_per_epoch), leave=False):
                    _, batch_loss = sess.run([train_op, loss],
                                             feed_dict=feed_dict)
                    sum_loss += batch_loss
                end = time.time()
                print("Epoch:", ep,
                      "CE_loss_train:", sum_loss / FLAGS.num_iter_per_epoch,
                      "elapsed_time:", end - start, flush=True)
                if (ep >= FLAGS.eval_start) and \
                        ((ep + 1) % FLAGS.eval_freq == 0 or
                         ep + 1 == FLAGS.num_epochs):
                    test(sess, losses_eval_train, ep, "train-")
                    test(sess, losses_eval_test, ep, "test-")
                if ep % 10 == 0:
                    print("Model saved in file: %s" % saver.save(
                        sess, FLAGS.log__dir + '/model.ckpt'))
    return
def load_model(self, file_path):
    utils.restore(self, file_path, self.device)
def main(args):
    hiddens = args.hiddens
    if args.restore:
        config = load_config(os.path.join(args.restore, 'config.json'))
        hiddens = config['hiddens']
    # Create autoencoder.
    ae = get_network(hiddens, logger=g_logger)
    # Build graph.
    sess, saver, init_op = build_graph(ae, [None, 784])
    if args.restore:
        restore(sess, saver, args.restore)
    else:
        g_logger.info('Initialize the model')
        sess.run(init_op)

    train_result = os.path.join(args.result, 'train')
    # Make the result directory if it does not exist.
    if not os.path.exists(train_result):
        os.makedirs(train_result)

    # Save configuration.
    save_dict = args.__dict__
    save_dict['hiddens'] = hiddens
    save_config(save_dict, os.path.join(args.result, 'config.json'))

    # Use MNIST for training.
    mnist = tf.contrib.learn.datasets.load_dataset('mnist')
    figure = plt.figure(figsize=(8, 8))
    scatter_data = {}
    last_epoch = 0
    try:
        # Learn for `args.epoch` epochs.
        nodes = [ae.train, ae.loss, ae.z, ae.x_]
        for i in range(1, args.epoch + 1):
            losses = 0
            cnt = 0
            # Get data with batch size.
            for x, y in next_mnist_data(mnist, 'train'):
                _, loss, z, x_ = sess.run(nodes, feed_dict={ae.x: x})
                # Make scatter data with the latent variables (z).
                make_scatter_data(scatter_data, z, y)
                losses += loss
                cnt += 1
            last_epoch = i
            g_logger.info('epoch: {}, loss: {}'.format(i, losses / cnt))
            scatter(scatter_data, train_result, i)
            figure.clear()
            scatter_data.clear()
        # Save checkpoint.
        saver.save(sess, args.result + '/checkpoint', global_step=args.epoch)
    except KeyboardInterrupt:
        saver.save(sess, args.result + '/checkpoint', global_step=last_epoch)
def predict_rnn(ff_type, ff, input_size, bi):
    hs = input_size // 2
    pred = []
    testX = []
    for i in range(8, 17):
        for j in range(8, 17):
            if bi:
                if ff_type == 'magnitude':
                    testX.append(
                        np.concatenate([
                            ff[:5, i - hs:i + hs + 1, j - hs:j + hs + 1],
                            ff[6:, i - hs:i + hs + 1, j - hs:j + hs + 1]
                        ]).reshape(9, -1))
                else:
                    testX.append(
                        np.concatenate([
                            ff[:5, i - hs:i + hs + 1, j - hs:j + hs + 1, :],
                            ff[6:, i - hs:i + hs + 1, j - hs:j + hs + 1, :]
                        ]).reshape(9, -1))
            else:
                if ff_type == 'magnitude':
                    testX.append(ff[:9, i - hs:i + hs + 1,
                                    j - hs:j + hs + 1].reshape(9, -1))
                else:
                    testX.append(ff[:9, i - hs:i + hs + 1,
                                    j - hs:j + hs + 1, :].reshape(9, -1))
    testX = np.array(testX)
    tf.reset_default_graph()
    if ff_type == 'magnitude':
        images, labels = regressor_placeholders(
            input_size=input_size * input_size, output_size=1, num_step=9)
        if bi:
            logits = birnn(images, lstm_size=500, dim_out=1,
                           partition=ff_type)
        else:
            logits = rnn(images, lstm_size=256, dim_out=1, num_layers=3,
                         partition=ff_type)
    else:
        images, labels = regressor_placeholders(
            input_size=input_size * input_size * 2, output_size=2, num_step=9)
        if bi:
            logits = birnn(images, lstm_size=500, dim_out=2,
                           partition=ff_type)
        else:
            logits = rnn(images, lstm_size=256, dim_out=2, num_layers=2,
                         partition=ff_type)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if bi:
            saver, save_path = utils.restore(
                sess, get('birnn.' + ff_type + '_checkpoint'))
        else:
            saver, save_path = utils.restore(
                sess, get('rnn.' + ff_type + '_checkpoint'))
        t0 = time.time()
        pred = sess.run(logits, feed_dict={images: testX.swapaxes(0, 1)})
        print("It takes %s seconds to predict the target flow field." %
              str(round(time.time() - t0, 5)))
    return pred
def train(args):
    iter_per_epoch = int(math.ceil(conf.num_coco_data * 1.0 / conf.batch_size))
    pkl_path = os.path.join(conf.val_small_data_path, 'val_caption.pkl')
    with open(pkl_path, 'rb') as f:
        caption_data = pickle.load(f)
    # Materialize the keys so `random.choice` also works under Python 3.
    image_id_list = list(caption_data.keys())
    bleu_test = Bleu()
    vocab, reverse_vocab = utils.load_dict(conf.dictionary_path)
    with tf.device('/cpu:0'):
        train_image_batch, train_sequence_batch = get_data.batch_train_data(
            'train', conf.batch_size, conf.shuffer_buffer_size, 6,
            conf.train_data_path)
        val_dataset = get_data.batch_val_data('val', conf.batch_size, 6,
                                              conf.val_small_data_path)
        val_id_batch, val_image_batch = get_data.make_val_iterator(val_dataset)
    logging.info("The input graph defined!")
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        train_model = ShowAttendTell(first_time=args.first_time,
                                     start_token_index=vocab[conf.start_token],
                                     pad_token_index=vocab[conf.pad_token],
                                     mat_file=conf.vgg_checkpoint,
                                     max_timestep=conf.sentence_length,
                                     train_vgg=conf.train_vgg)
        batch_loss, perplexity, _ = train_model.build_model()
        scope.reuse_variables()
        generated_words = train_model.build_validation()

    ave_train_loss = tf.Variable(0, name='ave_train_loss', dtype=tf.float32,
                                 trainable=False)
    bleu1 = tf.Variable(0, name='bleu1', dtype=tf.float32, trainable=False)
    bleu2 = tf.Variable(0, name='bleu2', dtype=tf.float32, trainable=False)
    bleu3 = tf.Variable(0, name='bleu3', dtype=tf.float32, trainable=False)
    bleu4 = tf.Variable(0, name='bleu4', dtype=tf.float32, trainable=False)
    tf.summary.scalar('ave_train_loss', ave_train_loss)
    tf.summary.scalar('batch_loss', batch_loss)
    tf.summary.scalar('batch_perplexity', perplexity)
    tf.summary.scalar('bleu1', bleu1)
    tf.summary.scalar('bleu2', bleu2)
    tf.summary.scalar('bleu3', bleu3)
    tf.summary.scalar('bleu4', bleu4)
    all_variable = tf.trainable_variables()
    for variable in all_variable:
        tf.summary.histogram(variable.op.name, variable)
    all_gradient = tf.gradients(batch_loss, all_variable)
    for index, variable in enumerate(all_variable):
        tf.summary.histogram(variable.op.name + "/gradient",
                             all_gradient[index])

    # For logging the last global step saved.
    with open(conf.global_step_file) as fd1:
        number = int(fd1.readline().strip())
    global_step_t = tf.Variable(number, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(conf.learning_rate,
                                               global_step_t,
                                               conf.decay_step,
                                               conf.decay_rate,
                                               staircase=True)
    # optimizer = tf.train.AdamOptimizer(learning_rate=conf.learning_rate)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # For updating the moving average and variance in batch norm.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(batch_loss, global_step=global_step_t)
    logging.info("The optimization operation defined!")

    saver = tf.train.Saver(max_to_keep=80)
    ckpt_filename = os.path.join(conf.ckpt_upper_path, 'model.ckpt')
    with tf.Session() as sess:
        if args.load_ckpt:
            newest_checkpoint = tf.train.latest_checkpoint(
                conf.ckpt_upper_path)
            utils.restore(sess, newest_checkpoint)
        new_folder_name = datetime.datetime.now().strftime(
            "%Y-%m-%d-%H-%M-%S")
        log_whole_path = os.path.join(conf.model_log_path, new_folder_name)
        if not os.path.exists(log_whole_path):
            os.makedirs(log_whole_path)
        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(log_whole_path)
        summary_writer.add_graph(sess.graph)
        sess.run(tf.global_variables_initializer())
        total_loss = 0.0
        start_time = time.time()
        all_time = 0
        counter = 0
        # b = 30
        # for e in range(1):
        #     for i in range(b):
        for _ in range(conf.epoch):
            for _ in range(iter_per_epoch):
                counter += 1
                logging.info("In iter %d " % counter)
                image_batch_data, sequence_batch_data = sess.run(
                    [train_image_batch, train_sequence_batch])
                feed_dict = {
                    train_model.input_image: image_batch_data,
                    train_model.input_caption: sequence_batch_data
                }
                batch_loss_value, batch_perplexity_value, _ = sess.run(
                    [batch_loss, perplexity, train_op], feed_dict=feed_dict)
                logging.info("batch loss: %s " % batch_loss_value)
                logging.info("batch perplexity value: %s " %
                             batch_perplexity_value)
                total_loss += batch_loss_value
                if counter % 100 == 0:
                    prediction = {}
                    while True:
                        try:
                            val_id_batch_data, val_image_batch_data = sess.run(
                                [val_id_batch, val_image_batch])
                        except tf.errors.OutOfRangeError:
                            with tf.device('/cpu:0'):
                                val_id_batch, val_image_batch = \
                                    get_data.make_val_iterator(val_dataset)
                            break
                        val_feed_dict = {
                            train_model.input_image: val_image_batch_data
                        }
                        caption = sess.run(generated_words,
                                           feed_dict=val_feed_dict)
                        for index, image_id in enumerate(val_id_batch_data):
                            sentence = utils.get_sentence(caption[index],
                                                          reverse_vocab)
                            prediction[int(image_id)] = [sentence]
                    random_id = random.choice(image_id_list)
                    logging.info("Prediction %s " % prediction[random_id][0])
                    logging.info("Label %s " % caption_data[random_id][0])
                    print(len(caption_data.keys()))
                    print(len(prediction.keys()))
                    score, _ = bleu_test.compute_score(caption_data,
                                                       prediction)
                    # print "score ", score
                    logging.info("Bleu1 %f " % score[0])
                    logging.info("Bleu2 %f " % score[1])
                    logging.info("Bleu3 %f " % score[2])
                    logging.info("Bleu4 %f " % score[3])
                    sess.run(bleu1.assign(score[0]))
                    sess.run(bleu2.assign(score[1]))
                    sess.run(bleu3.assign(score[2]))
                    sess.run(bleu4.assign(score[3]))
                if counter % 50 == 0:
                    sess.run(ave_train_loss.assign(total_loss * 1.0 / counter))
                    logging.info("train average loss %f " %
                                 (total_loss * 1.0 / counter))
                    summary = sess.run(merged_summary, feed_dict=feed_dict)
                    summary_writer.add_summary(
                        summary, tf.train.global_step(sess, global_step_t))
                    summary_writer.flush()
                if counter % 300 == 0:
                    with open(conf.global_step_file, 'w') as fd:
                        fd.write(str(tf.train.global_step(sess,
                                                          global_step_t)))
                    saver.save(sess, ckpt_filename, global_step=global_step_t)
                new_time = time.time()
                time_range = new_time - start_time
                start_time = new_time
                all_time += time_range
                logging.info("batch %d take %f \n" % (counter, time_range))
        logging.info("Average time %f " % (all_time * 1.0 / counter))
        summary_writer.close()
def load_model(self, file_path):
    utils.restore(self, file_path)
def main(ff_type, input_size):
    tf.reset_default_graph()
    if ff_type == 'magnitude':
        images1, labels1 = regressor_placeholders(
            input_size=input_size * input_size, output_size=1)
        logits1 = birnn(images1, dim_out=1, partition=ff_type)
    else:
        images1, labels1 = regressor_placeholders(
            input_size=input_size * input_size * 2, output_size=2)
        logits1 = birnn(images1, dim_out=2, partition=ff_type)
    data1 = RNNDataSet(input_size=input_size, training=False,
                       partition=ff_type, bidirection=True)
    true_Y1 = np.array(data1.get_test_label())
    true_Y1_mean = np.mean(true_Y1, axis=0)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver, save_path = utils.restore(
            sess, get('birnn.' + ff_type + '_checkpoint'))
        pred1 = pred_rnn(sess, saver, save_path, images1, logits1, data1,
                         ff_type)
    print("Coefficient of determination R^2 for BiRnn is:",
          np.sum((pred1 - true_Y1_mean)**2) /
          np.sum((true_Y1 - true_Y1_mean)**2))
    print("Mean norm percentage error for BiRnn is:",
          np.sum(np.linalg.norm(pred1 - true_Y1, axis=1)) /
          np.sum(np.linalg.norm(true_Y1, axis=1)))
    print("Mean square percentage error for BiRnn is:",
          np.sum((pred1 - true_Y1)**2) / np.sum(true_Y1**2))

    tf.reset_default_graph()
    if ff_type == 'magnitude':
        images2, labels2 = regressor_placeholders(
            input_size=input_size * input_size, output_size=1)
        logits2 = rnn(images2, lstm_size=256, dim_out=1, num_layers=3,
                      partition=ff_type)
    else:
        images2, labels2 = regressor_placeholders(
            input_size=input_size * input_size * 2, output_size=2)
        logits2 = rnn(images2, lstm_size=256, dim_out=2, num_layers=2,
                      partition=ff_type)
    data2 = RNNDataSet(input_size=input_size, training=False,
                       partition=ff_type, bidirection=False)
    true_Y2 = np.array(data2.get_test_label())
    true_Y2_mean = np.mean(true_Y2, axis=0)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver, save_path = utils.restore(
            sess, get('rnn.' + ff_type + '_checkpoint'))
        pred2 = pred_rnn(sess, saver, save_path, images2, logits2, data2,
                         ff_type)
    print("Coefficient of determination R^2 for Rnn is:",
          np.sum((pred2 - true_Y2_mean)**2) /
          np.sum((true_Y2 - true_Y2_mean)**2))
    print("Mean norm percentage error for Rnn is:",
          np.sum(np.linalg.norm(pred2 - true_Y2, axis=1)) /
          np.sum(np.linalg.norm(true_Y2, axis=1)))
    print("Mean square percentage error for Rnn is:",
          np.sum((pred2 - true_Y2)**2) / np.sum(true_Y2**2))