def __init__(self, network_architecture, transfer_fct=tf.nn.softplus,
             learning_rate=0.001, batch_size=100, load_model=False,
             checkpoint_folder='./vae_checkpoints'):
    """Build the network, open a session and initialise or restore weights.

    Args:
        network_architecture: Mapping describing the network; only its
            "n_input" entry is read here, as the width of the input
            placeholder.
        transfer_fct: Activation function, stored on the instance.
        learning_rate: Stored on the instance.
        batch_size: Stored on the instance.
        load_model: When falsy (the default) every variable is freshly
            initialised; otherwise the variables are restored from the
            checkpoint recorded in ``checkpoint_folder``.
        checkpoint_folder: Directory searched for checkpoint state when
            ``load_model`` is set.

    Raises:
        IOError: If ``load_model`` is set but ``checkpoint_folder`` holds no
            checkpoint state.
    """
    self.network_architecture = network_architecture
    self.transfer_fct = transfer_fct
    self.learning_rate = learning_rate
    self.batch_size = batch_size

    # tf Graph input
    self.x = tf.placeholder(tf.float32, [None, network_architecture["n_input"]])

    # Launch the session
    self.sess = tf.InteractiveSession()

    # Create autoencoder network
    self._create_network()
    # Define loss function based variational upper-bound and
    # corresponding optimizer
    self._create_loss_optimizer()

    print(len(tf.all_variables()))
    # The saver is built only now, after network and optimizer exist, so its
    # variable list contains everything created above.
    self.saver = tf.train.Saver(var_list=tf.all_variables())

    if not load_model:
        # Initializing the tensor flow variables
        self.sess.run(tf.initialize_all_variables())
        return

    ckpt = tf.train.get_checkpoint_state(checkpoint_folder)
    if ckpt is None or not ckpt.model_checkpoint_path:
        # Fail with a clear message instead of an AttributeError on None.
        raise IOError("No checkpoint found in %r" % checkpoint_folder)
    self.saver.restore(self.sess, ckpt.model_checkpoint_path)
    print("Loaded model: %s" % ckpt.model_checkpoint_path)
    # Fetches every variable once; the values are discarded.
    self.sess.run(tf.all_variables())
def _discriminator_model(sess, features, disc_input):
    """Build the fully convolutional discriminator.

    Args:
        sess: Not used in this function.
        features: Not used in this function.
        disc_input: Input tensor; it is fed to the model as
            ``2*disc_input - 1``.

    Returns:
        A pair ``(output, disc_vars)`` where ``output`` is the model output
        and ``disc_vars`` lists the variables created while building the
        model, in creation order.
    """
    # Fully convolutional model
    mapsize = 3
    layers = [64, 128, 256, 512]
    stddev_factor = 2.0

    old_vars = tf.all_variables()

    model = Model('DIS', 2*disc_input - 1)

    for nunits in layers:
        model.add_conv2d(nunits, mapsize=mapsize, stride=2, stddev_factor=stddev_factor)
        model.add_batch_norm()
        model.add_relu()

    # Finalization a la "all convolutional net".
    # `nunits` intentionally keeps the width of the last layer from the loop.
    model.add_conv2d(nunits, mapsize=mapsize, stride=1, stddev_factor=stddev_factor)
    model.add_batch_norm()
    model.add_relu()

    model.add_conv2d(nunits, mapsize=1, stride=1, stddev_factor=stddev_factor)
    model.add_batch_norm()
    model.add_relu()

    # Linearly map to real/fake and return average score
    # (softmax will be applied later)
    model.add_conv2d(1, mapsize=1, stride=1, stddev_factor=stddev_factor)
    model.add_mean()

    # Keep only the variables created above. Filtering the ordered list
    # (instead of subtracting sets) gives a deterministic result order.
    previously_defined = set(old_vars)
    disc_vars = [v for v in tf.all_variables() if v not in previously_defined]

    return model.get_output(), disc_vars
def reset_module(self, module):
    """Run ``module.backward`` on its loss and initialise the variables it adds.

    Only variables that did not exist before the ``backward`` call are
    initialised; existing variables are left untouched.
    """
    before = set(tf.all_variables())
    module.backward(module.loss)
    newly_created = set(tf.all_variables()) - before
    init_new = tf.initialize_variables(newly_created)
    self.sess.run(init_new)
def train_dnn(data_folder, model_file):
    """Build the training graph, train the DNN, save it and evaluate it.

    Relies on names that are not defined in this function (``DNN``, ``x``,
    ``y_``, ``context_frames``, ``save_loc``, ``start_date``,
    ``optimizer_name``, ``train``, ``evaluate_dnn``) and publishes ``sess``,
    ``summary_op``, ``train_writer`` and ``saver`` as module globals.

    Args:
        data_folder: Directory containing ``train_data.pickle`` and
            ``train_labels.pickle``; also passed on to ``evaluate_dnn``.
        model_file: Checkpoint to restore before training. When falsy, all
            variables are initialised instead.
    """
    # Output of dnn using input x
    y = DNN(x)
    print "Loading training pickles..."
    train_set = import_data.load_dataset(data_folder + '/train_data.pickle',
                                         data_folder + '/train_labels.pickle',
                                         context_frames=context_frames)
    # Create the dir for the model
    if not os.path.isdir('%s/models/%s'%(save_loc,start_date)):
        try:
            os.makedirs('%s/models/%s'%(save_loc,start_date))
        except OSError:
            # Re-raise unless the directory exists after all (e.g. it was
            # created between the check above and makedirs).
            if not os.path.isdir('%s/models/%s'%(save_loc,start_date)):
                raise
    # Create the session
    global sess
    sess = tf.InteractiveSession()
    global summary_op
    global train_writer
    global saver
    # NOTE(review): the saver is built before the optimizer below, so it
    # presumably does not cover variables the optimizer creates - which looks
    # like the reason for the explicit initialisation in the Adam branch;
    # confirm.
    saver = tf.train.Saver()
    # Op for merging all summaries
    summary_op = tf.merge_all_summaries()
    # Summary Writer
    train_writer = tf.train.SummaryWriter('%ssummaries/%s'%(save_loc, start_date), sess.graph)
    # Cost function
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
    # Optimizer
    # For gradient descend, learning rate = 0.002 (see Hinton et al.)
    # For AdamOptimizer, learning rate = 0.0001 (better than default (exp 1.2))
    # NOTE(review): the gradient-descent branch below actually uses 0.02, not
    # the 0.002 stated above - confirm which one is intended.
    if (optimizer_name == 'Adam'):
        # Hacky solution for always making sure that the beta2_power var
        # is always initialized: initialise exactly the variables that
        # minimize() added to the graph.
        temp = set(tf.all_variables())
        optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost)
        sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))
    else:
        optimizer = tf.train.GradientDescentOptimizer(0.02).minimize(cost)
    if model_file:
        saver.restore(sess, model_file)
        print "Model restored"
    else:
        # Initialization
        init_op = tf.initialize_all_variables()
        sess.run(init_op)
    print("Training network. Date: %s" % start_date)
    train(train_set, y, cost, optimizer)
    save_path = saver.save(sess, "%s/models/%s/model.ckpt"%(save_loc, start_date))
    print("Model saved in file: %s" % save_path)
    print("Summaries written to summaries/%s" % start_date)
    evaluate_dnn(data_folder, y)
def sample(self, args):
    """Restore the latest checkpoint and sample from the model.

    If no model has been built yet, the saved configuration and vocabulary
    are loaded from ``args.save_dir`` first, so this method can be used
    outside of main().

    Args:
        args: Namespace providing ``save_dir``, ``n`` and ``prime``.

    Returns:
        The result of ``self.model.sample(...)``, or ``None`` when
        ``args.save_dir`` contains no checkpoint.
    """
    if self.model is None:
        # Allow sample to be usable outside of main()
        # NOTE(review): these pickles are loaded without validation; only
        # point save_dir at trusted data.
        with open(os.path.join(args.save_dir, 'config.pkl')) as f:
            saved_args = cPickle.load(f)
        with open(os.path.join(args.save_dir, 'chars_vocab.pkl')) as f:
            self.chars, self.vocab = cPickle.load(f)
        self.model = Model(saved_args, True)

    # The restore-and-sample path is the same whether or not the model had
    # to be built above.
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            return self.model.sample(sess, self.chars, self.vocab, args.n, args.prime)
    return None
def _create_initializers(self):
    """Recreate saver, init and check ops when the variable count changed.

    Does nothing if the number of variables equals the cached
    ``self._var_count``. When a summary writer is set, the merged summaries
    are rebuilt and the current graph definition is written as well.
    """
    variables = tf.all_variables()
    if self._var_count == len(variables):
        return

    self._saver = tf.train.Saver(variables, max_to_keep=5)
    self._init = tf.initialize_all_variables()
    self._check_inited = tf.assert_variables_initialized()
    self._var_count = len(variables)

    if self._summary_writer:
        self._summaries = tf.merge_all_summaries()
        graph_def = tf.get_default_graph().as_graph_def()
        self._summary_writer.add_graph(graph_def)
def _create_initializers(self):
    """Recreate saver, init and check ops when the variable count changed.

    Does nothing if the number of variables equals the cached
    ``self._var_count``. Otherwise the directory of ``self._save_path`` is
    created if it is missing, the ops are rebuilt and, when a summary writer
    is set, the merged summaries are rebuilt and the graph is written.
    """
    variables = tf.all_variables()
    if self._var_count == len(variables):
        return

    if self._save_path:
        save_dir = os.path.dirname(self._save_path)
    else:
        save_dir = None
    if save_dir and not tf.gfile.IsDirectory(save_dir):
        tf.gfile.MakeDirs(save_dir)

    self._saver = tf.train.Saver(variables, max_to_keep=5)
    self._init = tf.initialize_all_variables()
    self._check_inited = tf.assert_variables_initialized()
    self._var_count = len(variables)

    if self._summary_writer:
        self._summaries = tf.merge_all_summaries()
        self._summary_writer.add_graph(tf.get_default_graph())
def testGraphMatchesImmediate(self):
    """Ensures that the vars line up between the two modes."""

    def sorted_variable_names():
        # Names of all variables in the current default graph.
        return sorted([var.name for var in tf.all_variables()])

    # Immediate mode: build directly on a wrapped input.
    with tf.Graph().as_default():
        wrapped_input = prettytensor.wrap(self.input)
        self.BuildLargishGraph(wrapped_input)
        normal_names = sorted_variable_names()

    # Template mode: build on a template, then bind the input.
    with tf.Graph().as_default():
        unbound = prettytensor.template('input')
        self.BuildLargishGraph(unbound).construct(
            input=prettytensor.wrap(self.input))
        template_names = sorted_variable_names()

    self.assertSequenceEqual(normal_names, template_names)
def register_all_variables_and_grards(y):
    """Register summaries for every variable and for its gradient w.r.t. ``y``.

    Every variable gets a histogram summary (plus a scalar summary when its
    shape is scalar). Gradients are computed with ``opt``, which is not
    defined in this function; every gradient that exists gets a histogram
    summary, a scalar summary when the variable or the gradient is scalar,
    and a scalar summary of its mean squared value.

    Args:
        y: Tensor whose gradients with respect to all variables are summarised.
    """
    all_vars = tf.all_variables()
    for v in all_vars:
        tf.histogram_summary('hist_'+v.name, v)
        if v.get_shape() == []:
            tf.scalar_summary('scal_'+v.name, v)

    grad_vars = opt.compute_gradients(y, all_vars)  # [ (T(gradient), variable) ]
    for (dldw, v) in grad_vars:
        # Identity check: `!=` must not be used to test a tensor against None.
        if dldw is None:
            continue
        tf.histogram_summary('hist_'+v.name+'dW', dldw)
        if v.get_shape() == [] or dldw.get_shape() == []:
            tf.scalar_summary('scal_'+v.name+'dW', dldw)
        l2norm_dldw = tf.reduce_mean(tf.square(dldw))
        tf.scalar_summary('scal_'+v.name+'dW_l2_norm', l2norm_dldw)
def __init__(self, model_dir):
    """Create G2P model and initialize or load parameters in session.

    Returns early, leaving only ``self.model_dir`` set, when ``model_dir`` is
    empty or contains no "model" entry.
    """
    self.model_dir = model_dir

    # Preliminary actions before model creation.
    if not model_dir:
        return
    checkpoint_path = os.path.join(model_dir, "model")
    if not os.path.exists(checkpoint_path):
        return

    # Load model parameters.
    num_layers, size = data_utils.load_params(model_dir)
    batch_size = 1  # We decode one word at a time.

    # Load vocabularies
    grapheme_vocab_path = os.path.join(model_dir, "vocab.grapheme")
    phoneme_vocab_path = os.path.join(model_dir, "vocab.phoneme")
    self.gr_vocab = data_utils.load_vocabulary(grapheme_vocab_path)
    self.ph_vocab = data_utils.load_vocabulary(phoneme_vocab_path)
    self.rev_ph_vocab = data_utils.load_vocabulary(phoneme_vocab_path,
                                                   reverse=True)

    self.session = tf.Session()

    # Create model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(len(self.gr_vocab),
                                            len(self.ph_vocab),
                                            self._BUCKETS,
                                            size, num_layers, 0,
                                            batch_size, 0, 0,
                                            forward_only=True)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

    # Check for saved models and restore them.
    print("Reading model parameters from %s" % model_dir)
    self.model.saver.restore(self.session, checkpoint_path)
def train(args):
    """Train the model described by ``args`` and checkpoint it periodically.

    The arguments and the vocabulary are pickled into ``args.save_dir``
    first. The learning rate is reassigned each epoch as
    ``learning_rate * decay_rate ** epoch``, and a checkpoint is written
    whenever the global batch index is a multiple of ``args.save_every``.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    config_file = os.path.join(args.save_dir, 'config.pkl')
    with open(config_file, 'w') as f:
        cPickle.dump(args, f)
    vocab_file = os.path.join(args.save_dir, 'chars_vocab.pkl')
    with open(vocab_file, 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)
    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
    progress_fmt = "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in xrange(args.num_epochs):
            decayed_lr = args.learning_rate * (args.decay_rate ** e)
            sess.run(tf.assign(model.lr, decayed_lr))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                fetches = [model.cost, model.final_state, model.train_op]
                train_loss, state, _ = sess.run(fetches, feed)
                end = time.time()

                global_batch = e * data_loader.num_batches + b
                total_batches = args.num_epochs * data_loader.num_batches
                print(progress_fmt.format(global_batch, total_batches, e,
                                          train_loss, end - start))
                if global_batch % args.save_every == 0:
                    saver.save(sess, checkpoint_path, global_step=global_batch)
                    print("model saved to {}".format(checkpoint_path))
def get_embeddings(sess):
    """Fetch the embedding variable for each distinct ``config.feature_desc`` value.

    Returns:
        A dict mapping each unique value to the evaluated first variable
        whose name starts with ``'embeddings' + str(value)``.
    """
    result = {}
    for num_cat_value in np.unique(config.feature_desc):
        prefix = 'embeddings' + str(num_cat_value)
        # NOTE(review): a plain prefix match means 'embeddings1' also matches
        # 'embeddings10'; the first match in variable order wins - confirm
        # that this is what callers expect.
        candidates = [var for var in tf.all_variables()
                      if var.name.startswith(prefix)]
        result[num_cat_value] = sess.run(candidates[0])
    return result
def cluster_feature_analysis(sess, user_ids):
    """Collect per-feature gate values for every user.

    The first two variables whose names start with 'lstm' are evaluated and
    each is split into four parts (i, j, f, o), which are passed to
    ``feature_importance`` for each user id.

    Returns:
        Four dicts (i, j, f, o). Each maps a feature index to a list holding
        one entry per user, in the order of ``user_ids``.
    """
    # Get trained parameters
    lstm_vars = [var for var in tf.all_variables() if var.name.startswith('lstm')]
    matrix_var = sess.run(lstm_vars[0])
    bias_var = sess.run(lstm_vars[1])

    # Split the gates
    matrix_i, matrix_j, matrix_f, matrix_o = sess.run(array_ops.split(1, 4, matrix_var))
    bias_i, bias_j, bias_f, bias_o = sess.run(array_ops.split(0, 4, bias_var))

    features = range(len(config.feature_desc))
    dict_i = {feature: [] for feature in features}
    dict_j = {feature: [] for feature in features}
    dict_f = {feature: [] for feature in features}
    dict_o = {feature: [] for feature in features}
    collected = (dict_i, dict_j, dict_f, dict_o)

    for user_id in user_ids:
        print(user_id)
        gates = feature_importance(sess, user_id,
                                   matrix_i, matrix_j, matrix_f, matrix_o,
                                   bias_i, bias_j, bias_f, bias_o)
        gates_i, gates_j, gates_f, gates_o = gates
        per_gate = (gates_i, gates_j, gates_f, gates_o)
        for feature in features:
            for target, gate_values in zip(collected, per_gate):
                target[feature].append(gate_values[feature])

    return dict_i, dict_j, dict_f, dict_o
def testStochasticVariablesWithCallableInitializer(self):
    """Checks that array and callable parameter initializers are both honoured.

    "mu" is initialised from a constant array of 4s and "sigma" from a
    callable producing 2s; both underlying variables and the shape of the
    stochastic variable are verified.
    """
    shape = (10, 20)

    def sigma_init(shape, dtype, partition_info):
        del partition_info  # Unused.
        return tf.ones(shape, dtype=dtype) * 2.

    mu_init = np.ones(shape, dtype=np.float32) * 4.
    getter = sv.make_stochastic_variable_getter(
        dist_cls=dist.NormalWithSoftplusSigma,
        dist_kwargs={"validate_args": True},
        param_initializers={"mu": mu_init, "sigma": sigma_init})

    with tf.variable_scope("stochastic_variables", custom_getter=getter):
        v = tf.get_variable("sv", shape)

    # Pick the parameter variables by name; the last match wins.
    for candidate in tf.all_variables():
        if "mu" in candidate.name:
            mu_var = candidate
        if "sigma" in candidate.name:
            sigma_var = candidate

    v = tf.convert_to_tensor(v)
    with self.test_session() as sess:
        sess.run(tf.initialize_all_variables())
        expected_mu = np.ones(shape) * 4.
        expected_sigma = np.ones(shape) * 2.
        self.assertAllEqual(expected_mu, sess.run(mu_var))
        self.assertAllEqual(expected_sigma, sess.run(sigma_var))
        self.assertEqual(shape, sess.run(v).shape)
def evaluate():
    """Eval CIFAR-10 for a number of steps.

    Builds the inference graph and a saver that restores trainable variables
    from their moving-average names, then calls ``eval_once`` repeatedly,
    sleeping ``FLAGS.eval_interval_secs`` between runs, until
    ``FLAGS.run_once`` is set.
    """
    with tf.Graph().as_default():
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = cifar10.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        # Build the trainable set once instead of rebuilding and scanning the
        # trainable list for every variable.
        trainable = set(tf.trainable_variables())
        variables_to_restore = {}
        for v in tf.all_variables():
            if v in trainable:
                restore_name = variable_averages.average_name(v)
            else:
                restore_name = v.op.name
            variables_to_restore[restore_name] = v
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                                graph_def=graph_def)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def add_aux_layer(self, aux_attrs):
    """Create an init op and a saver inside the layer's variable scope.

    Args:
        aux_attrs: Mapping with a 'layer_name' entry, used as the variable
            scope name and as the prefix of the returned keys.

    Returns:
        A dict with the keys '<layer_name>_init_op' and
        '<layer_name>_saver_op'.
    """
    layer_name = aux_attrs['layer_name']
    with tf.variable_scope(layer_name):
        init_op = tf.initialize_all_variables()
        saver = tf.train.Saver(tf.all_variables())
    # Both keys use '%s'; a bare '%_' is not a valid conversion and makes the
    # formatting raise ValueError.
    tensors_dict = {'%s_init_op' % layer_name: init_op,
                    '%s_saver_op' % layer_name: saver}
    return tensors_dict
def restore(checkpoint_file='hello.chk'):
    """Restore variables 'x' and 'y' from a checkpoint and print their values.

    Args:
        checkpoint_file: Path of the checkpoint to restore from.
    """
    # The Python names are unused, but the variables must exist in the graph
    # so that the saver has something to restore into.
    x = tf.Variable(-1.0, validate_shape=False, name='x')
    y = tf.Variable(-1.0, validate_shape=False, name='y')
    with tf.Session() as session:
        saver = tf.train.Saver()
        saver.restore(session, checkpoint_file)
        print(session.run(tf.all_variables()))
def train_model(args):
    """Train the RNN model and write snapshots into ``args.snapshots_dir``.

    The arguments and the vocabulary are pickled first so the model can be
    reloaded when sampling. A snapshot is taken whenever the global batch
    index is a multiple of ``args.snapshot`` and after the very last batch.
    """
    data_loader = InputHandler(args.data_dir, args.batch_size, args.result_length)
    args.vocabulary_size = data_loader.vocabulary_size

    # Save the original files, so that we can load the model when sampling
    config_file = os.path.join(args.snapshots_dir, CONFIGURATION_FILE)
    with open(config_file, 'wb') as f:
        cPickle.dump(args, f)
    vocabulary_file = os.path.join(args.snapshots_dir, WORDS_VOCABULARY_FILE)
    with open(vocabulary_file, 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocabulary), f)

    model = RNNModel(args.rnn_size, args.network_depth, args.batch_size,
                     args.result_length, args.vocabulary_size, args.gradient)
    snapshot_path = os.path.join(args.snapshots_dir, 'model.ckpt')

    with tf.Session() as session:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            decayed_rate = args.training_rate * (args.decay_rate ** e)
            session.run(tf.assign(model.lr, decayed_rate))
            data_loader.set_batch_pointer_to_zero()
            state = model.initial_state.eval()

            for b in range(data_loader.num_batches):
                x, y = data_loader.get_next_batch()
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                fetches = [model.cost, model.final_state, model.train_op]
                train_loss, state, _ = session.run(fetches, feed)

                global_batch = e * data_loader.num_batches + b
                on_schedule = global_batch % args.snapshot == 0
                # save for the last result
                is_last_batch = (e == args.num_epochs - 1
                                 and b == data_loader.num_batches - 1)
                if on_schedule or is_last_batch:
                    saver.save(session, snapshot_path, global_step=global_batch)
                    print("Model snapshot was taken to {}".format(snapshot_path))
def guarantee_initialized_variables(self, session, list_of_variables = None):
    """Initialise those variables that are still uninitialised in ``session``.

    Args:
        session: Session in which the variables are checked and initialised.
        list_of_variables: Variables to check; defaults to
            ``tf.all_variables()``.

    Returns:
        The list of variables that were uninitialised and have now been
        initialised (empty when nothing had to be done).
    """
    if list_of_variables is None:
        list_of_variables = tf.all_variables()
    list_of_variables = list(list_of_variables)

    # Map the reported names back to the variable objects that were passed
    # in. tf.get_variable() cannot be used for this lookup: it creates or
    # fetches scoped variables and does not accept the byte strings that
    # session.run returns under Python 3.
    variables_by_name = {var.op.name: var for var in list_of_variables}
    reported_names = session.run(
        tf.report_uninitialized_variables(list_of_variables))

    uninitialized_variables = []
    for name in reported_names:
        if isinstance(name, bytes):
            name = name.decode('utf-8')
        uninitialized_variables.append(variables_by_name[name])

    session.run(tf.initialize_variables(uninitialized_variables))
    return uninitialized_variables
def testGradient(self):
    """Checks the gradient error of the GRU block cell output.

    The first output of the cell is differentiated with respect to both
    inputs and the first four variables; every error must stay below 1e-4.
    """
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
        batch_size, cell_size, input_size = 1, 3, 2

        # Inputs
        x = tf.zeros([batch_size, input_size])
        h = tf.zeros([batch_size, cell_size])
        output = gru_ops.GRUBlockCell(cell_size)(x, h)

        sess.run([tf.initialize_all_variables()])

        w_ru, b_ru, w_c, b_c = tf.all_variables()[:4]
        output_shape = (batch_size, cell_size)

        def gradient_error(tensor, tensor_shape):
            # Gradient error of the first cell output w.r.t. `tensor`.
            return tf.test.compute_gradient_error(
                tensor, tensor_shape, output[0], output_shape)

        error_x = gradient_error(x, (batch_size, input_size))
        error_h = gradient_error(h, (batch_size, cell_size))
        error_w_ru = gradient_error(
            w_ru, (input_size + cell_size, 2 * cell_size))
        error_w_c = gradient_error(w_c, (input_size + cell_size, cell_size))
        error_b_ru = gradient_error(b_ru, (2 * cell_size,))
        error_b_c = gradient_error(b_c, (cell_size,))

    eps = 1e-4
    for error in (error_x, error_h, error_w_ru,
                  error_w_c, error_b_ru, error_b_c):
        self.assertLess(error, eps)
def run_model_image(checkpoint_file, image):
    """
    Run an image through the trained model and visualize its activations
    :param checkpoint_file: The saved model parameters for the basic model
    :param image: The supplied image (same dimensions as training).
    """
    with tf.Graph().as_default():
        # Whiten the single image, then add the batch dimension.
        image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 1])
        image = tf.image.per_image_whitening(image)
        image = tf.reshape(image, [1, IMAGE_SIZE, IMAGE_SIZE, 1])
        image = tf.cast(image, tf.float32)

        relu1, relu2, relu3 = inference(train=False, images=image, visualize=True)

        saver = tf.train.Saver(tf.all_variables())
        # Use the session as a context manager so it is closed when done,
        # even if restoring or plotting fails.
        with tf.Session() as sess:
            saver.restore(sess=sess, save_path=checkpoint_file)
            # Evaluate and plot one layer at a time, in network order.
            for activation in (relu1, relu2, relu3):
                units = activation.eval(session=sess)
                plotNNFilter(units)
def train(self, data=0, steps=-1, dropout=None, display_step=10, test_step=200, batch_size=10, do_resume=False):  # epochs=-1,
    """Run the training loop, with periodic reporting, testing and saving.

    Uses ``start``, ``save_step`` and ``checkpoint_dir``, none of which is
    defined in this method.

    Args:
        data: When truthy, stored as ``self.data`` before training.
        steps: Number of training steps; -1 is replaced by 9999999.
        dropout: Value fed to ``keep_prob`` during training steps; any falsy
            value is replaced by 1. (keep all).
        display_step: Every this many steps, accuracy is evaluated on the
            current batch and a progress line is printed.
        test_step: Every this many steps, ``self.test(step)`` is called.
        batch_size: Passed to ``self.next_batch``.
        do_resume: When true and a checkpoint exists in ``checkpoint_dir``,
            it is restored before training.

    Returns:
        None; returns early when the loss is NaN.
    """
    if data:
        self.data = data
    steps = 9999999 if steps == -1 else steps
    session = self.session
    # with tf.device(_cpu):
    # import tensorflow.contrib.layers as layers
    # t = tf.verify_tensor_all_finite(t, msg)
    tf.add_check_numerics_ops()
    # Each try/except below attempts the newer TensorFlow API first and falls
    # back to the older name when that raises.
    try:
        self.summaries = tf.summary.merge_all()
    except:
        self.summaries = tf.merge_all_summaries()
    try:
        self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)
    except:
        self.summary_writer = tf.train.SummaryWriter(current_logdir(), session.graph)
    if not dropout:
        dropout = 1.  # keep all
    x = self.x
    y = self.y
    keep_prob = self.keep_prob
    try:
        saver = tf.train.Saver(tf.global_variables())
    except:
        saver = tf.train.Saver(tf.all_variables())
    snapshot = self.name + str(get_last_tensorboard_run_nr())
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if do_resume and checkpoint:
        print("LOADING " + checkpoint + " !!!")
        saver.restore(session, checkpoint)
    # NOTE(review): as laid out here the initializer runs after the restore,
    # which presumably overwrites the restored values - confirm the intended
    # order/nesting.
    try:
        session.run([tf.global_variables_initializer()])
    except:
        session.run([tf.initialize_all_variables()])
    step = 0  # show first
    while step < steps:
        batch_xs, batch_ys = self.next_batch(batch_size, session)
        # print("step %d \r" % step)# end=' ')
        # tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)
        # Fit training using batch data
        feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
        loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)
        if step % display_step == 0:
            seconds = int(time.time()) - start
            # Calculate batch accuracy, loss (keep_prob 1., train_phase off)
            feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
            acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
            # self.summary_writer.add_summary(summary, step) # only test summaries for smoother curve
            print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(step, loss,
                                                                                  acc, seconds), end=' ')
            # A NaN loss aborts training; print() returns None, so the method
            # returns None here.
            if str(loss) == "nan":
                return print("\nLoss gradiant explosion, exiting!!!")  # restore!
        if step % test_step == 0:
            self.test(step)
        if step % save_step == 0 and step > 0:
            print("SAVING snapshot %s" % snapshot)
            saver.save(session, checkpoint_dir + snapshot + ".ckpt", self.global_step)
        step += 1
    print("\nOptimization Finished!")
    self.test(step, number=10000)  # final test
def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
    """Return callable for loading a checkpoint into the tensorflow graph.

    Only variables whose op name starts with the feature extractor scope are
    considered, and of those only the ones available in the checkpoint.

    Args:
      checkpoint_path: path to checkpoint to restore.
      from_detection_checkpoint: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.
        In the latter case the scope prefix is stripped from the names.

    Returns:
      a callable which takes a tf.Session as input and loads a checkpoint when
      run.
    """
    scope = self._extract_features_scope
    candidates = {}
    for variable in tf.all_variables():
        name_in_checkpoint = variable.op.name
        if not name_in_checkpoint.startswith(scope):
            continue
        if not from_detection_checkpoint:
            pieces = re.split('^' + scope + '/', name_in_checkpoint)
            name_in_checkpoint = pieces[-1]
        candidates[name_in_checkpoint] = variable

    # TODO: Load variables selectively using scopes.
    variables_to_restore = (
        variables_helper.get_variables_available_in_checkpoint(
            candidates, checkpoint_path))
    saver = tf.train.Saver(variables_to_restore)

    def restore(sess):
        saver.restore(sess, checkpoint_path)

    return restore
def drawGraph(self, n_row, n_latent, n_col):
    """Build the matrix-decomposition graph under the 'matDecomp' name scope.

    Creates the placeholders, the two factor variables ``_A``
    (n_row x n_latent) and ``_B`` (n_latent x n_col), the weighted loss with
    an L2 term, a masked validation quantity, a saver restricted to
    'matDecomp' variables and two scalar summaries.

    Args:
        n_row: Number of rows of ``_A`` (and width of the index placeholder).
        n_latent: Inner dimension shared by ``_A`` and ``_B``.
        n_col: Number of columns of ``_B`` (and width of ``_p`` / ``_c``).
    """
    with tf.name_scope('matDecomp'):
        # Inputs: targets (_p), per-entry weights (_c), regularisation
        # strength (_lambda) and the rows fed in through _index.
        self._p = tf.placeholder(tf.float32, shape=[None, n_col])
        self._c = tf.placeholder(tf.float32, shape=[None, n_col])
        self._lambda = tf.placeholder(tf.float32)
        self._index = tf.placeholder(tf.float32, shape=[None, n_row])
        # Factor matrices; the prediction is _index * A * B.
        self._A = tf.Variable(tf.truncated_normal([n_row, n_latent]))
        self._B = tf.Variable(tf.truncated_normal([n_latent, n_col]))
        self._h = tf.matmul(tf.matmul(self._index, self._A), self._B)
        # Mean of the squared error weighted element-wise by _c.
        weighted_loss = tf.reduce_mean(tf.mul(self._c, tf.squared_difference(self._p, self._h)))
        self._weighted_loss = weighted_loss
        # L2 term: sum of squared entries of A and B divided by the total
        # number of entries, scaled by _lambda.
        l2_A = tf.reduce_sum(tf.square(self._A))
        l2_B = tf.reduce_sum(tf.square(self._B))
        n_w = tf.constant(n_row * n_latent + n_latent * n_col, tf.float32)
        l2 = tf.truediv(tf.add(l2_A, l2_B), n_w)
        reg_term = tf.mul(self._lambda, l2)
        self._loss = tf.add(weighted_loss, reg_term)
        self._mask = tf.placeholder(tf.float32, shape=[n_row, n_col])
        one = tf.constant(1, tf.float32)
        # Threshold A*B at 1 and compare with _p, weighting matches by _c.
        pred = tf.cast(tf.greater_equal(tf.matmul(self._A, self._B), one), tf.float32)
        cor = tf.mul(tf.cast(tf.equal(pred, self._p), tf.float32), self._c)
        # NOTE(review): named "_vali_err", but `cor` is non-zero where the
        # prediction equals _p, so this sums weighted matches inside the
        # mask - confirm the intended meaning.
        self._vali_err = tf.reduce_sum(tf.mul(cor, self._mask))
        # Save only the variables whose name contains 'matDecomp'.
        self._saver = tf.train.Saver([v for v in tf.all_variables() if v.name.find('matDecomp') != -1])
        tf.scalar_summary('training_weighted_loss_l2', self._loss)
        tf.scalar_summary('validation_weighted_loss', self._weighted_loss)
        # The merged summary op is created here but not stored on the instance.
        merged = tf.merge_all_summaries()
def predict(args):
    """Interactively classify sentences read from standard input.

    Loads the saved configuration and vocabulary from ``args.save_dir``,
    rebuilds the model with a batch size of 1 and, once the latest checkpoint
    has been restored, prints a prediction for every line read from stdin
    until end of input.
    """
    config_path = os.path.join(args.save_dir, 'config.pkl')
    with open(config_path) as f:
        saved_args = cPickle.load(f)
    saved_args.batch_size = 1

    vocab_path = os.path.join(args.save_dir, 'chars_vocab.pkl')
    with open(vocab_path) as f:
        chars, vocab, idx2classid, classid2idx = cPickle.load(f)

    model = Model(saved_args, infer=False)
    print(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            while True:
                # Prompt, then read one line; an empty read means EOF.
                sys.stdout.write("> ")
                sys.stdout.flush()
                sentence = sys.stdin.readline()
                if not sentence:
                    break
                print(model.predict(sentence, saved_args.seq_length, sess,
                                    vocab, idx2classid))
def predict(self,x_test):
    """Rebuild the network, restore it from ``self.savePath`` and predict.

    Args:
        x_test: Input samples; normalised with ``self.normalize_xtest``
            before being fed to the network.

    Returns:
        The network output for ``x_test`` after
        ``self.denormalize_ypredict``.
    """
    x_test = self.normalize_xtest(x_test)

    X = tf.placeholder("float", [None, self.inputNumber])
    Y = tf.placeholder("float", [None, self.outputNumber])

    W1 = self.init_weight([self.inputNumber, self.layerOne], 'W1')
    B1 = self.init_bias([self.layerOne], 'B1')
    W2 = self.init_weight([self.layerOne, self.layerTwo], 'W2')
    B2 = self.init_bias([self.layerTwo], 'B2')
    W3 = self.init_weight([self.layerTwo,self.outputNumber], 'W3')
    B3 = self.init_bias([self.outputNumber], 'B3')

    L2 = self.model(X, W1, B1)
    L3 = self.model(L2, W2, B2)
    y_out = tf.nn.relu(tf.matmul(L3, W3) + B3)

    # The loss and optimizer are not run here. They are kept because building
    # them adds the optimizer's variables to the set the saver restores.
    cost = tf.reduce_mean(tf.square((Y - y_out)))
    train_op = tf.train.AdamOptimizer(self.learningRate).minimize(cost)
    pridict_op = tf.nn.relu(tf.matmul(L3, W3) + B3)

    # NOTE(review): every call adds these ops to the current default graph;
    # repeated calls in one process presumably create renamed variables -
    # confirm whether callers reset the graph.
    # Use the session as a context manager so it is always closed.
    with tf.Session() as sess:
        init = tf.initialize_all_variables()
        sess.run(init)
        saver = tf.train.Saver(tf.all_variables())
        saver.restore(sess,self.savePath)
        y_predict = sess.run(pridict_op, feed_dict={X:x_test})
    return self.denormalize_ypredict(y_predict)
def evaluate (tfrecord_file_paths, theme):
    """Build the evaluation graph for ``theme`` and run one evaluation pass.

    Trainable variables are restored from their moving-average names, all
    other variables from their own op names.

    Args:
        tfrecord_file_paths: Passed to ``distorted_inputs`` as the data source.
        theme: Names the workspace; summaries go to 'workspace/<theme>/eval'.
    """
    eval_dir = 'workspace/{}/eval'.format(theme)
    with tf.Graph().as_default() as g:
        images, labels = distorted_inputs(tfrecord_file_paths=tfrecord_file_paths)
        logits = cifar10.inference(tf.image.resize_images(images, cifar10.IMAGE_SIZE, cifar10.IMAGE_SIZE))

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        # Build the trainable set once instead of rebuilding and scanning the
        # trainable list for every variable.
        trainable = set(tf.trainable_variables())
        variables_to_restore = {}
        for v in tf.all_variables():
            if v in trainable:
                restore_name = variable_averages.average_name(v)
            else:
                restore_name = v.op.name
            variables_to_restore[restore_name] = v
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(eval_dir, g)

        eval_once(theme, saver, summary_writer, top_k_op, summary_op)
def testPrepareSessionWithReadyForLocalInitOp(self):
    """prepare_session initialises the global variable, then the local one.

    ``w`` is a local variable initialised from ``v``. After
    ``prepare_session`` runs with ``v.initializer`` as init op and
    ``w.initializer`` as local init op, both must be initialised and equal 1.
    """
    with tf.Graph().as_default():
        v = tf.Variable(1, name="v")
        w = tf.Variable(
            v,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")

        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
            self.assertEqual(False, tf.is_variable_initialized(w).eval())

        sm2 = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=tf.report_uninitialized_variables(
                tf.all_variables()),
            local_init_op=w.initializer)
        sess = sm2.prepare_session("", init_op=v.initializer)

        for tensor_name in ("v:0", "w:0"):
            tensor = sess.graph.get_tensor_by_name(tensor_name)
            self.assertEqual(
                True,
                tf.is_variable_initialized(tensor).eval(session=sess))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(1, sess.run(v))
        self.assertEqual(1, sess.run(w))
def _setup_np_inference(self, np_images, checkpoint_path):
    """Sets up and restores inference graph, creates and caches a Session.

    Args:
      np_images: 4-D array of images; only its height and width (second and
        third dimensions) are read, to size the image placeholder.
      checkpoint_path: checkpoint the model variables are restored from.
    """
    tf.logging.info('Restoring model weights.')

    # Define inference over an image placeholder.
    _batch, height, width, _channels = np.shape(np_images)
    placeholder_shape = (None, height, width, 3)
    image_placeholder = tf.placeholder(tf.float32, shape=placeholder_shape)

    # Preprocess batch.
    preprocessed = self.preprocess_data(image_placeholder, is_training=False)

    # Unscale and jpeg encode preprocessed images for display purposes.
    im_strings = preprocessing.unscale_jpeg_encode(preprocessed)

    # Do forward pass to get embeddings.
    embeddings = self.forward(preprocessed, is_training=False)

    # Create a saver to restore model variables.
    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(tf.all_variables())

    self._image_placeholder = image_placeholder
    self._batch_encoded = embeddings
    self._np_inf_tensor_dict = dict(
        embeddings=embeddings,
        raw_image_strings=im_strings,
    )

    # Create a session and restore model variables.
    session = tf.Session()
    self._sess = session
    saver.restore(session, checkpoint_path)
def testWaitForSessionLocalInit(self):
    """wait_for_session runs the local init op once global vars are ready.

    ``v`` is initialised through a separate session on the same local
    server. ``wait_for_session`` must then return a session in which both
    ``v`` and the local variable ``w`` (initialised from ``v``) are
    initialised and equal 1.
    """
    server = tf.train.Server.create_local_server()
    with tf.Graph().as_default() as graph:
        v = tf.Variable(1, name="v")
        w = tf.Variable(
            v,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        sm = tf.train.SessionManager(
            graph=graph,
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=tf.report_uninitialized_variables(
                tf.all_variables()),
            local_init_op=w.initializer)

        # Initialize v but not w
        s = tf.Session(server.target, graph=graph)
        s.run(v.initializer)

        sess = sm.wait_for_session(server.target, max_wait_secs=3)
        for tensor_name in ("v:0", "w:0"):
            tensor = sess.graph.get_tensor_by_name(tensor_name)
            self.assertEqual(
                True,
                tf.is_variable_initialized(tensor).eval(session=sess))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(1, sess.run(v))
        self.assertEqual(1, sess.run(w))
def train():
    """Train CIFAR-10 for a number of steps.

    Builds one model tower per GPU (``FLAGS.num_gpus``) sharing variables,
    averages the tower gradients, applies them together with the moving
    average update, and runs ``FLAGS.max_steps`` training steps. Progress is
    printed every 10 steps, summaries are written every 100 steps, and a
    checkpoint is saved every 1000 steps and after the last step.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        cifar10.LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
                    # Calculate the loss for one tower of the CIFAR model. This function
                    # constructs the entire CIFAR model but shares the variables across
                    # all towers.
                    loss = tower_loss(scope)

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                    # Calculate the gradients for the batch of data on this CIFAR tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            # Compare with None explicitly: a tensor must not be used as a
            # Python boolean.
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print (format_str % (datetime.now(), step, loss_value,
                                     examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def main(_):
    """Train a TransE model and periodically evaluate it with an Annoy index.

    Parses the command-line options, builds the model and its training ops,
    pre-generates every positive/negative training batch, and then runs
    training iterations from the restored offset up to ``--max_iter``. A
    checkpoint is saved after every iteration. When the evaluation condition
    holds, the entity embeddings are loaded into an ``AnnoyIndex`` and head
    and tail predictions over the training data are ranked.

    Progress is appended to ``<save_dir>/<outfile_prefix>.txt``. The file is
    truncated once at start-up and then re-opened in append mode for every
    message, so its content is complete even if the run is interrupted.

    Args:
        _: Unused positional argument.
    """
    parser = argparse.ArgumentParser(description='TransE.')
    parser.add_argument('--data', dest='data_dir', type=str,
                        help="Data folder", default='./data/FB15k/')
    parser.add_argument('--lr', dest='lr', type=float,
                        help="Learning rate", default=1e-2)
    parser.add_argument("--dim", dest='dim', type=int,
                        help="Embedding dimension", default=256)
    parser.add_argument("--batch", dest='batch', type=int,
                        help="Batch size", default=32)
    parser.add_argument("--worker", dest='n_worker', type=int,
                        help="Evaluation worker", default=3)
    parser.add_argument("--generator", dest='n_generator', type=int,
                        help="Data generator", default=10)
    parser.add_argument("--eval_batch", dest="eval_batch", type=int,
                        help="Evaluation batch size", default=32)
    parser.add_argument("--save_dir", dest='save_dir', type=str,
                        help="Model path", default='./transE')
    parser.add_argument("--load_model", dest='load_model', type=str,
                        help="Model file", default="")
    parser.add_argument("--save_per", dest='save_per', type=int,
                        help="Save per x iteration", default=1)
    parser.add_argument("--eval_per", dest='eval_per', type=int,
                        help="Evaluate every x iteration", default=5)
    parser.add_argument("--max_iter", dest='max_iter', type=int,
                        help="Max iteration", default=30)
    parser.add_argument("--summary_dir", dest='summary_dir', type=str,
                        help="summary directory",
                        default='./transE_summary/')
    parser.add_argument("--keep", dest='drop_out', type=float,
                        help="Keep prob (1.0 keep all, 0. drop all)",
                        default=0.5)
    parser.add_argument("--optimizer", dest='optimizer', type=str,
                        help="Optimizer", default='gradient')
    parser.add_argument("--prefix", dest='prefix', type=str,
                        help="model_prefix", default='DEFAULT')
    parser.add_argument("--loss_weight", dest='loss_weight', type=float,
                        help="Weight on parameter loss", default=1e-2)
    parser.add_argument("--neg_weight", dest='neg_weight', type=float,
                        help="Sampling weight on negative examples",
                        default=0.5)
    parser.add_argument("--save_per_batch", dest='save_per_batch', type=int,
                        help='evaluate and save after every x batches',
                        default=1000)
    parser.add_argument(
        "--outfile_prefix", dest='outfile_prefix', type=str,
        help='The filename of output file is outfile_prefix.txt',
        default='test_output')
    parser.add_argument("--neg_sample", dest='neg_sample', type=int,
                        help='No. of neg. samples per (h,r) or (t,r) pair',
                        default=5)
    parser.add_argument(
        "--fanout_thresh", dest='fanout_thresh', type=int,
        help='threshold on fanout of entities to be considered', default=2)
    parser.add_argument('--annoy_n_trees', dest='annoy_n_trees', type=int,
                        help='builds a forest of n_trees trees', default=10)
    parser.add_argument(
        '--annoy_search_k', dest='annoy_search_k', type=int,
        help='During the query it will inspect up to search_k nodes',
        default=-1)
    parser.add_argument('--eval_after', dest='eval_after', type=int,
                        help='Evaluate after this many no. of epochs',
                        default=4)
    args = parser.parse_args()

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print(args)

    model = TransE(args.data_dir, embed_dim=args.dim,
                   fanout_thresh=args.fanout_thresh,
                   eval_batch=args.eval_batch)

    train_pos_neg_list, train_loss, train_op = train_ops(
        model, learning_rate=args.lr, optimizer_str=args.optimizer,
        regularizer_weight=args.loss_weight)
    get_embedding_op = embedding_ops(model)

    log_path = '%s/%s.txt' % (args.save_dir, args.outfile_prefix)

    # Start every run with an empty log file.
    with open(log_path, 'w'):
        pass

    def log(message):
        """Append *message* to the run log, closing the file right away."""
        with open(log_path, 'a') as log_file:
            log_file.write(message)

    def log_ranks(test_type, n_iter, label, ranks, filtered_ranks):
        """Append one mean-rank / HIT@10 summary line to the run log."""
        log("[%s] ITER %d [%s PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f\n" % (
            test_type, n_iter, label,
            np.mean(ranks),
            np.mean(filtered_ranks),
            np.mean(np.asarray(ranks, dtype=np.int32) < 10),
            np.mean(np.asarray(filtered_ranks, dtype=np.int32) < 10)))

    with tf.Session() as session:
        tf.global_variables_initializer().run()

        all_var = tf.global_variables()
        print('printing all %d  TF variables:' % len(all_var))
        for var in all_var:
            print('%s %s' % (var.name, var.get_shape()))

        saver = tf.train.Saver(restore_sequentially=True)

        iter_offset = 0
        if args.load_model and os.path.exists(args.load_model):
            saver.restore(session, args.load_model)
            # The iteration number is the last "_"-separated field of the
            # path component that precedes the final extension.
            iter_offset = int(
                args.load_model.split('.')[-2].split('_')[-1]) + 1
            log("Load model from %s, iteration %d restored.\n" %
                (args.load_model, iter_offset))

        total_inst = model.n_train

        log("preparing training data...\n")
        training_data_pos_neg_list = []
        for dat in model.raw_training_data(batch_size=args.batch):
            ps_list = data_generator_func(dat, model.tr_h, model.hr_t,
                                          model.n_entity, args.neg_sample,
                                          model.n_relation)
            assert ps_list is not None
            training_data_pos_neg_list.append(ps_list)
        nbatches_count = len(training_data_pos_neg_list)
        log("training data prepared.\n")
        log("No. of batches : %d\n" % nbatches_count)

        start_time = timeit.default_timer()

        for n_iter in range(iter_offset, args.max_iter):
            accu_loss = 0.
            ninst = 0

            for pos_neg_list in training_data_pos_neg_list:
                l, _ = session.run([train_loss, train_op],
                                   {train_pos_neg_list: pos_neg_list})
                accu_loss += l
                ninst += len(pos_neg_list)

                # Progress is logged after every batch.
                log('[%d sec](%d/%d) : %.2f -- loss : %.5f \n' %
                    (timeit.default_timer() - start_time, ninst,
                     total_inst, float(ninst) / total_inst, l))

            log("iter %d avg loss %.5f, time %.3f\n" %
                (n_iter, accu_loss / ninst,
                 timeit.default_timer() - start_time))

            save_path = saver.save(
                session,
                os.path.join(
                    args.save_dir,
                    "TransE_" + str(args.prefix) + "_" + str(n_iter) +
                    ".ckpt"))
            log("Model saved at %s\n" % save_path)

            with tf.device('/cpu'):
                if n_iter > args.eval_after and (
                        n_iter % args.eval_per == 0 or
                        n_iter == args.max_iter - 1):
                    t = AnnoyIndex(model.embed_dim, metric='euclidean')

                    # The last training batch is reused as the feed.
                    ent_embedding, rel_embedding = session.run(
                        get_embedding_op,
                        {train_pos_neg_list: pos_neg_list})

                    print(np.asarray(ent_embedding).shape)
                    print(np.asarray(rel_embedding).shape)

                    print('Index creation started')
                    for i in xrange(model.n_entity):
                        t.add_item(i, ent_embedding[i, :])
                    t.build(args.annoy_n_trees)
                    print('Index creation completed')

                    # Neighbours returned per query and nodes inspected per
                    # query; both are fixed here rather than read from args.
                    n = 1000
                    search_k = 1000

                    print('No. of items = %d' % t.get_n_items())
                    print(sum(t.get_item_vector(0)))
                    print(sum(ent_embedding[0, :]))
                    # Sanity check that the index holds the embeddings.
                    # Compared element-wise with a tolerance because exact
                    # equality of float sums depends on accumulation order
                    # and precision.
                    assert np.allclose(t.get_item_vector(0),
                                       ent_embedding[0, :])

                    eval_sets = [(model.raw_training_data, 'TRAIN')]

                    for data_func, test_type in eval_sets:
                        accu_mean_rank_h = list()
                        accu_mean_rank_t = list()
                        accu_filtered_mean_rank_h = list()
                        accu_filtered_mean_rank_t = list()

                        evaluation_batch = []
                        batch_id = 0
                        for testing_data in data_func(
                                batch_size=args.eval_batch):
                            batch_id += 1
                            print('test_type: %s, batch id: %d' %
                                  (test_type, batch_id))
                            head_ids = list()
                            tail_ids = list()
                            for i in xrange(testing_data.shape[0]):
                                head_id = testing_data[i, 0]
                                tail_id = testing_data[i, 1]
                                rel_id = testing_data[i, 2]
                                # Tail candidates: neighbours of h + r.
                                tail_ids.append(
                                    t.get_nns_by_vector(
                                        (ent_embedding[head_id, :] +
                                         rel_embedding[rel_id, :]),
                                        n, search_k))
                                # Head candidates: neighbours of t - r.
                                head_ids.append(
                                    t.get_nns_by_vector(
                                        (ent_embedding[tail_id, :] -
                                         rel_embedding[rel_id, :]),
                                        n, search_k))
                            evaluation_batch.append(
                                (testing_data, head_ids, tail_ids))
                            # Hard cap on the number of evaluated batches.
                            if batch_id > 52662:
                                break

                        # Every collected batch is scored exactly once.
                        for batch in evaluation_batch:
                            (mrh, fmrh), (mrt, fmrt) = worker_func(
                                batch, model.hr_t, model.tr_h)
                            accu_mean_rank_h += mrh
                            accu_mean_rank_t += mrt
                            accu_filtered_mean_rank_h += fmrh
                            accu_filtered_mean_rank_t += fmrt

                        log_ranks(test_type, n_iter, 'HEAD',
                                  accu_mean_rank_h,
                                  accu_filtered_mean_rank_h)
                        log_ranks(test_type, n_iter, 'TAIL',
                                  accu_mean_rank_t,
                                  accu_filtered_mean_rank_t)
def __init__(self, phase, visualize, output_dir, batch_size,
             initial_learning_rate, steps_per_checkpoint, model_dir,
             target_embedding_size, attn_num_hidden, attn_num_layers,
             clip_gradients, max_gradient_norm, session, load_model,
             gpu_id, use_gru, use_distance=True, max_image_width=160,
             max_image_height=60, max_prediction_length=50, channels=1,
             reg_val=0):
    """Build the CNN + attention-decoder graph and load or initialise it.

    Args:
        phase: ``'train'`` builds the loss/optimizer ops; any other value
            builds a forward-only graph. ``'test'`` also forces
            ``batch_size`` to 1.
        visualize: Stored on the instance and logged.
        output_dir: Stored on the instance and logged.
        batch_size: Batch size for the label placeholder and the decoder.
        initial_learning_rate: Learning rate given to the Adadelta optimizer.
        steps_per_checkpoint: Stored on the instance and logged.
        model_dir: Checkpoint directory; created if it does not exist.
        target_embedding_size: Forwarded to ``Seq2SeqModel``.
        attn_num_hidden: Forwarded to ``Seq2SeqModel``.
        attn_num_layers: Forwarded to ``Seq2SeqModel``.
        clip_gradients: When truthy, gradients are clipped by global norm.
        max_gradient_norm: Norm used for the clipping.
        session: TensorFlow session used to restore or initialise variables.
        load_model: When truthy and a checkpoint exists in ``model_dir``,
            the variables are restored from it.
        gpu_id: Non-negative selects ``/gpu:<id>``; negative selects CPU.
        use_gru: Forwarded to ``Seq2SeqModel``.
        use_distance: Stored on the instance.
        max_image_width: Maximum original image width.
        max_image_height: Maximum original image height.
        max_prediction_length: Maximum label length.
        channels: Stored on the instance and logged.
        reg_val: Weight of the regularization losses; used only when > 0.
    """
    self.use_distance = use_distance

    # We need resized width, not the actual width
    max_resized_width = 1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT

    self.max_original_width = max_image_width
    self.max_width = int(math.ceil(max_resized_width))
    self.max_label_length = max_prediction_length
    self.encoder_size = int(math.ceil(1. * self.max_width / 4))
    self.decoder_size = max_prediction_length + 2
    self.buckets = [(self.encoder_size, self.decoder_size)]

    if gpu_id >= 0:
        device_id = '/gpu:' + str(gpu_id)
    else:
        device_id = '/cpu:0'
    self.device_id = device_id

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    if phase == 'test':
        batch_size = 1

    logging.info('phase: %s', phase)
    logging.info('model_dir: %s', model_dir)
    logging.info('load_model: %s', load_model)
    logging.info('output_dir: %s', output_dir)
    logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
    logging.info('batch_size: %d', batch_size)
    logging.info('learning_rate: %f', initial_learning_rate)
    # %s rather than %d: the weight may be fractional and %d would log
    # it truncated to an integer.
    logging.info('reg_val: %s', reg_val)
    logging.info('max_gradient_norm: %f', max_gradient_norm)
    logging.info('clip_gradients: %s', clip_gradients)
    logging.info('max_image_width %f', max_image_width)
    logging.info('max_prediction_length %f', max_prediction_length)
    logging.info('channels: %d', channels)
    logging.info('target_embedding_size: %f', target_embedding_size)
    logging.info('attn_num_hidden: %d', attn_num_hidden)
    logging.info('attn_num_layers: %d', attn_num_layers)
    logging.info('visualize: %s', visualize)

    if use_gru:
        logging.info('using GRU in the decoder.')

    self.reg_val = reg_val
    self.sess = session
    self.steps_per_checkpoint = steps_per_checkpoint
    self.model_dir = model_dir
    self.output_dir = output_dir
    self.batch_size = batch_size
    self.max_label_lengthc = int(self.max_label_length / 4)
    self.global_step = tf.Variable(0, trainable=False)
    self.phase = phase
    self.visualize = visualize
    self.learning_rate = initial_learning_rate
    self.clip_gradients = clip_gradients
    self.channels = channels

    # Only the 'train' phase builds the backward pass.
    self.forward_only = phase != 'train'

    with tf.device(device_id):

        self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
        self.height_float = tf.constant(DataGen.IMAGE_HEIGHT,
                                        dtype=tf.float64)

        self.img_pl = tf.placeholder(tf.string, name='input_image_as_bytes')
        self.labels = tf.placeholder(
            tf.int32,
            shape=(self.batch_size, self.max_label_lengthc),
            name="input_labels_as_bytes")

        # A scalar input is promoted to a batch of one.
        self.img_data = tf.cond(
            tf.less(tf.rank(self.img_pl), 1),
            lambda: tf.expand_dims(self.img_pl, 0),
            lambda: self.img_pl
        )
        self.img_data = tf.map_fn(self._prepare_image, self.img_data,
                                  dtype=tf.float32)
        num_images = tf.shape(self.img_data)[0]

        # TODO: create a mask depending on the image/batch size
        self.encoder_masks = []
        for i in xrange(self.encoder_size + 1):
            self.encoder_masks.append(tf.tile([[1.]], [num_images, 1]))

        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(self.decoder_size + 1):
            self.decoder_inputs.append(tf.tile([1], [num_images]))
            # The extra, final position gets zero weight.
            if i < self.decoder_size:
                self.target_weights.append(tf.tile([1.], [num_images]))
            else:
                self.target_weights.append(tf.tile([0.], [num_images]))

        cnn_model = CNN(self.img_data, not self.forward_only)
        self.conv_output = cnn_model.tf_output()
        self.perm_conv_output = tf.transpose(self.conv_output,
                                             perm=[1, 0, 2])
        self.attention_decoder_model = Seq2SeqModel(
            encoder_masks=self.encoder_masks,
            encoder_inputs_tensor=self.perm_conv_output,
            labels=self.labels,
            decoder_inputs=self.decoder_inputs,
            target_weights=self.target_weights,
            batch_size=self.batch_size,
            target_vocab_size=len(DataGen.CHARMAP),
            buckets=self.buckets,
            target_embedding_size=target_embedding_size,
            attn_num_layers=attn_num_layers,
            attn_num_hidden=attn_num_hidden,
            forward_only=self.forward_only,
            use_gru=use_gru)

        # Lookup table from character id to character.
        table = tf.contrib.lookup.MutableHashTable(
            key_dtype=tf.int64,
            value_dtype=tf.string,
            default_value="",
            checkpoint=True,
        )

        insert = table.insert(
            tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
            tf.constant(DataGen.CHARMAP),
        )

        with tf.control_dependencies([insert]):
            num_feed = []
            prb_feed = []

            for line in xrange(len(self.attention_decoder_model.output)):
                guess = tf.argmax(
                    self.attention_decoder_model.output[line], axis=1)
                proba = tf.reduce_max(
                    tf.nn.softmax(
                        self.attention_decoder_model.output[line]),
                    axis=1)
                num_feed.append(guess)
                prb_feed.append(proba)

            # Join the predictions into a single output string.
            trans_output = tf.transpose(num_feed)
            trans_output = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.cond(
                        tf.equal(x, DataGen.EOS_ID),
                        lambda: '',
                        lambda: table.lookup(x) + a  # pylint: disable=undefined-variable
                    ),
                    m,
                    initializer=''
                ),
                trans_output,
                dtype=tf.string
            )

            # Calculate the total probability of the output string.
            trans_outprb = tf.transpose(prb_feed)
            trans_outprb = tf.gather(trans_outprb,
                                     tf.range(tf.size(trans_output)))
            trans_outprb = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.multiply(tf.cast(x, tf.float64), a),
                    m,
                    initializer=tf.cast(1, tf.float64)
                ),
                trans_outprb,
                dtype=tf.float64
            )

            # A batch of one is returned as a scalar, not a 1-element vector.
            self.prediction = tf.cond(
                tf.equal(tf.shape(trans_output)[0], 1),
                lambda: trans_output[0],
                lambda: trans_output,
            )
            self.probability = tf.cond(
                tf.equal(tf.shape(trans_outprb)[0], 1),
                lambda: trans_outprb[0],
                lambda: trans_outprb,
            )

            self.prediction = tf.identity(self.prediction,
                                          name='prediction')
            self.probability = tf.identity(self.probability,
                                           name='probability')

        if not self.forward_only:  # train
            self.updates = []
            self.summaries_by_bucket = []

            params = tf.trainable_variables()
            opt = tf.train.AdadeltaOptimizer(
                learning_rate=initial_learning_rate)
            loss_op = self.attention_decoder_model.loss

            if self.reg_val > 0:
                reg_losses = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                logging.info('Adding %s regularization losses',
                             len(reg_losses))
                logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                loss_op = self.reg_val * tf.reduce_sum(
                    reg_losses) + loss_op

            gradients, params = list(
                zip(*opt.compute_gradients(loss_op, params)))
            if self.clip_gradients:
                gradients, _ = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)

            # Summaries for loss and total gradient norm.
            summaries = [
                tf.summary.scalar("loss", loss_op),
                tf.summary.scalar("total_gradient_norm",
                                  tf.global_norm(gradients))
            ]
            all_summaries = tf.summary.merge(summaries)
            self.summaries_by_bucket.append(all_summaries)

            # update op - apply gradients
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.updates.append(
                    opt.apply_gradients(
                        list(zip(gradients, params)),
                        global_step=self.global_step))

    self.saver_all = tf.train.Saver(tf.global_variables())
    self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and load_model:
        # pylint: disable=no-member
        logging.info("Reading model parameters from %s",
                     ckpt.model_checkpoint_path)
        self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        logging.info("Created model with fresh parameters.")
        self.sess.run(tf.global_variables_initializer())
def __init__(self, g_net, d_net, z_sampler, data, model, sigma, digit, reg,
             lr, cilpc, batch_size, num_batches, plot_size, save_size,
             d_iters, data_name, data_path, path_output):  # changed
    """Store the configuration and build the training graph and session.

    The discriminator is trained by computing its gradients explicitly,
    passing every non-None gradient through ``self.dpnoise`` and applying
    the result; the generator is trained with a plain ``minimize``. Both use
    RMSProp with learning rate ``lr``. Discriminator variables are clipped
    to ``[-cilpc, cilpc]`` by the ops in ``self.d_clip``.
    """
    # Plain configuration.
    self.model = model
    self.data = data
    self.g_net = g_net
    self.d_net = d_net
    self.z_sampler = z_sampler
    self.x_dim = d_net.x_dim
    self.z_dim = g_net.z_dim
    self.sigma = sigma
    self.digit = digit
    self.regc = reg
    self.lr = lr
    self.cilpc = cilpc
    self.batch_size = batch_size
    self.num_batches = num_batches
    self.plot_size = plot_size
    self.save_size = save_size
    self.d_iters = d_iters
    self.data_name = data_name
    self.data_path = data_path
    self.path_output = path_output

    # Training split only; each image is flattened to 784 values and then
    # passed through normlization().
    (train_images, train_labels), (_, _) = mnist_db.load_data()
    self.label_td = train_labels
    self.data_td = np.reshape(train_images, (-1, 784))
    self.data_td = normlization(self.data_td)

    self.x = tf.placeholder(tf.float32, [None, self.x_dim], name='x')  # [None, 784]
    self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')

    self.x_ = self.g_net(self.z)

    self.d = self.d_net(self.x, reuse=False)
    self.d_ = self.d_net(self.x_)

    self.g_loss = tf.reduce_mean(self.d_)
    self.d_loss = tf.reduce_mean(self.d) - tf.reduce_mean(self.d_)

    # L1 penalty over every variable whose name contains 'weights'.
    weight_vars = []
    for candidate in tf.all_variables():
        if 'weights' in candidate.name:
            weight_vars.append(candidate)
    self.reg = tc.layers.apply_regularization(
        tc.layers.l1_regularizer(self.regc),
        weights_list=weight_vars
    )
    self.g_loss_reg = self.g_loss + self.reg
    self.d_loss_reg = self.d_loss + self.reg

    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        # Discriminator: gradients are computed explicitly so that noise
        # can be added before they are applied.
        self.d_rmsprop = tf.train.RMSPropOptimizer(learning_rate=self.lr)
        grads_and_vars = self.d_rmsprop.compute_gradients(
            -1 * self.d_loss_reg, var_list=self.d_net.vars)
        dp_grads_and_vars = []
        for grad, variable in grads_and_vars:
            # Variables without a gradient are passed through as None.
            noisy_grad = grad
            if noisy_grad is not None:
                noisy_grad = self.dpnoise(noisy_grad, self.batch_size)
            dp_grads_and_vars.append((noisy_grad, variable))
        self.d_rmsprop_new = self.d_rmsprop.apply_gradients(
            dp_grads_and_vars)

        # Generator: no noise added.
        g_optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr)
        self.g_rmsprop = g_optimizer.minimize(
            -1 * self.g_loss_reg, var_list=self.g_net.vars)

    self.d_clip = [
        v.assign(tf.clip_by_value(v, -1 * self.cilpc, self.cilpc))
        for v in self.d_net.vars
    ]

    # Gradients of the discriminator loss w.r.t. its variables, with None
    # entries removed.
    raw_d_grads = tf.gradients(self.d_loss_reg, self.d_net.vars)
    self.d_net_var_grad = [grad for grad in raw_d_grads if grad is not None]
    self.norm_d_net_var_grad = []

    gpu_options = tf.GPUOptions(allow_growth=True)
    session_config = tf.ConfigProto(gpu_options=gpu_options)
    self.sess = tf.Session(config=session_config)

    self.g_loss_store = []  # generator loss history
    self.d_loss_store = []  # discriminator loss history
    self.wdis_store = []  # Wasserstein distance history
B=100 C=3 batch_size=128 data_size=2000 x_dim=(40,40) u_dim=2 k=.1 A=int(k*data_size) # number of samples we gather on each cycle policy_eval=PlanePolicy(1, x_dim, u_dim, "epolicy") policy_batch=PlanePolicy(batch_size, x_dim, u_dim, "epolicy", share=True) e2c = E2CPlaneModel(policy_batch.u, batch_size) for v in tf.all_variables(): print("%s : %s" % (v.name, v.get_shape())) e2c.buildLoss(lambd=.25) policy_batch.set_reward(e2c.loss) # drive towards area where prediction is weak sess=tf.InteractiveSession() # save both policy and E2C variables saver = tf.train.Saver(max_to_keep=num_episodes) def getXs(D,idx): p0s = D[idx,0:2].reshape((batch_size,-1)) u0v = D[idx,2:4] p1s = D[idx,4:6].reshape((batch_size,-1)) x0v = np.zeros((batch_size,1600))
sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(sequence_length=x_train.shape[1], num_classes=len(onehot_label), vocab_size=len(vocabulary), embedding_size=FLAGS.embedding_dim, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=list(map(int, FLAGS.num_filters.split(","))), l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = 0 train_op = tf.train.AdamOptimizer(0.001).minimize(cnn.loss) saver = tf.train.Saver(tf.all_variables()) # Initialize all variables sess.run(tf.initialize_all_variables()) if FLAGS.use_word2vec: # initial matrix with random uniform if os.path.isfile(FLAGS.w2v_data_path): initE = np.load(FLAGS.w2v_data_path) else: initE = np.random.uniform( -1.0, 1.0, (len(vocabulary), FLAGS.embedding_dim)) # load any vectors from the word2vec print("Loading word2vec file {}\n".format(FLAGS.word2vec)) word_vectors = KeyedVectors.load_word2vec_format(
def __init__(self, source_vocab_size, target_vocab_size, en_de_seq_len,
             hidden_size, num_layers, batch_size, learning_rate,
             num_samples=1024, forward_only=False, beam_search=True,
             beam_size=10):
    '''
    Initialise and build the model.
    :param source_vocab_size: vocabulary size of the encoder input
    :param target_vocab_size: vocabulary size of the decoder input
    :param en_de_seq_len: maximum source and target sequence lengths
    :param hidden_size: number of hidden units of the RNN
    :param num_layers: number of stacked RNN layers
    :param batch_size: batch size
    :param learning_rate: learning rate
    :param num_samples: number of samples used by the sampled softmax loss
    :param forward_only: set to True for prediction
    :param beam_search: at prediction time, use beam search instead of
        greedy search
    :param beam_size: beam width for beam search
    '''
    self.source_vocab_size = source_vocab_size
    self.target_vocab_size = target_vocab_size
    self.en_de_seq_len = en_de_seq_len
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.num_samples = num_samples
    self.forward_only = forward_only
    self.beam_search = beam_search
    self.beam_size = beam_size
    self.global_step = tf.Variable(0, trainable=False)

    output_projection = None
    softmax_loss_function = None
    # Define the sampled loss function that is later handed to the
    # sequence loss. It is only used when fewer classes are sampled than
    # the target vocabulary contains.
    if 0 < num_samples < self.target_vocab_size:
        w = tf.get_variable('proj_w', [hidden_size, self.target_vocab_size])
        w_t = tf.transpose(w)
        b = tf.get_variable('proj_b', [self.target_vocab_size])
        output_projection = (w, b)

        # sampled_softmax_loss evaluates the loss on a sample of classes,
        # which saves computation.
        def sample_loss(logits, labels):
            labels = tf.reshape(labels, [-1, 1])
            return tf.nn.sampled_softmax_loss(
                w_t, b, labels=labels, inputs=logits,
                num_sampled=num_samples,
                num_classes=self.target_vocab_size)

        softmax_loss_function = sample_loss

    self.keep_drop = tf.placeholder(tf.float32)

    # Multi-layer dropout RNN cell used for the encoder and decoder stages.
    def create_rnn_cell():
        encoDecoCell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
        encoDecoCell = tf.contrib.rnn.DropoutWrapper(
            encoDecoCell, input_keep_prob=1.0,
            output_keep_prob=self.keep_drop)
        return encoDecoCell

    encoCell = tf.contrib.rnn.MultiRNNCell(
        [create_rnn_cell() for _ in range(num_layers)])

    # Input placeholders, kept as lists with one entry per time step.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.decoder_targets = []
    self.target_weights = []
    for i in range(en_de_seq_len[0]):
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None, ],
                           name="encoder{0}".format(i)))
    for i in range(en_de_seq_len[1]):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None, ],
                           name="decoder{0}".format(i)))
        self.decoder_targets.append(
            tf.placeholder(tf.int32, shape=[None, ],
                           name="target{0}".format(i)))
        self.target_weights.append(
            tf.placeholder(tf.float32, shape=[None, ],
                           name="weight{0}".format(i)))

    # Test mode: the output of the previous step is fed as the next input.
    if forward_only:
        if beam_search:
            # Beam search uses the project's own
            # embedding_attention_seq2seq, not the legacy_seq2seq one.
            self.beam_outputs, _, self.beam_path, self.beam_symbol = embedding_attention_seq2seq(
                self.encoder_inputs, self.decoder_inputs, encoCell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=hidden_size,
                output_projection=output_projection,
                feed_previous=True)
        else:
            decoder_outputs, _ = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                self.encoder_inputs, self.decoder_inputs, encoCell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=hidden_size,
                output_projection=output_projection,
                feed_previous=True)
            # Apply the output projection to the decoder outputs here.
            if output_projection is not None:
                self.outputs = tf.matmul(decoder_outputs, output_projection[0]) + output_projection[1]
    else:
        # Training: decoder inputs come from the placeholders
        # (feed_previous=False).
        decoder_outputs, _ = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
            self.encoder_inputs, self.decoder_inputs, encoCell,
            num_encoder_symbols=source_vocab_size,
            num_decoder_symbols=target_vocab_size,
            embedding_size=hidden_size,
            output_projection=output_projection,
            feed_previous=False)
        self.loss = tf.contrib.legacy_seq2seq.sequence_loss(
            decoder_outputs, self.decoder_targets, self.target_weights,
            softmax_loss_function=softmax_loss_function)

        # Initialize the optimizer
        opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                     beta1=0.9, beta2=0.999, epsilon=1e-08)
        self.optOp = opt.minimize(self.loss)

    self.saver = tf.train.Saver(tf.global_variables())
def initialize():
    """Initialize every variable not yet recorded in ALREADY_INITIALIZED.

    The newly initialized variables are added to ALREADY_INITIALIZED, so a
    later call only touches variables created in the meantime.
    """
    pending = {
        variable for variable in tf.all_variables()
        if variable not in ALREADY_INITIALIZED
    }
    session = get_session()
    session.run(tf.initialize_variables(pending))
    ALREADY_INITIALIZED.update(pending)
def main():
    """Restore the latest checkpoint and score the model on the test set.

    Loads the dataset and embedding, builds the model, restores the most
    recent checkpoint in ``s['check_dir']`` when one exists, runs the test
    set through the model in full batches and reports the results of
    ``tools.conlleval`` (all) and ``tools.conlleval_top`` (top 5, top 10)
    both on stdout and in ``<check_dir>/predict_log.txt``.
    """
    s = {
        'nh1': 450,
        'nh2': 450,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.0001,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,  # 50 epochs in total
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints/kp20k_mycps_multisize_CNN_LSTM_attention_Adam_0.0001_16_again',
        'display_test_per': 1,
        'lr_decay_per': 5
    }

    # load the dataset
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/ACL2017_t_a_embedding.pkl'
    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    # Only the inputs and the z labels of the test split are used below.
    test_lex, _, test_z = test_set

    y_nclasses = 2
    z_nclasses = 5

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
        allow_soft_placement=True)

    with tf.Session(config=config) as sess:
        my_model = mymodel.myModel(nh1=s['nh1'],
                                   nh2=s['nh2'],
                                   ny=y_nclasses,
                                   nz=z_nclasses,
                                   de=s['emb_dimension'],
                                   lr=s['lr'],
                                   lr_decay=s['lr_decay'],
                                   embedding=embedding,
                                   max_gradient_norm=s['max_grad_norm'],
                                   batch_size=s['batch_size'],
                                   rnn_model_cell='lstm')

        checkpoint_dir = s['check_dir']
        log_path = str(s['check_dir']) + '/predict_log.txt'

        # The context manager closes the log even when restoring or
        # evaluating raises.
        with open(log_path, 'a', encoding='utf-8') as logfile:
            saver = tf.train.Saver(tf.all_variables())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print(ckpt.model_checkpoint_path)
                logfile.write(str(ckpt.model_checkpoint_path) + '\n')
                saver.restore(sess, ckpt.model_checkpoint_path)

            def dev_step(cwords):
                """Return the model's ``sz_pred`` for one padded batch."""
                feed = {
                    my_model.cnn_input_x: cwords,
                    my_model.keep_prob: s['keep_prob']
                }
                fetches = my_model.sz_pred
                sz_pred = sess.run(fetches=fetches, feed_dict=feed)
                return sz_pred

            predictions_test = []
            groundtruth_test = []
            start_num = 0
            # Only full batches are evaluated; a trailing partial batch is
            # not included.
            steps = len(test_lex) // s['batch_size']
            print('testing............')
            for step in range(steps):
                x, z = test_batch_putin(test_lex, test_z,
                                        start_num=start_num,
                                        batch_size=s['batch_size'])
                x = load.pad_sentences(x)
                predictions_test.extend(dev_step(x))
                groundtruth_test.extend(z)
                start_num += s['batch_size']
                if step % 100 == 0:
                    print('tested %d batch......' % (step // 100))

            print("测试结果:")
            logfile.write("测试结果:\n")
            res_test = tools.conlleval(predictions_test, groundtruth_test)
            print('all: ', res_test)
            logfile.write('all: ' + str(res_test) + '\n')
            res_test_top5 = tools.conlleval_top(predictions_test,
                                                groundtruth_test, 5)
            print('top5: ', res_test_top5)
            logfile.write('top5: ' + str(res_test_top5) + '\n')
            res_test_top10 = tools.conlleval_top(predictions_test,
                                                 groundtruth_test, 10)
            print('top10: ', res_test_top10)
            logfile.write('top10: ' + str(res_test_top10) + '\n')
def gen_head_poetry(heads, type):
    """Generate a poem in which each sentence starts with an entry of *heads*.

    For every head the network is sampled until it emits the full stop
    ``。``; the sentence is kept only if its length equals ``2 + 2 * type``,
    otherwise it is sampled again from scratch.

    Args:
        heads: Iterable of starting characters, one per sentence. Each must
            be a key of ``word_num_map``.
        type: Sentence type; must be 5 or 7.

    Returns:
        The accepted sentences joined together, each followed by a newline,
        or None (after printing a message) when *type* is neither 5 nor 7.
    """
    if type not in (5, 7):
        print('The second para has to be 5 or 7!')
        return None

    def to_word(weights):
        """Sample a word with probability proportional to *weights*."""
        t = np.cumsum(weights)
        s = np.sum(weights)
        sample = int(np.searchsorted(t, np.random.rand(1) * s))
        return words[sample]

    _, last_state, probs, cell, initial_state = neural_network()

    Session_config = tf.ConfigProto(allow_soft_placement=True)
    Session_config.gpu_options.allow_growth = True

    with tf.Session(config=Session_config) as sess:
        with tf.device('/gpu:1'):
            sess.run(tf.global_variables_initializer())

            saver = tf.train.Saver(tf.global_variables())
            saver.restore(sess, 'model/poetry.module-99')

            # Built once: creating it inside the retry loop would add new
            # ops to the graph on every attempt.
            zero_state = cell.zero_state(1, tf.float32)

            poem = ''
            for head in heads:
                flag = True
                while flag:
                    state_ = sess.run(zero_state)

                    # Prime the network with the start marker.
                    x = np.array([list(map(word_num_map.get, u'['))])
                    [probs_, state_] = sess.run(
                        [probs, last_state],
                        feed_dict={input_data: x, initial_state: state_})

                    sentence = head

                    x = np.zeros((1, 1))
                    x[0, 0] = word_num_map[sentence]
                    [probs_, state_] = sess.run(
                        [probs, last_state],
                        feed_dict={input_data: x, initial_state: state_})
                    word = to_word(probs_)
                    sentence += word

                    while word != u'。':
                        x = np.zeros((1, 1))
                        print(x)
                        x[0, 0] = word_num_map[word]
                        print(x)
                        [probs_, state_] = sess.run(
                            [probs, last_state],
                            feed_dict={input_data: x,
                                       initial_state: state_})
                        word = to_word(probs_)
                        sentence += word

                    # Accept only sentences of the expected length;
                    # otherwise resample this head.
                    if len(sentence) == 2 + 2 * type:
                        sentence += u'\n'
                        poem += sentence
                        flag = False

            return poem
def main(_):
    """Average the variables of several checkpoints into one checkpoint.

    The checkpoints are either listed explicitly in ``FLAGS.checkpoints``
    (comma separated, optionally prefixed with ``FLAGS.prefix``) or taken as
    the last ``FLAGS.num_last_checkpoints`` entries of the checkpoint state
    found in the directory of ``FLAGS.prefix``. Every variable except those
    whose name starts with ``global_step`` is averaged, and the result is
    saved to ``FLAGS.output_path``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If no checkpoints are given or none of them exists.
    """
    if FLAGS.checkpoints:
        # Get the checkpoints list from flags and run some basic checks.
        checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")]
        checkpoints = [c for c in checkpoints if c]
        if not checkpoints:
            raise ValueError("No checkpoints provided for averaging.")
        if FLAGS.prefix:
            checkpoints = [FLAGS.prefix + c for c in checkpoints]
    else:
        assert FLAGS.num_last_checkpoints >= 1, "Must average at least one model"
        assert FLAGS.prefix, ("Prefix must be provided when averaging last"
                              " N checkpoints")
        checkpoint_state = tf.train.get_checkpoint_state(
            os.path.dirname(FLAGS.prefix))
        if checkpoint_state is None:
            # No checkpoint state in the directory: fall through to the
            # "Could not find checkpoints" error below instead of failing
            # with an AttributeError.
            checkpoints = []
        else:
            # Checkpoints are ordered from oldest to newest.
            checkpoints = checkpoint_state.all_model_checkpoint_paths[
                -FLAGS.num_last_checkpoints:]

    checkpoints = [c for c in checkpoints if checkpoint_exists(c)]
    if not checkpoints:
        if FLAGS.checkpoints:
            raise ValueError(
                "None of the provided checkpoints exist. %s" %
                FLAGS.checkpoints)
        else:
            raise ValueError("Could not find checkpoints at %s" %
                             os.path.dirname(FLAGS.prefix))

    # Read variables from all checkpoints and average them.
    tf.logging.info("Reading variables and averaging checkpoints:")
    for c in checkpoints:
        tf.logging.info("%s ", c)
    var_list = tf.contrib.framework.list_variables(checkpoints[0])
    var_values, var_dtypes = {}, {}
    for (name, shape) in var_list:
        if not name.startswith("global_step"):
            var_values[name] = np.zeros(shape)
    for checkpoint in checkpoints:
        reader = tf.contrib.framework.load_checkpoint(checkpoint)
        for name in var_values:
            tensor = reader.get_tensor(name)
            var_dtypes[name] = tensor.dtype
            var_values[name] += tensor
        tf.logging.info("Read from checkpoint %s", checkpoint)
    for name in var_values:  # Average.
        var_values[name] /= len(checkpoints)

    # Each variable is created with its own dtype, looked up by its own
    # name rather than by a name left over from the loops above.
    tf_vars = [
        tf.get_variable(v, shape=var_values[v].shape, dtype=var_dtypes[v])
        for v in var_values
    ]
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]
    global_step = tf.Variable(
        0, name="global_step", trainable=False, dtype=tf.int64)
    saver = tf.train.Saver(tf.all_variables())

    # Build a model consisting only of variables, set them to the average
    # values. var_values is iterated in the same order that was used to
    # build tf_vars, so the three sequences line up.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for p, assign_op, (name, value) in zip(placeholders, assign_ops,
                                               six.iteritems(var_values)):
            sess.run(assign_op, {p: value})
        # Use the built saver to save the averaged checkpoint.
        saver.save(sess, FLAGS.output_path, global_step=global_step)

    tf.logging.info("Averaged checkpoints saved in %s", FLAGS.output_path)
def train():
    """Train CIFAR-10 for a number of steps.

    Builds the input pipeline, model, loss and train op in a fresh graph, then
    runs ``FLAGS.max_steps`` training steps.  Progress is printed every 10
    steps, summaries are written to ``FLAGS.train_dir`` every 100 steps and a
    checkpoint is saved every 1000 steps and after the final step.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs_2()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference1(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Create a saver.  tf.global_variables() is the non-deprecated name of
        # tf.all_variables() and matches the initializer used below.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, checkpoint_path, global_step=step)
def test(load_version, sift_test_list, iss_test_list, submap_id, cam_id,
         submap_image_id):
    """Match image patches to point-cloud patches with a restored model.

    Descriptors are computed for every full batch of the selected SIFT image
    patches and ISS point-cloud patches, squared Euclidean distances between
    all descriptor pairs are evaluated, and for each image patch the closest
    point-cloud patch is written to a text file when its distance does not
    exceed the module-level ``thresh_dist``.

    Args:
        load_version: checkpoint tag; the model is restored from
            ``model/v1/<load_version>_model.ckpt``.
        sift_test_list: nested sequence indexed as
            ``[submap_id - 1][submap_image_id - 1]``.
        iss_test_list: sequence indexed as ``[submap_id - 1]``.
        submap_id: 1-based submap index.
        cam_id: camera id; only used in the output file name.
        submap_image_id: 1-based image index inside the submap.
    """
    print('----------------- START to test -----------------')
    sift_test_list = sift_test_list[submap_id - 1][submap_image_id - 1]
    iss_test_list = iss_test_list[submap_id - 1]

    # record test_list for checking
    with open('sift_test_list.txt', 'w') as list_file:
        for entry in sift_test_list:
            list_file.write('%s\n' % entry)
    with open('iss_test_list.txt', 'w') as list_file:
        for entry in iss_test_list:
            list_file.write('%s\n' % entry)

    # define placeholder
    image_pl = tf.placeholder(tf.float32,
                              shape=[batch_size, image_size, image_size, 3])
    pos_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])
    neg_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])
    is_training = tf.placeholder(tf.bool)

    # build model
    print('build model')
    with tf.device('/gpu:1'):  # use gpu 1 to forward
        with tf.variable_scope('image_branch') as scope:
            # NOTE(review): the image branch is built with a constant
            # is_training=True rather than the placeholder -- confirm this is
            # intended for evaluation.
            image_feature = vgg16(image_pl, is_training=True,
                                  output_dim=image_feature_dim, bn_decay=None)
        with tf.variable_scope('pointcloud_branch') as scope:
            pos_pcl_feature, _ = pointNet(pos_pcl_pl, pcl_feature_dim,
                                          is_training=is_training,
                                          use_bn=False, bn_decay=None)
            # The negative branch shares the positive branch's variables.
            scope.reuse_variables()
            neg_pcl_feature, _ = pointNet(neg_pcl_pl, pcl_feature_dim,
                                          is_training=is_training,
                                          use_bn=False, bn_decay=None)

    # tf.global_variables() is the non-deprecated name of tf.all_variables().
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

    # run model
    print('run model...')
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    with tf.Session(config=config) as sess:
        print('initialise model...')
        sess.run(tf.global_variables_initializer())
        print('  load model...')
        save_path = 'model/' + 'v1' + '/' + load_version + '_model.ckpt'
        saver.restore(sess, save_path)
        print("  Model loaded from: %s" % save_path)

        # -------------------- evaluate model ---------------------
        print('**** Validate ...')
        print('  Compute image and pcl descriptors')

        # Only full batches are evaluated; trailing items that do not fill a
        # batch are dropped.
        sift_batch_num = len(sift_test_list) // batch_size
        sift_test_num = sift_batch_num * batch_size
        iss_batch_num = len(iss_test_list) // batch_size
        iss_test_num = iss_batch_num * batch_size

        img_feature = np.zeros([sift_test_num, image_feature_dim])
        pcl_feature = np.zeros([iss_test_num, pcl_feature_dim])

        # feed sift test list into the network
        batch_counter = 0
        print('---------- test sift ----------')
        for i in range(sift_batch_num):
            print("  *** sift progress: %d/%d" % (i, sift_batch_num))
            img_batch = getSIFTTestBatch(sift_test_list, i)
            feed_dict = {image_pl: img_batch, is_training: False}
            img_batch_feature = sess.run(image_feature, feed_dict=feed_dict)
            batch_end = batch_counter + img_batch_feature.shape[0]
            img_feature[batch_counter:batch_end, :] = img_batch_feature
            batch_counter = batch_end

        # feed iss test list into the network
        batch_counter = 0
        print('-------- test iss --------------')
        for i in range(iss_batch_num):
            print("  *** iss progress: %d/%d" % (i, iss_batch_num))
            pcl_batch = getISSTestBatch(iss_test_list, i)
            feed_dict = {pos_pcl_pl: pcl_batch, is_training: False}
            pcl_batch_feature = sess.run(pos_pcl_feature, feed_dict=feed_dict)
            batch_end = batch_counter + pcl_batch_feature.shape[0]
            pcl_feature[batch_counter:batch_end, :] = pcl_batch_feature
            batch_counter = batch_end

        # compute distance array between img_feature and pcl_feature:
        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b
        img_vec = np.sum(np.multiply(img_feature, img_feature), axis=1,
                         keepdims=True)
        pcl_vec = np.sum(np.multiply(pcl_feature, pcl_feature), axis=1,
                         keepdims=True)
        dist_array = img_vec + np.transpose(pcl_vec) - 2 * np.matmul(
            img_feature, np.transpose(pcl_feature))
        print("  image patch num: %d, submap pcl num: %d" %
              (dist_array.shape[0], dist_array.shape[1]))

        # find correspondences and record (ids in the file are 1-based)
        txt_file_path = "%s/%03d_cam%d_%03d.txt" % (
            sift_iss_correspond_dir, submap_id, cam_id, submap_image_id)
        with open(txt_file_path, "w") as out_file:
            for i in range(dist_array.shape[0]):
                min_dist_id = np.argmin(dist_array[i, :])
                min_dist_val = dist_array[i, min_dist_id]
                if min_dist_val <= thresh_dist:
                    out_file.write('%d %d %s %s\n' %
                                   ((i + 1), (min_dist_id + 1),
                                    sift_test_list[i],
                                    iss_test_list[min_dist_id]))
# Script-level setup: build the agent, the replay memory, the policy and
# state-value networks (each with its own Saver) and a TensorFlow session.
logging.debug("Initializing...")
logging.debug("Agent...")
agent = Agent(saved_execution_times_prefix, adjacency_matrix_filename, feature_matrix_filename, benchmark, execution_features, output_schedule_filename=output_schedule_filename, adjacency_is_sparse=SPARSE_ADJ, num_repeats=NUM_REPEATS)

logging.debug("Replay Memory...")
# The replay memory is sized from the number of nodes to allocate times the
# number of episodes per update.
num_update_transitions = len(agent.all_nodes_to_allocate)*NUM_EPISODES_PER_UPDATE
replay_memory = ReplayMemory(num_update_transitions)

logging.debug("Policy Network...")
with tf.variable_scope('policy_network'):
    # 'method_type' and 'learning_rate' are defined as flags just before the
    # network is constructed -- presumably the network classes read them from
    # FLAGS; verify against their implementation.
    flags.DEFINE_string('method_type', "reinforce_policy", "'value' or 'policy' based policy_network")
    flags.DEFINE_float('learning_rate', POLICY_LR, 'Initial learning rate.')
    policy_network = ReinforcePolicyNetwork_SimpleMLP(agent.undirected_adj, agent.feature_matrix.shape[0], agent.feature_matrix.shape[1], agent.actions_vector, include_partial_solution_feature=INCLUDE_PARTIAL_SOLUTION_FEATURE, zero_non_included_nodes=ZERO_NON_INCLUDED_NODES, variable_support=VARIABLE_SUPPORT)
# Saver restricted to variables whose name contains 'policy_network'.
policy_saver = tf.train.Saver([v for v in tf.all_variables() if 'policy_network' in v.name])

logging.debug("Value Network...")
# Remove the two flags defined above so they can be re-defined with the
# value-network settings; statement order matters here.
tf.flags.FLAGS.__delattr__('method_type')
tf.flags.FLAGS.__delattr__('learning_rate')
flags.DEFINE_string('method_type', "batched_statevalue", "'value' or 'policy' or 'statevalue' network")
flags.DEFINE_float('learning_rate', STATE_LR, 'Initial learning rate.')
with tf.variable_scope('statevalue_network'):
    value_network = BatchedStateValueNetwork(agent.undirected_adj, agent.feature_matrix.shape[0], agent.feature_matrix.shape[1], agent.actions_vector)
# Saver restricted to variables whose name contains 'statevalue_network'.
value_saver = tf.train.Saver([v for v in tf.all_variables() if 'statevalue_network' in v.name])

logging.debug("Tensorflow session...")
session_config = tf.ConfigProto()
# Let TensorFlow grow GPU memory on demand.
session_config.gpu_options.allow_growth=True
sess = tf.Session(config=session_config)
sess.run(tf.global_variables_initializer())
def main(argv=None):
    """Train a model on ``N_GPU`` GPUs with averaged gradients.

    Each GPU builds the loss on shared variables; the per-tower gradients are
    averaged and applied once per step together with an exponential moving
    average update.  Progress and summaries are emitted every 10 steps and a
    checkpoint is written every 1000 steps and after the last step.

    Args:
        argv: unused.
    """
    # Keep the simple operations on the CPU; only the network training itself
    # is placed on the GPUs.
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Define the basic training setup.
        x, y_ = get_input()
        regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)

        global_step = tf.get_variable(
            'global_step', [], initializer=tf.constant_initializer(0),
            trainable=False)
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE, global_step, 60000 / BATCH_SIZE,
            LEARNING_RATE_DECAY)

        opt = tf.train.GradientDescentOptimizer(learning_rate)

        tower_grads = []
        reuse_variables = False
        # Run the optimization of the network on the different GPUs.
        for i in range(N_GPU):
            with tf.device('/gpu:%d' % i):
                # name_scope does not affect the names of variables created
                # through get_variable.
                with tf.name_scope('GPU_%d' % i) as scope:
                    # Original author's note: 4 variables in total
                    # (2 weights and 2 biases).
                    cur_loss = get_loss(x, y_, regularizer, scope,
                                        reuse_variables)
                    # Every tower after the first reuses the variables.
                    reuse_variables = True
                    # A list of (gradient, variable) pairs, one per variable;
                    # its length does not depend on the number of GPUs.
                    grads = opt.compute_gradients(cur_loss)
                    tower_grads.append(grads)

        # Average the gradients over all towers (the variables are shared).
        # tower_grads holds one [(grad, var), ...] list per GPU.
        grad_and_vars = average_gradients(tower_grads)
        for grad, var in grad_and_vars:
            if grad is not None:
                tf.summary.histogram(
                    'gradients_on_average/%s' % var.op.name, grad)

        # Update the parameters with the averaged gradients.
        apply_gradient_op = opt.apply_gradients(grad_and_vars,
                                                global_step=global_step)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        # Maintain moving averages of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Each step updates both the variables and their moving averages.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Non-deprecated equivalents of tf.all_variables() and
        # tf.initialize_all_variables().
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()
        init = tf.global_variables_initializer()

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=True)) as sess:
            # Initialize all variables and start the input queues.
            sess.run(init)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(MODEL_SAVE_PATH, sess.graph)
            checkpoint_path = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)
            format_str = (
                '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
            )

            for step in range(TRAINING_STEPS):
                # Run one training step and measure how long it takes.
                start_time = time.time()
                _, loss_value = sess.run([train_op, cur_loss])
                duration = time.time() - start_time

                # Periodically report progress and training speed.
                if step != 0 and step % 10 == 0:
                    # Number of training examples consumed per step.
                    num_examples_per_step = BATCH_SIZE * N_GPU
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / N_GPU

                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))

                    # Write summaries for TensorBoard.
                    summary = sess.run(summary_op)
                    summary_writer.add_summary(summary, step)

                # Periodically save the current model.
                if step % 1000 == 0 or (step + 1) == TRAINING_STEPS:
                    saver.save(sess, checkpoint_path, global_step=step)

            coord.request_stop()
            coord.join(threads)
def main(unused_argv):
    """Build the inference graph and export it as a frozen graph.

    The graph takes the tensors from ``_create_input_tensors()``, runs single-
    or multi-scale prediction depending on ``FLAGS.inference_scales``, crops
    predictions and probabilities to the resized image size, resizes them back
    to the original image size, and freezes the result with the weights from
    ``FLAGS.checkpoint_path`` into ``FLAGS.export_path``.

    Args:
        unused_argv: unused.

    Raises:
        ValueError: if quantization (``FLAGS.quantize_delay_step >= 0``) is
            requested together with multi-scale inference.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Prepare to export model to: %s', FLAGS.export_path)

    with tf.Graph().as_default():
        image, image_size, resized_image_size = _create_input_tensors()

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: FLAGS.num_classes},
            crop_size=FLAGS.crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.inference_scales) == (1.0, ):
            tf.logging.info('Exported model performs single-scale inference.')
            predictions = model.predict_labels(
                image,
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Exported model performs multi-scale inference.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                image,
                model_options=model_options,
                eval_scales=FLAGS.inference_scales,
                add_flipped_images=FLAGS.add_flipped_images)

        raw_predictions = tf.identity(
            tf.cast(predictions[common.OUTPUT_TYPE], tf.float32),
            _RAW_OUTPUT_NAME)
        raw_probabilities = tf.identity(
            predictions[common.OUTPUT_TYPE + model.PROB_SUFFIX],
            _RAW_OUTPUT_PROB_NAME)

        # Crop the valid regions from the predictions.
        semantic_predictions = raw_predictions[
            :, :resized_image_size[0], :resized_image_size[1]]
        semantic_probabilities = raw_probabilities[
            :, :resized_image_size[0], :resized_image_size[1]]

        # Resize back the prediction to the original image size.
        def _resize_label(label, label_size):
            """Resize a label map to `label_size` with nearest neighbour."""
            # Expand dimension of label to [1, height, width, 1] for resize
            # operation.
            label = tf.expand_dims(label, 3)
            resized_label = tf.image.resize_images(
                label,
                label_size,
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=True)
            return tf.cast(tf.squeeze(resized_label, 3), tf.int32)

        semantic_predictions = _resize_label(semantic_predictions, image_size)
        semantic_predictions = tf.identity(semantic_predictions,
                                           name=_OUTPUT_NAME)

        semantic_probabilities = tf.image.resize_bilinear(
            semantic_probabilities, image_size, align_corners=True,
            name=_OUTPUT_PROB_NAME)

        if FLAGS.quantize_delay_step >= 0:
            contrib_quantize.create_eval_graph()

        # tf.global_variables() is the non-deprecated name of
        # tf.all_variables().
        saver = tf.train.Saver(tf.global_variables())

        dirname = os.path.dirname(FLAGS.export_path)
        tf.gfile.MakeDirs(dirname)
        graph_def = tf.get_default_graph().as_graph_def(add_shapes=True)
        freeze_graph.freeze_graph_with_def_protos(
            graph_def,
            saver.as_saver_def(),
            FLAGS.checkpoint_path,
            _OUTPUT_NAME + ',' + _OUTPUT_PROB_NAME,
            restore_op_name=None,
            filename_tensor_name=None,
            output_graph=FLAGS.export_path,
            clear_devices=True,
            initializer_nodes=None)

        if FLAGS.save_inference_graph:
            tf.train.write_graph(graph_def, dirname, 'inference_graph.pbtxt')
def question_gen_run(argv):
    """Decode a test set with a trained model and write the generations.

    Args:
        argv: sequence of four strings, in this order: the model prefix, the
            path of the test file, the path of the output file, and the
            decoding mode.  Modes handled explicitly are 'pointwise',
            'greedy', 'multinomial', 'greedy_evaluate' and 'beam_evaluate';
            any other value is decoded with beam search.  Modes ending in
            'evaluate' write ``<out_path>.ref`` and ``<out_path>.pred``; all
            other modes write ``<out_path>``.
    """
    print(sys.argv)
    model_prefix = argv[0]
    in_path = argv[1]
    out_path = argv[2]
    mode = argv[3]

    # The variable may be unset (e.g. on a CPU-only run); a log line must not
    # abort decoding with a KeyError.
    print("CUDA_VISIBLE_DEVICES " + os.environ.get('CUDA_VISIBLE_DEVICES', ''))

    # load the configuration file
    print('Loading configurations from ' + model_prefix + ".config.json")
    FLAGS = namespace_utils.load_namespace(model_prefix + ".config.json")
    FLAGS = NP2P_trainer.enrich_options(FLAGS)

    # load vocabs
    print('Loading vocabs.')
    word_vocab = char_vocab = POS_vocab = NER_vocab = None
    if FLAGS.with_word:
        word_vocab = Vocab(FLAGS.word_vec_path, fileformat='txt2')
        print('word_vocab: {}'.format(word_vocab.word_vecs.shape))
    if FLAGS.with_char:
        char_vocab = Vocab(model_prefix + ".char_vocab", fileformat='txt2')
        print('char_vocab: {}'.format(char_vocab.word_vecs.shape))
    if FLAGS.with_POS:
        POS_vocab = Vocab(model_prefix + ".POS_vocab", fileformat='txt2')
        print('POS_vocab: {}'.format(POS_vocab.word_vecs.shape))
    if FLAGS.with_NER:
        NER_vocab = Vocab(model_prefix + ".NER_vocab", fileformat='txt2')
        print('NER_vocab: {}'.format(NER_vocab.word_vecs.shape))

    print('Loading test set.')
    if FLAGS.infile_format == 'fof':
        testset, _ = NP2P_data_stream.read_generation_datasets_from_fof(
            in_path, isLower=FLAGS.isLower)
    elif FLAGS.infile_format == 'plain':
        testset, _ = NP2P_data_stream.read_all_GenerationDatasets(
            in_path, isLower=FLAGS.isLower)
    else:
        testset, _ = NP2P_data_stream.read_all_GQA_questions(
            in_path, isLower=FLAGS.isLower, switch=FLAGS.switch_qa)
    print('Number of samples: {}'.format(len(testset)))

    print('Build DataStream ... ')
    # Beam search decodes one instance at a time.
    batch_size = 1 if mode.find('beam') >= 0 else -1
    devDataStream = NP2P_data_stream.QADataStream(
        testset, word_vocab, char_vocab, POS_vocab, NER_vocab, options=FLAGS,
        isShuffle=False, isLoop=False, isSort=True, batch_size=batch_size)
    print('Number of instances in testDataStream: {}'.format(
        devDataStream.get_num_instance()))
    print('Number of batches in testDataStream: {}'.format(
        devDataStream.get_num_batch()))

    best_path = model_prefix + ".best.model"
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-0.01, 0.01)
        with tf.name_scope("Valid"):
            with tf.variable_scope("Model", reuse=False,
                                   initializer=initializer):
                valid_graph = ModelGraph(
                    word_vocab=word_vocab, char_vocab=char_vocab,
                    POS_vocab=POS_vocab, NER_vocab=NER_vocab,
                    options=FLAGS, mode="decode")

        # Restore every "Model..." variable except the word embeddings.
        vars_ = {}
        for var in tf.global_variables():
            if "word_embedding" in var.name:
                continue
            if not var.name.startswith("Model"):
                continue
            vars_[var.name.split(":")[0]] = var
        saver = tf.train.Saver(vars_)

        initializer = tf.global_variables_initializer()
        sess = tf.Session()
        ref_outfile = pred_outfile = outfile = None
        try:
            sess.run(initializer)
            saver.restore(sess, best_path)  # restore the model

            total = 0
            correct = 0
            if mode.endswith('evaluate'):
                ref_outfile = open(out_path + ".ref", 'wt')
                pred_outfile = open(out_path + ".pred", 'wt')
            else:
                outfile = open(out_path, 'wt')

            total_num = devDataStream.get_num_batch()
            devDataStream.reset()
            for i in range(total_num):
                cur_batch = devDataStream.get_batch(i)
                if mode == 'pointwise':
                    (sentences, prediction_lengths, generator_input_idx,
                     generator_output_idx) = search(
                         sess, valid_graph, word_vocab, cur_batch, FLAGS,
                         decode_mode=mode)
                    for j in xrange(cur_batch.batch_size):
                        cur_total = cur_batch.answer_lengths[j]
                        cur_correct = 0
                        for k in xrange(cur_total):
                            if (generator_output_idx[j, k] ==
                                    cur_batch.in_answer_words[j, k]):
                                cur_correct += 1.0
                        total += cur_total
                        correct += cur_correct
                        outfile.write(
                            cur_batch.instances[j][1].tokText.encode('utf-8')
                            + "\n")
                        outfile.write(sentences[j].encode('utf-8') + "\n")
                        outfile.write("========\n")
                    outfile.flush()
                    # Avoid ZeroDivisionError when no answer token was seen.
                    accuracy = correct / float(total) * 100 if total else 0.0
                    print('Current dev accuracy is %d/%d=%.2f' %
                          (correct, total, accuracy))
                elif mode in ['greedy', 'multinomial']:
                    print('Batch {}'.format(i))
                    (sentences, prediction_lengths, generator_input_idx,
                     generator_output_idx) = search(
                         sess, valid_graph, word_vocab, cur_batch, FLAGS,
                         decode_mode=mode)
                    for j in xrange(cur_batch.batch_size):
                        outfile.write(
                            cur_batch.instances[j][1].ID_num.encode('utf-8')
                            + "\n")
                        outfile.write(
                            cur_batch.instances[j][1].tokText.encode('utf-8')
                            + "\n")
                        outfile.write(sentences[j].encode('utf-8') + "\n")
                        outfile.write("========\n")
                    outfile.flush()
                elif mode == 'greedy_evaluate':
                    print('Batch {}'.format(i))
                    (sentences, prediction_lengths, generator_input_idx,
                     generator_output_idx) = search(
                         sess, valid_graph, word_vocab, cur_batch, FLAGS,
                         decode_mode="greedy")
                    for j in xrange(cur_batch.batch_size):
                        ref_outfile.write(
                            cur_batch.instances[j][1].tokText.encode('utf-8')
                            + "\n")
                        pred_outfile.write(sentences[j].encode('utf-8') + "\n")
                    ref_outfile.flush()
                    pred_outfile.flush()
                elif mode == 'beam_evaluate':
                    print('Instance {}'.format(i))
                    ref_outfile.write(
                        cur_batch.instances[0][1].tokText.encode('utf-8')
                        + "\n")
                    ref_outfile.flush()
                    hyps = run_beam_search(sess, valid_graph, word_vocab,
                                           cur_batch, FLAGS)
                    cur_passage = cur_batch.instances[0][0]
                    cur_id2phrase = None
                    if FLAGS.with_phrase_projection:
                        (cur_phrase2id,
                         cur_id2phrase) = cur_batch.phrase_vocabs[0]
                    # Only the top hypothesis is written as the prediction.
                    cur_sent = hyps[0].idx_seq_to_string(
                        cur_passage, cur_id2phrase, word_vocab, FLAGS)
                    pred_outfile.write(cur_sent.encode('utf-8') + "\n")
                    pred_outfile.flush()
                else:  # beam search
                    print('Instance {}'.format(i))
                    hyps = run_beam_search(sess, valid_graph, word_vocab,
                                           cur_batch, FLAGS)
                    outfile.write(
                        "Input: "
                        + cur_batch.instances[0][0].tokText.encode('utf-8')
                        + "\n")
                    outfile.write(
                        "Truth: "
                        + cur_batch.instances[0][1].tokText.encode('utf-8')
                        + "\n")
                    for j in xrange(len(hyps)):
                        hyp = hyps[j]
                        cur_passage = cur_batch.instances[0][0]
                        cur_id2phrase = None
                        if FLAGS.with_phrase_projection:
                            (cur_phrase2id,
                             cur_id2phrase) = cur_batch.phrase_vocabs[0]
                        cur_sent = hyp.idx_seq_to_string(
                            cur_passage, cur_id2phrase, word_vocab, FLAGS)
                        outfile.write(
                            "Hyp-{}: ".format(j) + cur_sent.encode('utf-8')
                            + " {}".format(hyp.avg_log_prob()) + "\n")
                    outfile.flush()
        finally:
            # Release the output files and the session even when decoding
            # fails part-way through.
            for handle in (ref_outfile, pred_outfile, outfile):
                if handle is not None:
                    handle.close()
            sess.close()
def main(argv=None):  # pylint: disable=unused-argument
    """Train the face embedding network with a triplet loss.

    When ``FLAGS.model_name`` is set, training resumes from the latest
    checkpoint in that model's directory; otherwise a new timestamped model
    directory is created.  After every epoch the model is validated, a
    checkpoint is saved, and the graph definition is written once.

    Raises:
        ValueError: if ``FLAGS.model_name`` is set but no checkpoint is found
            in the model directory.
    """
    if FLAGS.model_name:
        subdir = FLAGS.model_name
        preload_model = True
    else:
        subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
        preload_model = False
    log_dir = os.path.join(os.path.expanduser(FLAGS.logs_base_dir), subdir)
    model_dir = os.path.join(os.path.expanduser(FLAGS.models_base_dir), subdir)
    if not os.path.isdir(model_dir):
        # Create the model directory if it doesn't exist.  makedirs also
        # creates a missing models_base_dir, where os.mkdir would fail.
        os.makedirs(model_dir)

    np.random.seed(seed=FLAGS.seed)
    dataset = facenet.get_dataset(FLAGS.data_dir)
    train_set, validation_set = facenet.split_dataset(
        dataset, FLAGS.train_set_fraction, FLAGS.split_mode)

    print('Model directory: %s' % model_dir)

    # These paths do not change between epochs.
    checkpoint_path = os.path.join(model_dir, 'model.ckpt')
    graphdef_dir = os.path.join(model_dir, 'graphdef')
    graphdef_filename = 'graph_def.pb'

    with tf.Graph().as_default():
        tf.set_random_seed(FLAGS.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for input images
        images_placeholder = tf.placeholder(
            tf.float32,
            shape=(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3),
            name='input')

        # Placeholder for phase_train
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        # Build the inference graph
        embeddings = facenet.inference_nn4_max_pool_96(
            images_placeholder, phase_train=phase_train_placeholder)

        # Split example embeddings into anchor, positive and negative
        anchor, positive, negative = tf.split(0, 3, embeddings)

        # Calculate triplet loss
        loss = facenet.triplet_loss(anchor, positive, negative)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters
        train_op, _ = facenet.train(loss, global_step)

        # Create a saver
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=0)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(log_dir, sess.graph)

        with sess.as_default():

            if preload_model:
                ckpt = tf.train.get_checkpoint_state(model_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    raise ValueError('Checkpoint not found')

            # Training and validation loop
            for epoch in range(FLAGS.max_nrof_epochs):
                # Train for one epoch
                step = train(sess, train_set, epoch, images_placeholder,
                             phase_train_placeholder, global_step, embeddings,
                             loss, train_op, summary_op, summary_writer)
                # Validate epoch
                validate(sess, validation_set, epoch, images_placeholder,
                         phase_train_placeholder, global_step, embeddings,
                         loss, train_op, summary_op, summary_writer)

                # Save the model checkpoint after each epoch
                print('Saving checkpoint')
                saver.save(sess, checkpoint_path, global_step=step)
                # The graph definition is only written when it is missing.
                if not os.path.exists(
                        os.path.join(graphdef_dir, graphdef_filename)):
                    print('Saving graph definition')
                    tf.train.write_graph(sess.graph_def, graphdef_dir,
                                         graphdef_filename, False)
def __init__(self, vocab_size, size, num_layers, max_gradient_norm, batch_size,
             learning_rate, learning_rate_decay_factor, num_samples=512,
             forward_only=False, max_dialog_length=10, max_answer_length=20):
    """Build the multi-turn dialog seq2seq graph.

    Args:
        vocab_size: size of the vocabulary.
        size: number of units of each LSTM cell; also the first dimension of
            the output projection matrix.
        num_layers: number of stacked LSTM cells.
        max_gradient_norm: gradients are clipped to this global norm.
        batch_size: stored on the instance as ``self.batch_size``.
        learning_rate: initial learning rate.
        learning_rate_decay_factor: factor applied by
            ``self.learning_rate_decay_op``.
        num_samples: number of samples for the sampled softmax; it is only
            used when ``0 < num_samples < vocab_size``.
        forward_only: if true, build the decoding graph (previous outputs are
            fed back) and no training ops.
        max_dialog_length: number of dialog turns; one group of placeholders
            is created per turn.
        max_answer_length: number of encoder placeholders per turn; decoder
            inputs and target weights get one more each.
    """
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.max_dialog_length = max_dialog_length
    self.max_answer_length = max_answer_length

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.vocab_size:
        with tf.device("/cpu:0"):
            w = tf.get_variable("proj_w", [size, self.vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.vocab_size])
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            with tf.device("/cpu:0"):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples,
                                                  self.vocab_size)

        softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
    cell = single_cell
    if num_layers > 1:
        cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        return dialog_attention_seq2seq(
            encoder_inputs, decoder_inputs, cell, vocab_size,
            output_projection=output_projection,
            feed_previous=do_decode)

    # Feeds for inputs: one list of placeholders per dialog turn.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in range(0, max_dialog_length):
        one_turn_encoder_inputs = []
        one_turn_decoder_inputs = []
        one_turn_target_weights = []
        for j in range(0, max_answer_length):
            one_turn_encoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None],
                               name="encoder{0}_{1}".format(i, j)))
        # One extra decoder step, because the targets are the decoder inputs
        # shifted by one.
        for j in range(0, max_answer_length + 1):
            one_turn_decoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None],
                               name="decoder{0}_{1}".format(i, j)))
            one_turn_target_weights.append(
                tf.placeholder(tf.float32, shape=[None],
                               name="weight{0}_{1}".format(i, j)))
        self.encoder_inputs.append(one_turn_encoder_inputs)
        self.decoder_inputs.append(one_turn_decoder_inputs)
        self.target_weights.append(one_turn_target_weights)

    # Our targets are decoder inputs shifted by one.
    targets = [turn_inputs[1:] for turn_inputs in self.decoder_inputs]

    # Outputs and losses.  Decoding (forward_only) feeds the previous output
    # back into the decoder; training uses the given decoder inputs.
    self.outputs, _ = seq2seq_f(self.encoder_inputs, self.decoder_inputs,
                                bool(forward_only))
    self.loss = 0
    for i in range(0, max_dialog_length):
        self.loss += sequence_loss(
            self.outputs[i][:-1], targets[i], self.target_weights[i][:-1],
            softmax_loss_function=softmax_loss_function)

    # If we use output projection, we need to project outputs for decoding.
    # self.outputs is indexed per turn and then per step (see the loss above),
    # so each step output is projected on its own; a single tf.matmul on the
    # nested list is not a valid rank-2 matmul.
    if forward_only and output_projection is not None:
        proj_w, proj_b = output_projection
        self.outputs = [
            [tf.matmul(output, proj_w) + proj_b for output in turn_outputs]
            for turn_outputs in self.outputs
        ]

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
def _checkpoint_path_for_step(latest_path, step):
    """Return `latest_path` with the step after its last '-' replaced by `step`."""
    # rfind, not find: only the LAST dash separates the step suffix; earlier
    # dashes may be part of a directory or file name.
    return latest_path[:latest_path.rfind('-') + 1] + str(step)


def create_model(session, forward_only, beam_search):
    """Create the seq2seq model and load or initialize its parameters.

    Parameters are taken, in order of preference, from a checkpoint in
    ``FLAGS.train_dir`` (the latest one, or step ``FLAGS.load_model`` when it
    is non-zero), from a pretrain checkpoint in ``FLAGS.pretrain_dir`` (step
    ``FLAGS.pretrain`` when it is > -1), or from a fresh initialization
    followed by loading the word vectors returned by
    ``data_utils.get_data``.  When no training checkpoint exists and
    ``FLAGS.use_ememory`` is set, the external memory variable is assigned
    from ``data_utils.get_ememory``.

    Args:
        session: TensorFlow session used to restore / initialize variables.
        forward_only: passed through to the model constructor.
        beam_search: passed through to the model constructor.

    Returns:
        The constructed ``seq2seq_model.Seq2SeqModel``.
    """
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(
        FLAGS.post_vocab_size,
        FLAGS.response_vocab_size,
        _buckets,
        FLAGS.size,
        FLAGS.num_layers,
        FLAGS.max_gradient_norm,
        FLAGS.batch_size,
        FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor,
        embedding_size=FLAGS.embedding_size,
        forward_only=forward_only,
        beam_search=beam_search,
        beam_size=FLAGS.beam_size,
        category=FLAGS.category,
        use_emb=FLAGS.use_emb,
        use_imemory=FLAGS.use_imemory,
        use_ememory=FLAGS.use_ememory,
        emotion_size=FLAGS.emotion_size,
        imemory_size=FLAGS.imemory_size,
        dtype=dtype)

    # Print every variable with its shape.
    for i in tf.global_variables():
        print(i.name, i.get_shape())

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    pre_ckpt = tf.train.get_checkpoint_state(FLAGS.pretrain_dir)
    if ckpt:
        if FLAGS.load_model == 0:
            print("Reading model parameters from %s" %
                  ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            path = _checkpoint_path_for_step(ckpt.model_checkpoint_path,
                                             FLAGS.load_model)
            print("Reading model parameters from %s" % path)
            model.saver.restore(session, path)
    else:
        if pre_ckpt:
            # Initialize the variables listed in model.initial_var, then
            # restore the rest from the pretrain checkpoint.
            session.run(tf.variables_initializer(model.initial_var))
            if FLAGS.pretrain > -1:
                path = _checkpoint_path_for_step(
                    pre_ckpt.model_checkpoint_path, FLAGS.pretrain)
                print("Reading pretrain model parameters from %s" % path)
                model.pretrain_saver.restore(session, path)
            else:
                print("Reading pretrain model parameters from %s" %
                      pre_ckpt.model_checkpoint_path)
                model.pretrain_saver.restore(session,
                                             pre_ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            session.run(tf.global_variables_initializer())
            vec_post, vec_response = data_utils.get_data(
                FLAGS.data_dir, FLAGS.post_vocab_size,
                FLAGS.response_vocab_size)
            initvec_post = tf.constant(vec_post, dtype=dtype,
                                       name='init_wordvector_post')
            initvec_response = tf.constant(vec_response, dtype=dtype,
                                           name='init_wordvector_response')
            # The embedding variables are looked up by their exact graph
            # names (note the lower-case 'rnn' / 'embedding_wrapper').
            embedding_post = [
                x for x in tf.trainable_variables()
                if x.name == 'embedding_attention_seq2seq/rnn/embedding_wrapper/embedding:0'
            ][0]
            embedding_response = [
                x for x in tf.trainable_variables()
                if x.name == 'embedding_attention_seq2seq/embedding_attention_decoder/embedding:0'
            ][0]
            session.run(embedding_post.assign(initvec_post))
            session.run(embedding_response.assign(initvec_response))
        if FLAGS.use_ememory:
            vec_ememory = data_utils.get_ememory(FLAGS.data_dir,
                                                 FLAGS.response_vocab_size)
            initvec_ememory = tf.constant(vec_ememory, dtype=dtype,
                                          name='init_ememory')
            ememory = [
                x for x in tf.global_variables()
                if x.name == 'embedding_attention_seq2seq/embedding_attention_decoder/external_memory:0'
            ][0]
            session.run(ememory.assign(initvec_ememory))
    return model
def train(self, train_images, model_path, logs_path, learning_rate=1e-4, beta1=0.9, train_epochs=100, batch_size=128):
    """Train the discriminator and generator, log summaries and save a checkpoint.

    The discriminator is first trained alone for 30 batches; afterwards every
    iteration performs one discriminator update followed by one generator
    update on the same batch.  At display steps the current losses are printed
    and the first three generated samples are written to ``out1.bmp`` ..
    ``out3.bmp`` as 28x28 images.

    Args:
        train_images: training data handed to ``_next_batch``.
        model_path: path passed to ``Saver.save`` after training.
        logs_path: parent directory for the summaries; a time-stamped
            sub-directory is created per call.
        learning_rate: Adam learning rate used for both optimizers.
        beta1: Adam ``beta1`` used for both optimizers.
        train_epochs: number of joint training iterations (one batch each).
        batch_size: number of samples and noise vectors per iteration.
    """
    # Split the trainable variables into discriminator / generator groups by
    # the 'd_' / 'g_' markers in their names.
    t_vars = tf.trainable_variables()
    D_vars = [var for var in t_vars if 'd_' in var.name]
    G_vars = [var for var in t_vars if 'g_' in var.name]

    trainD_op = tf.train.AdamOptimizer(learning_rate, beta1).minimize(self.d_loss, var_list=D_vars)
    trainG_op = tf.train.AdamOptimizer(learning_rate, beta1).minimize(self.g_loss, var_list=G_vars)

    tf.get_variable_scope().reuse_variables()

    # Summary operations.
    d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
    g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    g_sum_op = tf.summary.merge([g_loss_sum])
    d_sum_op = tf.summary.merge([d_loss_sum])

    # Session set-up.  The initializer and saver are created after the
    # optimizers so that the Adam slot variables are covered as well.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables())
    sess = tf.InteractiveSession()

    logdir = logs_path + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
    summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)

    try:
        sess.run(init)

        DISPLAY_STEP = 10
        index_in_epoch = 0

        # Pre-train the discriminator.
        for i in range(30):
            z_batch = np.random.normal(0, 1, size=[batch_size, self.z_dim]).astype(np.float32)
            batch_xs, index_in_epoch = _next_batch(train_images, batch_size, index_in_epoch)
            sess.run([trainD_op], feed_dict={self.X: batch_xs, self.Z: z_batch, self.phase: 1})

        # Train generator and discriminator together.
        for i in range(train_epochs):
            z_batch = np.random.normal(0, 1, size=[batch_size, self.z_dim]).astype(np.float32)
            batch_xs, index_in_epoch = _next_batch(train_images, batch_size, index_in_epoch)
            feed = {self.X: batch_xs, self.Z: z_batch, self.phase: 1}

            # Discriminator update on real and generated images.
            _, summaryD = sess.run([trainD_op, d_sum_op], feed_dict=feed)
            summary_writer.add_summary(summaryD, i)

            # Generator update.
            _, summaryG = sess.run([trainG_op, g_sum_op], feed_dict=feed)
            summary_writer.add_summary(summaryG, i)

            # Report on steps 0, 10, 20, ..., 100, 200, ... and on the last one.
            if i % DISPLAY_STEP == 0 or (i + 1) == train_epochs:
                dLoss, gLoss = sess.run([self.d_loss, self.g_loss], feed_dict=feed)
                print("=========== updating G&D ==========")
                print("iteration:", i)
                print("gen loss:", gLoss)
                print("dis loss:", dLoss)

                outimage = self.Gen.eval(feed_dict={self.Z: z_batch, self.phase: 1},
                                         session=sess)
                for index in range(3):
                    result = (outimage[index].astype(np.float32)) * 255.
                    result = np.clip(result, 0, 255).astype('uint8')
                    result = np.reshape(result, (28, 28))
                    cv2.imwrite("out" + str(index + 1) + ".bmp", result)

                # Report ten times less often once ten reports were made at
                # the current interval.
                if i % (DISPLAY_STEP * 10) == 0 and i:
                    DISPLAY_STEP *= 10
    finally:
        # Flush and close the event file even when training fails.
        summary_writer.close()

    save_path = saver.save(sess, model_path)
    print("Model saved in file:", save_path)
def train(self, train_images, model_path, logs_path, learning_rate=1e-4, train_epochs=100):
    """Train the discriminator and generator with several D updates per G update.

    Every iteration draws one batch of real images, performs five
    discriminator updates on it (each with fresh noise) and then one generator
    update.  At display steps the elapsed time and the latest losses are
    printed and a grid of generated samples is written through
    ``save_images`` to ``img/sample_<i>_epoch.png``.

    Args:
        train_images: training data handed to ``_next_batch``.
        model_path: path passed to ``Saver.save`` after training.
        logs_path: parent directory for the summaries; a time-stamped
            sub-directory is created per call.
        learning_rate: Adam learning rate used for both optimizers.
        train_epochs: number of training iterations (one real batch each).

    The batch size is read from ``self.batch_size``.
    """
    # Split the trainable variables into discriminator / generator groups by
    # the 'd_' / 'g_' markers in their names.
    t_vars = tf.trainable_variables()
    D_vars = [var for var in t_vars if 'd_' in var.name]
    G_vars = [var for var in t_vars if 'g_' in var.name]

    trainD_op = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.9).minimize(self.d_loss, var_list=D_vars)
    trainG_op = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.9).minimize(self.g_loss, var_list=G_vars)

    tf.get_variable_scope().reuse_variables()

    # Summary operations.
    d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
    g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    g_sum_op = tf.summary.merge([g_loss_sum])
    d_sum_op = tf.summary.merge([d_loss_sum])

    # Session set-up.  The initializer and saver are created after the
    # optimizers so that the Adam slot variables are covered as well.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables())
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

    logdir = logs_path + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
    summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)

    try:
        sess.run(init)

        DISPLAY_STEP = 10
        index_in_epoch = 0
        d_iters = 5  # discriminator updates per generator update
        start_time = time.time()

        for i in range(train_epochs):
            batch_xs, index_in_epoch = _next_batch(train_images, self.batch_size, index_in_epoch)

            # Discriminator updates: same real batch, fresh noise every time.
            for _ in range(0, d_iters):
                z_batch = np.random.normal(0, 1, size=[self.batch_size, self.z_dim]).astype(np.float32)
                _, summaryD, dLoss = sess.run(
                    [trainD_op, d_sum_op, self.d_loss],
                    feed_dict={self.X: batch_xs, self.Z: z_batch, self.phase: 1})
                summary_writer.add_summary(summaryD, i)

            # Generator update; only noise is fed.
            z_batch = np.random.normal(0, 1, size=[self.batch_size, self.z_dim]).astype(np.float32)
            _, summaryG, gLoss = sess.run(
                [trainG_op, g_sum_op, self.g_loss],
                feed_dict={self.Z: z_batch, self.phase: 1})
            summary_writer.add_summary(summaryG, i)

            # Report on steps 0, 10, 20, ..., 100, 200, ... and on the last one.
            if i % DISPLAY_STEP == 0 or (i + 1) == train_epochs:
                print("=========== updating G&D ==========")
                print('Time: %.2f' % (time.time() - start_time))
                print("iteration:", i)
                print("gen loss:", gLoss)
                print("dis loss:", dLoss)

                z_batch = np.random.normal(0, 1, size=[self.batch_size, self.z_dim]).astype(np.float32)
                outimage = self.Gen.eval(feed_dict={self.Z: z_batch, self.phase: 1},
                                         session=sess)
                save_images(outimage, [8, 8], 'img/' + 'sample_%d_epoch.png' % (i))

                # Report ten times less often once ten reports were made at
                # the current interval.
                if i % (DISPLAY_STEP * 10) == 0 and i:
                    DISPLAY_STEP *= 10
    finally:
        # Flush and close the event file even when training fails.
        summary_writer.close()

    save_path = saver.save(sess, model_path)
    print("Model saved in file:", save_path)
def train(trainpath, ckptfile, outpath='/media/ttt/Elements/TanYinghui/TP/5_7.tif', keep_prob=0.5):
    """Run a restored network over a 3-D volume window by window and save the result.

    Despite its name this function performs inference only: the volume loaded
    from `trainpath` is padded, the network restored from `ckptfile` is
    evaluated on V x V x vz windows centred on the points returned by
    ``get_cen``, and every window of the volume is overwritten in place with
    the thresholded (> 0.5) channel-1 prediction.  The padded border is then
    removed, values above 1 are zeroed, the volume is scaled by 255 and
    written as uint8 with ``writetiff3d``.

    Args:
        trainpath: path handed to ``loadimg``.
        ckptfile: checkpoint path handed to ``Saver.restore``.
        outpath: destination of the resulting TIFF stack.  Defaults to the
            location that used to be hard-coded.
        keep_prob: value fed to the network's keep-probability placeholder.
            NOTE(review): the historical value 0.5 keeps dropout active at
            inference time; 1.0 is probably what is wanted -- confirm before
            changing the default.
    """
    immm = loadimg(trainpath)
    print('testing() called')

    V = 64        # window size along the first two axes
    vz = 32       # window size along the third axis
    margin = 32   # padding added around the volume
    re_filed = 0  # forwarded to get_cen

    with tf.Graph().as_default():
        # Graph input.
        view_ = tf.placeholder('float32', shape=(None, V, V, vz, 1), name='im0')
        keep_prob_ = tf.placeholder('float32')

        # Graph output.
        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, keep_prob_)
        prediction = model.classify(fc8)

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)
        config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)

        # The context manager releases the session once inference is done.
        with tf.Session(config=config) as sess:
            saver.restore(sess, ckptfile)
            print('restore variables done')

            immm = padding(immm, margin)
            imum = get_cen(immm.shape, V, V, vz, margin, re_filed)

            for num, center_point in enumerate(imum):
                print(num)
                window = (
                    slice(center_point[0] - V // 2, center_point[0] + V // 2),
                    slice(center_point[1] - V // 2, center_point[1] + V // 2),
                    slice(center_point[2] - vz // 2, center_point[2] + vz // 2),
                )
                image = immm[window]
                # A window clipped by the volume border ends the scan.
                if image.shape != (V, V, vz):
                    break

                # Add the batch and channel axes expected by `view_`.
                image = np.expand_dims(image, axis=0)
                image = np.expand_dims(image, axis=4)

                pred = sess.run(prediction,
                                feed_dict={view_: image, keep_prob_: keep_prob})
                pred = np.squeeze(pred[:, :, :, :, 1])

                # Number of non-zero responses before thresholding.
                print(np.argwhere(pred).shape[0])

                immm[window] = pred > 0.5

    immm = depadding(immm, margin)
    # Voxels never covered by a window may still hold raw intensities.
    immm[immm > 1] = 0
    immm = immm * 255
    writetiff3d(outpath, immm.astype('uint8'))
def test(self, model):
    """Evaluate a jointly fine-tuned EPCNN + GRCNN checkpoint on the test set.

    Every ``.png`` file of the test input folder is paired, by position in the
    sorted file lists, with a ``.png`` file of the ground-truth folder.  For
    each pair both networks are run, PSNR and SSIM are computed for the edge
    output and for the image output, and both outputs are written to the save
    folder.  The mean of each metric is printed at the end.

    Args:
        model: checkpoint file name, joined to ``self.model_path``.

    Raises:
        IndexError: if the ground-truth folder holds fewer ``.png`` files than
            the input folder.
    """
    self.mode = 'test'
    test_input_path = './dataset/Xu et al.\'s dataset/TEST/INPUT/'
    test_gt_path = './dataset/Xu et al.\'s dataset/TEST/GT/'
    save_path = './dataset/Xu et al.\'s dataset/M0/JointFinetune/'
    if not os.path.exists(save_path):
        # makedirs also creates a missing parent folder.
        os.makedirs(save_path)

    # os.listdir returns names in arbitrary order; sort both lists so that
    # entry i of one list is matched with entry i of the other.
    test_input_list = sorted(im for im in os.listdir(test_input_path) if im.endswith('.png'))
    test_gt_list = sorted(im for im in os.listdir(test_gt_path) if im.endswith('.png'))
    test_num = len(test_input_list)
    print('Num. of test patches: ', test_num)

    edge_psnr_file = np.zeros(test_num)
    edge_ssim_file = np.zeros(test_num)
    psnr_file = np.zeros(test_num)
    ssim_file = np.zeros(test_num)

    with tf.Graph().as_default():
        EPCNN_input = tf.placeholder(shape=[None, self.train_down_size, self.train_down_size, 4], dtype=tf.float32)
        GRCNN_input = tf.placeholder(shape=[None, self.train_down_size, self.train_down_size, 3], dtype=tf.float32)
        Tar_edge = tf.placeholder(shape=[None, self.train_size, self.train_size, 1], dtype=tf.float32)
        Tar_image = tf.placeholder(shape=[None, self.train_size, self.train_size, 3], dtype=tf.float32)

        # Both outputs are clipped to the 8-bit intensity range.
        EPCNN_output = self.EPCNN_inference(EPCNN_input)
        EPCNN_output = tf.clip_by_value(EPCNN_output, 0.0, 255.0)
        GRCNN_output = self.GRCNN_inference(GRCNN_input, EPCNN_output)
        GRCNN_output = tf.clip_by_value(GRCNN_output, 0.0, 255.0)

        para_num = np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
        print('Num. of Parameters: ', para_num)

        # Restore only the variables of the two sub-networks.
        var_list = [v for v in tf.all_variables()
                    if v.name.startswith('EPCNN') or v.name.startswith('GRCNN')]
        saver = tf.train.Saver(var_list)

        with tf.Session() as sess:
            saver.restore(sess, os.path.join(self.model_path, model))

            for i, input_name in enumerate(test_input_list):
                ep_input, gr_input, target_edge, target_image = im2tfrecord.generatingSyntheticEdge(
                    os.path.join(test_input_path, input_name),
                    os.path.join(test_gt_path, test_gt_list[i]))

                # Cast to float32 and add the batch axis (plus the channel
                # axis for the edge map).
                ep_input = np.expand_dims(ep_input.astype(np.float32), axis=0)
                gr_input = np.expand_dims(gr_input.astype(np.float32), axis=0)
                target_edge = np.expand_dims(target_edge.astype(np.float32), axis=0)
                target_edge = np.expand_dims(target_edge, axis=3)
                target_image = np.expand_dims(target_image.astype(np.float32), axis=0)

                ep_output, gr_output = sess.run(
                    [EPCNN_output, GRCNN_output],
                    feed_dict={EPCNN_input: ep_input,
                               GRCNN_input: gr_input,
                               Tar_edge: target_edge,
                               Tar_image: target_image})

                ep_output = np.squeeze(ep_output).astype('uint8')
                gr_output = np.squeeze(gr_output).astype('uint8')
                target_edge = np.squeeze(target_edge).astype('uint8')
                target_image = np.squeeze(target_image).astype('uint8')

                edge_psnr_file[i] = psnr(ep_output, target_edge)
                edge_ssim_file[i] = ssim(ep_output, target_edge)
                psnr_file[i] = psnr(gr_output, target_image)
                ssim_file[i] = ssim(gr_output, target_image)

                # Text before the first '.', minus its last five characters.
                save_name = input_name.split('.')[0][:-5]
                cv2.imwrite(os.path.join(save_path, save_name + '_output_edge.png'), ep_output)
                cv2.imwrite(os.path.join(save_path, save_name + '_output.png'), gr_output)

    print('JointFinetune: ', model)
    print('Edge PSNR: ', str(np.mean(edge_psnr_file)))
    print('Edge SSIM: ', str(np.mean(edge_ssim_file)))
    print('PSNR: ', str(np.mean(psnr_file)))
    print('SSIM: ', str(np.mean(ssim_file)))
def _write_list_file(path, entries):
    """Write every entry of `entries` on its own line of `path`, creating its folder."""
    folder = os.path.dirname(path)
    if folder and not os.path.exists(folder):
        os.makedirs(folder)
    with open(path, 'w') as handle:
        for entry in entries:
            handle.write('%s\n' % entry)


def test(load_version, sift_test_list, iss_test_list, submap_id):
    """Match image-patch descriptors against point-cloud descriptors of one submap.

    The point-cloud branch is run on the ISS list of submap `submap_id` and
    the image branch on the SIFT patch list of every image of that submap.
    For each image the squared Euclidean distance between every patch
    descriptor and every point-cloud descriptor is computed and, per patch,
    the 1-based indices of the 10 nearest point clouds are written to
    ``<sift_iss_correspond_dir>/<submap_id>/<cam_id>.txt``.  The lists that
    were fed are recorded under ``iss_test_list_txt/`` and
    ``sift_test_list_txt/`` for checking.

    Only full batches are evaluated; trailing entries that do not fill a
    batch of the module-level ``batch_size`` are ignored.

    Args:
        load_version: checkpoint prefix; the model is restored from
            ``model/v1/<load_version>_model.ckpt``.
        sift_test_list: indexed as ``[submap_id - 1][image][patch]``; the
            entries are passed on to ``getSIFTTestBatch``.
        iss_test_list: indexed as ``[submap_id - 1][entry]``; the entries are
            passed on to ``getISSTestBatch``.
        submap_id: 1-based submap index.
    """
    print('----------------- START to test -----------------')
    iss_test_list = iss_test_list[submap_id - 1]
    _write_list_file("iss_test_list_txt/%03d.txt" % submap_id, iss_test_list)

    # Placeholders.
    image_pl = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 3])
    pos_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])
    neg_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])
    is_training = tf.placeholder(tf.bool)

    # Model.
    print('build model')
    with tf.device('/gpu:1'):  # use gpu 1 to forward
        with tf.variable_scope('image_branch') as scope:
            # NOTE(review): is_training is hard-coded to True here although
            # this is the test path and the point-cloud branch uses the
            # placeholder -- confirm this is intended.
            image_feature = vgg16(image_pl, is_training=True,
                                  output_dim=image_feature_dim, bn_decay=None)

        with tf.variable_scope('pointcloud_branch') as scope:
            pos_pcl_feature, _ = pointNet(pos_pcl_pl, pcl_feature_dim,
                                          is_training=is_training,
                                          use_bn=False, bn_decay=None)
            scope.reuse_variables()
            neg_pcl_feature, _ = pointNet(neg_pcl_pl, pcl_feature_dim,
                                          is_training=is_training,
                                          use_bn=False, bn_decay=None)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

    # Run.
    print('run model...')
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    with tf.Session(config=config) as sess:
        print('initialise model...')
        sess.run(tf.global_variables_initializer())
        print('   load model...')
        save_path = 'model/' + 'v1' + '/' + load_version + '_model.ckpt'
        saver.restore(sess, save_path)
        print("   Model loaded from: %s" % save_path)

        # -------------------- evaluate model ---------------------
        print('**** Validate ...')
        print('   Compute image and pcl descriptors')

        iss_batch_num = len(iss_test_list) // batch_size
        iss_test_num = iss_batch_num * batch_size
        pcl_feature = np.zeros([iss_test_num, pcl_feature_dim])

        # Feed the ISS list through the point-cloud branch.
        batch_counter = 0
        print('-------- test iss --------------')
        for i in range(iss_batch_num):
            print("  *** iss progress: %d/%d" % (i, iss_batch_num))
            pcl_batch = getISSTestBatch(iss_test_list, i)
            pcl_batch_feature = sess.run(
                pos_pcl_feature,
                feed_dict={pos_pcl_pl: pcl_batch, is_training: False})
            rows = pcl_batch_feature.shape[0]
            pcl_feature[batch_counter:batch_counter + rows, :] = pcl_batch_feature
            batch_counter += rows

        # These terms of the distance do not depend on the image: compute once.
        pcl_sq_norm_t = np.transpose(
            np.sum(np.multiply(pcl_feature, pcl_feature), axis=1, keepdims=True))
        pcl_feature_t = np.transpose(pcl_feature)

        print('---------- test sift ----------')
        sift_submap_test_list = sift_test_list[submap_id - 1]  # all images
        top_k = 10
        for k in range(len(sift_submap_test_list)):
            patch_list = sift_submap_test_list[k]
            if not patch_list:
                # Without patches there is no path to derive cam_id from.
                print("  *** image id: %d/%d has no patches, skipped"
                      % (k, len(sift_submap_test_list)))
                continue
            cam_id = patch_list[0].split('/')[-2]  # expected 'cam1_xxx'

            # Record the list for checking.
            _write_list_file("sift_test_list_txt/%03d_%s.txt" % (submap_id, cam_id),
                             patch_list)

            # Feed the patches of this image through the image branch.
            sift_batch_num = len(patch_list) // batch_size
            sift_test_num = sift_batch_num * batch_size
            img_feature = np.zeros([sift_test_num, image_feature_dim])

            batch_counter = 0
            print("  *** image id: %d/%d" % (k, len(sift_submap_test_list)))
            for i in range(sift_batch_num):
                img_batch = getSIFTTestBatch(patch_list, i)
                img_batch_feature = sess.run(
                    image_feature,
                    feed_dict={image_pl: img_batch, is_training: False})
                rows = img_batch_feature.shape[0]
                img_feature[batch_counter:batch_counter + rows, :] = img_batch_feature
                batch_counter += rows

            # Squared distances via |a|^2 + |b|^2 - 2 a.b
            img_sq_norm = np.sum(np.multiply(img_feature, img_feature),
                                 axis=1, keepdims=True)
            dist_array = (img_sq_norm + pcl_sq_norm_t
                          - 2 * np.matmul(img_feature, pcl_feature_t))
            print("  image patch num: %d, submap pcl num: %d"
                  % (dist_array.shape[0], dist_array.shape[1]))

            # Record, per patch, its own 1-based index followed by the
            # 1-based indices of its top_k nearest point clouds.
            txt_folder = "%s/%03d" % (sift_iss_correspond_dir, submap_id)
            if not os.path.exists(txt_folder):
                os.makedirs(txt_folder)
            txt_file_path = "%s/%s.txt" % (txt_folder, cam_id)
            with open(txt_file_path, "w") as out:
                for i in range(dist_array.shape[0]):
                    min_dist_id = np.argsort(dist_array[i, :])[:top_k]
                    idx = np.concatenate((np.array([i + 1]), min_dist_id + 1))
                    idx = idx.reshape(1, idx.shape[0])
                    np.savetxt(out, idx, fmt='%d')
def train_and_eval():
    """Build a multi-GPU training graph and run the training loop.

    One tower is built per GPU (``FLAGS.num_gpu``); the per-tower gradients
    are averaged and applied by two momentum optimizers, one for the
    'weights' collection and one for the 'biases' collection with twice the
    learning rate.  The learning rate follows a fixed step schedule, summaries
    are written every ``FLAGS.summary_interval`` steps and a checkpoint is
    saved every ``FLAGS.save_interval`` steps into ``FLAGS.log_dir``.
    Variables are restored from ``FLAGS.restore_path`` when it is set and
    freshly initialised otherwise.
    """
    with tf.Graph().as_default():
        # Common part on the CPU.
        with tf.device('/cpu:0'):
            # train/test phase indicator
            phase_train = tf.placeholder(tf.bool, name='phase_train')
            # the learning rate is set manually from the training loop
            learning_rate = tf.placeholder(tf.float32, name='learning_rate')
            global_step = tf.Variable(0, trainable=False, name='global_step')

            learning_rate_weights = learning_rate
            learning_rate_biases = 2.0 * learning_rate  # double learning rate for biases
            optim_weights = tf.train.MomentumOptimizer(learning_rate_weights, 0.9)
            optim_biases = tf.train.MomentumOptimizer(learning_rate_biases, 0.9)

        # One tower per GPU.
        gpu_grads = []
        for i in xrange(FLAGS.num_gpu):
            print('Initialize the {0}th gpu'.format(i))
            with tf.device('/gpu:{0}'.format(i)):
                with tf.name_scope('gpu_{0}'.format(i)) as scope:
                    if i > 0:
                        m.add_to_collection = False

                    # Only one GPU's info is printed out, but all are
                    # summarised.  Multi-GPU execution is triggered by the
                    # train_op (the averaged gradients act as a sync barrier).
                    loss, accuracy, logits = loss_and_accuracy_per_gpu(
                        phase_train, scope)

                    # Later towers reuse the variables of the first one.
                    tf.get_variable_scope().reuse_variables()

                    weights = tf.get_collection('weights')
                    biases = tf.get_collection('biases')
                    assert (len(weights) + len(biases) == len(
                        tf.trainable_variables()))

                    params = weights + biases
                    gradients = tf.gradients(loss, params, name='gradients')
                    gpu_grads.append(gradients)

        # Summaries for all the entropy losses and the weight L2 loss.
        m.summary_losses()

        with tf.device('/cpu:0'):
            # Averaged gradients are ordered like `params`: weights first.
            weights = tf.get_collection('weights')
            biases = tf.get_collection('biases')
            averaged_grads = average_gradients(gpu_grads)
            weights_grads = averaged_grads[:len(weights)]
            biases_grads = averaged_grads[len(weights):]
            apply_weights_op = optim_weights.apply_gradients(
                zip(weights_grads, weights), global_step=global_step)
            apply_biases_op = optim_biases.apply_gradients(
                zip(biases_grads, biases), global_step=global_step)
            train_op = tf.group(apply_weights_op, apply_biases_op)

        saver = tf.train.Saver(tf.all_variables())

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                                allow_soft_placement=True))

        # The parameter histograms have to exist BEFORE the summaries are
        # merged; summaries created afterwards are not part of summary_op.
        for var in tf.trainable_variables():
            tf.histogram_summary('params/' + var.op.name, var)
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir,
                                                graph_def=sess.graph_def)

        # Initialisation or restore.
        init_op = tf.initialize_all_variables()
        if FLAGS.restore_path is None:
            print('Initializing...')
            sess.run(init_op, {phase_train.name: True})
        else:
            print('Restore variable from %s' % FLAGS.restore_path)
            saver.restore(sess, FLAGS.restore_path)

        # Training loop.
        tf.train.start_queue_runners(sess=sess)

        curr_lr = 0.0
        lr_scale = 1.0
        # NOTE: the intervals should be multiples of num_gpu, because `step`
        # advances by num_gpu per iteration.
        for step in xrange(0, FLAGS.max_steps, FLAGS.num_gpu):
            # Step-wise learning-rate schedule.
            if step <= 32000:
                _lr = lr_scale * 1e-1
            elif step <= 48000:
                _lr = lr_scale * 1e-2
            else:
                _lr = lr_scale * 1e-3
            if curr_lr != _lr:
                curr_lr = _lr
                print('Learning rate set to %f' % curr_lr)

            # `loss` and `accuracy` are those of the last tower built above.
            fetches = [train_op, loss]
            if step % FLAGS.summary_interval == 0:
                fetches += [accuracy, summary_op]
            sess_outputs = sess.run(fetches, {
                phase_train.name: True,
                learning_rate.name: curr_lr
            })

            if step % FLAGS.summary_interval == 0:
                train_loss_value, train_acc_value, summary_str = sess_outputs[1:]
                print(
                    '[%s] Iteration %d, train loss = %f, train accuracy = %f' %
                    (datetime.now(), step, train_loss_value, train_acc_value))
                summary_writer.add_summary(summary_str, step)

            if step > 0 and step % FLAGS.save_interval == 0:
                checkpoint_path = os.path.join(FLAGS.log_dir, 'checkpoint')
                saver.save(sess, checkpoint_path, global_step=step)
                print('Checkpoint saved at %s' % checkpoint_path)
def train():
    """Evaluate a restored network and report per-class accuracy.

    Despite its name this function does not train: it builds the network
    without a training op, restores it from ``FLAGS.ckpt_path`` (a checkpoint
    directory or a checkpoint file), runs ``FLAGS.test_iter`` batches and
    counts correct / wrong predictions per class.  The table is printed and,
    when ``FLAGS.output`` is not blank, also written to that file.

    Exits the process with status 1 when no checkpoint can be found.
    """
    print('[Dataset Configuration]')
    print('\tCIFAR-100 dir: %s' % FLAGS.data_dir)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of test images: %d' % FLAGS.num_test_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)

    print('[Testing Configuration]')
    print('\tCheckpoint path: %s' % FLAGS.ckpt_path)
    print('\tDataset: %s' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tNumber of testing iterations: %d' % FLAGS.test_iter)
    print('\tOutput path: %s' % FLAGS.output)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    with tf.Graph().as_default():
        # Input pipeline.
        with tf.variable_scope('test_image'):
            test_images, test_labels = data_input.inputs(
                not FLAGS.train_data, FLAGS.data_dir, FLAGS.batch_size)

        # Class names, one per line.
        with open(os.path.join(FLAGS.data_dir, 'batches.meta.txt')) as fd:
            classes = [temp.strip() for temp in fd.readlines()]

        # Placeholders that feed the inference model.
        images = tf.placeholder(tf.float32, [
            FLAGS.batch_size, data_input.IMAGE_SIZE, data_input.IMAGE_SIZE, 3
        ])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Build the model; no training op is needed.
        decay_step = FLAGS.lr_step_epoch * FLAGS.num_train_instance / FLAGS.batch_size
        hp = wrinc.HParams(batch_size=FLAGS.batch_size,
                           num_classes=FLAGS.num_classes,
                           initial_lr=FLAGS.initial_lr,
                           decay_step=decay_step,
                           lr_decay=FLAGS.lr_decay,
                           momentum=FLAGS.momentum)
        network = wrinc.wrinc(hp, images, labels, None)
        network.build_model()

        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Restore from a checkpoint directory or a checkpoint file.
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        if os.path.isdir(FLAGS.ckpt_path):
            ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_path)
            if ckpt and ckpt.model_checkpoint_path:
                print('\tRestore from %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found in the dir [%s]' % FLAGS.ckpt_path)
                sys.exit(1)
        elif os.path.isfile(FLAGS.ckpt_path):
            print('\tRestore from %s' % FLAGS.ckpt_path)
            saver.restore(sess, FLAGS.ckpt_path)
        else:
            print('No checkpoint file found in the path [%s]' % FLAGS.ckpt_path)
            sys.exit(1)

        tf.train.start_queue_runners(sess=sess)

        # Testing.  result_ll[c] holds the [correct, wrong] counts of class c.
        result_ll = [[0, 0] for _ in range(FLAGS.num_classes)]
        # Scalar accumulator (this used to be initialised with a tuple).
        test_loss = 0.0
        for i in range(FLAGS.test_iter):
            test_images_val, test_labels_val = sess.run(
                [test_images, test_labels])
            preds_val, loss_value, acc_value = sess.run(
                [network.preds, network.loss, network.acc],
                feed_dict={
                    network.is_train: False,
                    images: test_images_val,
                    labels: test_labels_val
                })
            test_loss += loss_value
            for j in range(FLAGS.batch_size):
                correct = 0 if test_labels_val[j] == preds_val[j] else 1
                result_ll[test_labels_val[j] % FLAGS.num_classes][correct] += 1
        # NOTE: the mean loss is computed but not reported below.
        test_loss /= FLAGS.test_iter

        # Summary.  A class without any sample is reported with accuracy 0.
        acc_list = [
            float(r[0]) / float(r[0] + r[1]) if (r[0] + r[1]) else 0.0
            for r in result_ll
        ]
        result_total = np.sum(np.array(result_ll), axis=0)
        acc_total = float(result_total[0]) / np.sum(result_total)

        print('Class    \t\t\tT\tF\tAcc.')
        format_str = '%-31s %7d %7d %.5f'
        for i in range(FLAGS.num_classes):
            print(format_str % (classes[i], result_ll[i][0], result_ll[i][1],
                                acc_list[i]))
        print(format_str % ('(Total)', result_total[0], result_total[1],
                            acc_total))

        # Output to file (if specified).
        if FLAGS.output.strip():
            row_format = '%-31s %7d %7d %.5f\n'
            with open(FLAGS.output, 'w') as fd:
                fd.write('Class    \t\t\tT\tF\tAcc.\n')
                for i in range(FLAGS.num_classes):
                    t, f = result_ll[i]
                    fd.write(row_format %
                             (classes[i].replace(' ', '-'), t, f, acc_list[i]))
                fd.write(row_format % ('(Total)', result_total[0],
                                       result_total[1], acc_total))
def train():
    """Train the character-level RNN and evaluate it after every epoch.

    The graph is driven entirely by placeholders (inputs, targets, keep
    probability, learning rate and initial state).  The recurrent state is
    carried from batch to batch within an epoch and reset to zeros at the
    start of each epoch.  The learning rate is ``FLAGS.init_lr`` decayed by
    ``FLAGS.lr_decay`` per epoch once ``FLAGS.decay_after`` is reached.
    Summaries go to ``FLAGS.train_dir`` and checkpoints to
    ``<FLAGS.train_dir>/model.ckpt`` every ``FLAGS.save_epochs`` epochs.
    """
    print("Building training graph ...")
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-FLAGS.init_scale,
                                                    FLAGS.init_scale)
        with tf.variable_scope("char-rnn", initializer=initializer):
            keep_prob = tf.placeholder(dtype=tf.float32, shape=[],
                                       name='keep_prob')
            cell = model.build_cell(keep_prob)

            inputs = tf.placeholder(dtype=tf.int32,
                                    shape=[FLAGS.batch_size, FLAGS.num_steps],
                                    name='inputs')
            targets = tf.placeholder(dtype=tf.int32,
                                     shape=[FLAGS.batch_size, FLAGS.num_steps],
                                     name='targets')
            lr = tf.placeholder(dtype=tf.float32, shape=[],
                                name='learning_rate')
            initial_state = tf.placeholder(
                dtype=tf.float32,
                shape=[FLAGS.batch_size, cell.state_size],
                name='initial_state')

            logits, final_state = model.predict(inputs, cell, initial_state,
                                                keep_prob)
            loss = model.loss(logits, targets)
            train_op = model.train_batch(loss, lr)

        # Saver and summaries.
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()

        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        # Data.
        print("Loading data ...")
        reader = text_input.TextReader(
            os.path.join(FLAGS.data_dir, FLAGS.data_file))
        reader.prepare_data()
        train_loader = text_input.DataLoader(
            os.path.join(FLAGS.data_dir, 'train.cPickle'),
            FLAGS.batch_size, FLAGS.num_steps)
        test_loader = text_input.DataLoader(
            os.path.join(FLAGS.data_dir, 'test.cPickle'),
            FLAGS.batch_size, FLAGS.num_steps)

        total_steps = FLAGS.num_epochs * train_loader.num_batch
        save_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        zero_state = cell.zero_state(FLAGS.batch_size,
                                     dtype=tf.float32).eval(session=sess)
        global_step = 0

        def _evaluate(sess, loader, state):
            """Return the mean loss over all batches of `loader` (dropout off)."""
            test_loss = 0.
            for _ in xrange(loader.num_batch):
                x_batch, y_batch = loader.next_batch()
                feed = {
                    inputs: x_batch,
                    targets: y_batch,
                    keep_prob: 1.,
                    initial_state: state
                }
                state, loss_value = sess.run([final_state, loss],
                                             feed_dict=feed)
                test_loss += loss_value
            return test_loss / loader.num_batch

        # Training.
        for epoch in xrange(FLAGS.num_epochs):
            current_lr = FLAGS.init_lr * (FLAGS.lr_decay**(max(
                epoch - FLAGS.decay_after + 1, 0)))
            state = zero_state
            training_loss = 0.
            for _ in xrange(train_loader.num_batch):
                global_step += 1
                start_time = time.time()
                x_batch, y_batch = train_loader.next_batch()
                feed = {
                    inputs: x_batch,
                    targets: y_batch,
                    keep_prob: (1. - FLAGS.dropout),
                    lr: current_lr,
                    initial_state: state
                }
                state, loss_value, _ = sess.run(
                    [final_state, loss, train_op], feed_dict=feed)
                duration = time.time() - start_time
                training_loss += loss_value

                if global_step % FLAGS.log_steps == 0:
                    format_str = (
                        '%s: step %d/%d (epoch %d/%d), loss = %.2f (%.3f sec/batch), lr: %.5f'
                    )
                    print(format_str %
                          (datetime.now(), global_step, total_steps,
                           epoch + 1, FLAGS.num_epochs, loss_value, duration,
                           current_lr))

                # Summaries that depend on the placeholders can only be
                # evaluated with a feed; reuse the one of this training step.
                # merge_all_summaries() returns None when nothing was
                # registered.
                if (summary_op is not None
                        and global_step % FLAGS.summary_steps == 0):
                    summary_str = sess.run(summary_op, feed_dict=feed)
                    summary_writer.add_summary(summary_str, global_step)

            if epoch % FLAGS.save_epochs == 0:
                saver.save(sess, save_path, global_step)
            train_loader.reset_pointer()

            # Epoch summary.
            training_loss /= train_loader.num_batch
            summary_writer.add_summary(
                _summary_for_scalar('training_loss', training_loss),
                global_step)

            test_loss = _evaluate(sess, test_loader, zero_state)
            test_loader.reset_pointer()
            summary_writer.add_summary(
                _summary_for_scalar('test_loss', test_loss), global_step)

            print("Epoch %d: training_loss = %.2f, test_loss = %.2f" %
                  (epoch + 1, training_loss, test_loss))
#print tf.cast(predicted_labels, tf.int32),labels_ph correct_prediction = tf.equal(predicted, labels_ph) #预测值与label进行比较 accuracy = tf.reduce_mean( tf.cast(correct_prediction, tf.float32)) print("训练图片识别:{0}".format( accuracy.eval({ images_ph: sample_images, labels_ph: sample_labels }))) #a = accuracy.eval({images_ph: sample_images, labels_ph: sample_labels}) #print '最终的测试正确率:{4}'.format(accuracy) #print '最终的测试正确率:{4}'.format(predicted) # save the variables on disk variables = tf.all_variables() saver = tf.train.Saver(variables) saver.save(sess, "data/data.ckpt") # save the model to file and wo can use it predict sth like images # tf.train.write_graph(sess.graph_def, 'graph', 'model.ph', False) ''' node_seq = {} # Keyed by node name. seq = 4 for node in g.as_graph_def().node: seq += 1 print g.as_graph_def().node[22] print seq ''' #a ="Accuracy/predicted_labels".split('.')
def __init__(self,
             phase,
             visualize,
             data_path,
             output_dir,
             batch_size,
             initial_learning_rate,
             num_epoch,
             steps_per_checkpoint,
             target_vocab_size,
             model_dir,
             target_embedding_size,
             attn_num_hidden,
             attn_num_layers,
             clip_gradients,
             max_gradient_norm,
             session,
             load_model,
             gpu_id,
             use_gru,
             use_distance=True,
             max_image_width=160,
             max_image_height=60,
             max_prediction_length=8,
             reg_val=0):
    """Build the CNN + attention-decoder graph and restore or initialize it.

    Args:
        phase: Either 'train' or 'test'; any other value fails an assertion.
            'test' builds a forward-only graph.
        visualize: Stored on ``self.visualize`` and logged.
        data_path: Passed to ``DataGen`` as the data source.
        output_dir: Stored on ``self.output_dir``.
        batch_size: Stored on ``self.batch_size``; forced to 1 whenever
            ``phase`` is not 'train'.
        initial_learning_rate: Learning rate given to the Adadelta optimizer.
        num_epoch: Number of epochs handed to ``DataGen`` when training.
        steps_per_checkpoint: Stored on ``self.steps_per_checkpoint``.
        target_vocab_size: Forwarded to ``Seq2SeqModel``.
        model_dir: Checkpoint directory; created if it does not exist.
        target_embedding_size: Forwarded to ``Seq2SeqModel``.
        attn_num_hidden: Forwarded to ``Seq2SeqModel``.
        attn_num_layers: Forwarded to ``Seq2SeqModel``.
        clip_gradients: When truthy, gradients are clipped by global norm.
        max_gradient_norm: Clipping norm used when ``clip_gradients`` is set.
        session: Session used to restore or initialize the variables.
        load_model: When truthy and a checkpoint exists in ``model_dir``,
            parameters are restored from it instead of freshly initialized.
        gpu_id: Index used to build the '/gpu:<id>' device string.
        use_gru: Forwarded to ``Seq2SeqModel``.
        use_distance: Stored on ``self.use_distance``.
        max_image_width: Maximum original image width.
        max_image_height: Maximum original image height.
        max_prediction_length: Maximum prediction length; the decoder size
            is this value plus 2.
        reg_val: Weight of the regularization losses; they are only added
            to the loss when this is greater than 0.
    """
    self.use_distance = use_distance

    # We need the resized width, not the actual width.
    self.max_original_width = max_image_width
    self.max_width = int(math.ceil(1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT))

    self.encoder_size = int(math.ceil(1. * self.max_width / 4))
    self.decoder_size = max_prediction_length + 2
    self.buckets = [(self.encoder_size, self.decoder_size)]

    gpu_device_id = '/gpu:' + str(gpu_id)
    self.gpu_device_id = gpu_device_id
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    logging.info('loading data')
    if phase == 'train':
        self.s_gen = DataGen(data_path, self.buckets, epochs=num_epoch, max_width=self.max_original_width)
    else:
        batch_size = 1
        self.s_gen = DataGen(data_path, self.buckets, epochs=1, max_width=self.max_original_width)

    logging.info('phase: %s', phase)
    logging.info('model_dir: %s', model_dir)
    logging.info('load_model: %s', load_model)
    logging.info('output_dir: %s', output_dir)
    logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
    logging.info('batch_size: %d', batch_size)
    logging.info('num_epoch: %d', num_epoch)
    # %g instead of %d: these are usually fractional and %d truncated them
    # (a learning rate of 0.001 used to be logged as 0).
    logging.info('learning_rate: %g', initial_learning_rate)
    logging.info('reg_val: %g', reg_val)
    logging.info('max_gradient_norm: %f', max_gradient_norm)
    logging.info('clip_gradients: %s', clip_gradients)
    logging.info('max_image_width %f', max_image_width)
    logging.info('max_prediction_length %f', max_prediction_length)
    logging.info('target_vocab_size: %d', target_vocab_size)
    logging.info('target_embedding_size: %f', target_embedding_size)
    logging.info('attn_num_hidden: %d', attn_num_hidden)
    logging.info('attn_num_layers: %d', attn_num_layers)
    logging.info('visualize: %s', visualize)

    if use_gru:
        logging.info('using GRU in the decoder.')

    self.reg_val = reg_val
    self.sess = session
    self.steps_per_checkpoint = steps_per_checkpoint
    self.model_dir = model_dir
    self.output_dir = output_dir
    self.batch_size = batch_size
    self.num_epoch = num_epoch
    self.global_step = tf.Variable(0, trainable=False)
    self.phase = phase
    self.visualize = visualize
    self.learning_rate = initial_learning_rate
    self.clip_gradients = clip_gradients

    if phase == 'train':
        self.forward_only = False
    elif phase == 'test':
        self.forward_only = True
    else:
        assert False, phase

    with tf.device(gpu_device_id):
        self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
        self.height_float = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.float64)

        # Accept either a single encoded image (rank 0) or a batch of them;
        # a single image is expanded to a batch of one.
        self.img_pl = tf.placeholder(tf.string, name='input_image_as_bytes')
        self.img_data = tf.cond(
            tf.less(tf.rank(self.img_pl), 1),
            lambda: tf.expand_dims(self.img_pl, 0),
            lambda: self.img_pl
        )
        self.img_data = tf.map_fn(self._prepare_image, self.img_data, dtype=tf.float32)
        num_images = tf.shape(self.img_data)[0]

        # TODO: create a mask depending on the image/batch size
        self.encoder_masks = [
            tf.tile([[1.]], [num_images, 1])
            for _ in xrange(self.encoder_size + 1)
        ]

        # One extra decoder step is appended whose target weight is zero.
        self.decoder_inputs = []
        self.target_weights = []
        for step in xrange(self.decoder_size + 1):
            self.decoder_inputs.append(tf.tile([0], [num_images]))
            step_weight = 1. if step < self.decoder_size else 0.
            self.target_weights.append(tf.tile([step_weight], [num_images]))

        # TODO: not 2, 2 is static (???)
        self.zero_paddings = tf.zeros([num_images, 2, 512], dtype=np.float32)

        cnn_model = CNN(self.img_data, True)
        self.conv_output = cnn_model.tf_output()
        self.concat_conv_output = tf.concat(axis=1, values=[self.conv_output, self.zero_paddings])

        self.perm_conv_output = tf.transpose(self.concat_conv_output, perm=[1, 0, 2])

        self.attention_decoder_model = Seq2SeqModel(
            encoder_masks=self.encoder_masks,
            encoder_inputs_tensor=self.perm_conv_output,
            decoder_inputs=self.decoder_inputs,
            target_weights=self.target_weights,
            target_vocab_size=target_vocab_size,
            buckets=self.buckets,
            target_embedding_size=target_embedding_size,
            attn_num_layers=attn_num_layers,
            attn_num_hidden=attn_num_hidden,
            forward_only=self.forward_only,
            use_gru=use_gru)

        # In-graph lookup table from character id to the matching
        # DataGen.CHARMAP entry, so predictions come out as strings.
        table = tf.contrib.lookup.MutableHashTable(
            key_dtype=tf.int64,
            value_dtype=tf.string,
            default_value="",
            checkpoint=True,
        )

        insert = table.insert(
            tf.constant(range(len(DataGen.CHARMAP)), dtype=tf.int64),
            tf.constant(DataGen.CHARMAP),
        )

        with tf.control_dependencies([insert]):
            num_feed = [
                tf.argmax(step_output, axis=1)
                for step_output in self.attention_decoder_model.output
            ]

            trans_output = tf.transpose(num_feed)
            # Fold each row right-to-left: hitting EOS resets the accumulator,
            # so only the characters before the first EOS are kept.
            trans_output = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.cond(
                        tf.equal(x, DataGen.EOS_ID),
                        lambda: '',
                        lambda: table.lookup(x) + a
                    ),
                    m,
                    initializer=''
                ),
                trans_output,
                dtype=tf.string
            )

            # A batch of exactly one image yields a scalar string.
            self.prediction = tf.cond(
                tf.equal(tf.shape(trans_output)[0], 1),
                lambda: trans_output[0],
                lambda: trans_output
            )

        if not self.forward_only:  # train
            self.updates = []
            self.summaries_by_bucket = []

            params = tf.trainable_variables()
            opt = tf.train.AdadeltaOptimizer(learning_rate=initial_learning_rate)

            if self.reg_val > 0:
                reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                logging.info('Adding %s regularization losses', len(reg_losses))
                logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                loss_op = self.reg_val * tf.reduce_sum(reg_losses) + self.attention_decoder_model.loss
            else:
                loss_op = self.attention_decoder_model.loss

            gradients, params = zip(*opt.compute_gradients(loss_op, params))
            if self.clip_gradients:
                gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)

            # Summaries for the loss and the total gradient norm.
            summaries = [
                tf.summary.scalar("loss", loss_op),
                tf.summary.scalar("total_gradient_norm", tf.global_norm(gradients)),
            ]
            all_summaries = tf.summary.merge(summaries)
            self.summaries_by_bucket.append(all_summaries)

            # Update op: apply the gradients after the ops registered in the
            # UPDATE_OPS collection have run.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.updates.append(opt.apply_gradients(zip(gradients, params), global_step=self.global_step))

    # tf.all_variables() / tf.initialize_all_variables() are deprecated in
    # favour of tf.global_variables() / tf.global_variables_initializer().
    self.saver_all = tf.train.Saver(tf.global_variables())
    self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and load_model:
        logging.info("Reading model parameters from %s", ckpt.model_checkpoint_path)
        self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        logging.info("Created model with fresh parameters.")
        self.sess.run(tf.global_variables_initializer())