def __init__(self, network_architecture, transfer_fct=tf.nn.softplus,
                 learning_rate=0.001, batch_size=100, load_model=False,
                 checkpoint_folder='./vae_checkpoints'):
        self.network_architecture = network_architecture
        self.transfer_fct = transfer_fct
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        
        # tf Graph input
        self.x = tf.placeholder(tf.float32, [None, network_architecture["n_input"]])
        
        # Launch the session
        self.sess = tf.InteractiveSession()

        # Create the autoencoder network
        self._create_network()
        # Define the loss function (based on the variational upper bound)
        # and the corresponding optimizer
        self._create_loss_optimizer()
        print len(tf.all_variables())
        self.saver = tf.train.Saver(var_list=tf.all_variables())
        
        if not load_model:
            init = tf.initialize_all_variables()
            self.sess.run(init)
        else:
            ckpt = tf.train.get_checkpoint_state(checkpoint_folder)
            self.saver.restore(self.sess, ckpt.model_checkpoint_path)
            print "Loaded model:",ckpt.model_checkpoint_path
            self.sess.run(tf.all_variables())
Example #2
def _discriminator_model(sess, features, disc_input):
    # Fully convolutional model
    mapsize = 3
    layers  = [64, 128, 256, 512]

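    # Snapshot the variables that already exist; everything created after
    # this point belongs to the discriminator and is returned separately.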
    old_vars = tf.all_variables()

    model = Model('DIS', 2*disc_input - 1)

    for layer in range(len(layers)):
        nunits = layers[layer]
        stddev_factor = 2.0

        model.add_conv2d(nunits, mapsize=mapsize, stride=2, stddev_factor=stddev_factor)
        model.add_batch_norm()
        model.add_relu()

    # Finalization a la "all convolutional net"
    model.add_conv2d(nunits, mapsize=mapsize, stride=1, stddev_factor=stddev_factor)
    model.add_batch_norm()
    model.add_relu()

    model.add_conv2d(nunits, mapsize=1, stride=1, stddev_factor=stddev_factor)
    model.add_batch_norm()
    model.add_relu()

    # Linearly map to real/fake and return average score
    # (softmax will be applied later)
    model.add_conv2d(1, mapsize=1, stride=1, stddev_factor=stddev_factor)
    model.add_mean()

    new_vars  = tf.all_variables()
    disc_vars = list(set(new_vars) - set(old_vars))

    return model.get_output(), disc_vars
Example #3
    def reset_module(self, module):

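        # Snapshot existing variables, run the module's backward pass
        # (which may create new variables), then initialize only those.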
        temp = set(tf.all_variables())

        module.backward(module.loss)

        self.sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))
Example #4
def train_dnn(data_folder, model_file): 
    # Output of dnn using input x
    y = DNN(x)
    
    print "Loading training pickles..."  
    train_set = import_data.load_dataset(data_folder + '/train_data.pickle', 
                                         data_folder + '/train_labels.pickle',
                                         context_frames=context_frames)      
        
    # Create the dir for the model
    if not os.path.isdir('%s/models/%s'%(save_loc,start_date)):
        try:
            os.makedirs('%s/models/%s'%(save_loc,start_date))
        except OSError:
            if not os.path.isdir('%s/models/%s'%(save_loc,start_date)):
                raise
    
    # Create the session
    global sess
    sess = tf.InteractiveSession()    
    global summary_op
    global train_writer
    global saver
    saver = tf.train.Saver()
        
    # Op for merging all summaries
    summary_op = tf.merge_all_summaries()
    # Summary Writer
    train_writer = tf.train.SummaryWriter('%ssummaries/%s'%(save_loc, start_date), sess.graph)
        
    # Cost function
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
    # Optimizer
    # For gradient descent, learning rate = 0.002 (see Hinton et al.)
    # For AdamOptimizer, learning rate = 0.0001 (better than default (exp 1.2))
    if (optimizer_name == 'Adam'):
        # Hacky solution to make sure the beta1_power/beta2_power variables
        # created by Adam are always initialized
        temp = set(tf.all_variables())
        optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost)
        sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))
    else:
        optimizer = tf.train.GradientDescentOptimizer(0.02).minimize(cost)
    
    if model_file:
        saver.restore(sess, model_file)
        print "Model restored"
    else:
        # Initialization
        init_op = tf.initialize_all_variables()
        sess.run(init_op)    
    
    print("Training network. Date: %s" % start_date)
    train(train_set, y, cost, optimizer)
    
    save_path = saver.save(sess, "%s/models/%s/model.ckpt"%(save_loc, start_date))
    print("Model saved in file: %s" % save_path)
    print("Summaries written to summaries/%s" % start_date)
    
    evaluate_dnn(data_folder, y)
    def sample(self, args):
        if self.model is None:
            # Allow sample to be usable outside of main()
            with open(os.path.join(args.save_dir, 'config.pkl')) as f:
                saved_args = cPickle.load(f)
            with open(os.path.join(args.save_dir, 'chars_vocab.pkl')) as f:
                self.chars, self.vocab = cPickle.load(f)
            self.model = Model(saved_args, True)

            with tf.Session() as sess:
                tf.initialize_all_variables().run()
                saver = tf.train.Saver(tf.all_variables())
                ckpt = tf.train.get_checkpoint_state(args.save_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    return self.model.sample(sess, self.chars, self.vocab, args.n, args.prime)
        else:
            with tf.Session() as sess:
                tf.initialize_all_variables().run()
                saver = tf.train.Saver(tf.all_variables())
                ckpt = tf.train.get_checkpoint_state(args.save_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    return self.model.sample(sess, self.chars, self.vocab, args.n, args.prime)

        return None
 def _create_initializers(self):
   if self._var_count != len(tf.all_variables()):
     self._saver = tf.train.Saver(tf.all_variables(), max_to_keep=5)
     self._init = tf.initialize_all_variables()
     self._check_inited = tf.assert_variables_initialized()
     self._var_count = len(tf.all_variables())
     if self._summary_writer:
       self._summaries = tf.merge_all_summaries()
       self._summary_writer.add_graph(tf.get_default_graph().as_graph_def())
 def _create_initializers(self):
   if self._var_count != len(tf.all_variables()):
     save_dir = os.path.dirname(self._save_path) if self._save_path else None
     if save_dir and not tf.gfile.IsDirectory(save_dir):
       tf.gfile.MakeDirs(save_dir)
     self._saver = tf.train.Saver(tf.all_variables(), max_to_keep=5)
     self._init = tf.initialize_all_variables()
     self._check_inited = tf.assert_variables_initialized()
     self._var_count = len(tf.all_variables())
     if self._summary_writer:
       self._summaries = tf.merge_all_summaries()
       self._summary_writer.add_graph(tf.get_default_graph())
  def testGraphMatchesImmediate(self):
    """Ensures that the vars line up between the two modes."""
    with tf.Graph().as_default():
      input_pt = prettytensor.wrap(self.input)
      self.BuildLargishGraph(input_pt)
      normal_names = sorted([v.name for v in tf.all_variables()])

    with tf.Graph().as_default():
      template = prettytensor.template('input')
      self.BuildLargishGraph(template).construct(
          input=prettytensor.wrap(self.input))
      template_names = sorted([v.name for v in tf.all_variables()])

    self.assertSequenceEqual(normal_names, template_names)
def register_all_variables_and_grads(y):
    all_vars = tf.all_variables()
    for v in tf.all_variables():
        tf.histogram_summary('hist_'+v.name, v)
        if v.get_shape() == []:
            tf.scalar_summary('scal_'+v.name, v)

    grad_vars = opt.compute_gradients(y,all_vars) #[ (T(gradient),variable) ]
    for (dldw,v) in grad_vars:
        if dldw is not None:
            tf.histogram_summary('hist_'+v.name+'dW', dldw)
            if v.get_shape() == [] or dldw.get_shape() == []:
                tf.scalar_summary('scal_'+v.name+'dW', dldw)
            l2norm_dldw = tf.reduce_mean(tf.square(dldw))
            tf.scalar_summary('scal_'+v.name+'dW_l2_norm', l2norm_dldw)
Example #10
  def __init__(self, model_dir):
    """Create G2P model and initialize or load parameters in session."""
    self.model_dir = model_dir

    # Preliminary actions before model creation.
    if not (model_dir and
            os.path.exists(os.path.join(self.model_dir, "model"))):
      return

    #Load model parameters.
    num_layers, size = data_utils.load_params(self.model_dir)
    batch_size = 1 # We decode one word at a time.
    # Load vocabularies
    self.gr_vocab = data_utils.load_vocabulary(os.path.join(self.model_dir,
                                                            "vocab.grapheme"))
    self.ph_vocab = data_utils.load_vocabulary(os.path.join(self.model_dir,
                                                            "vocab.phoneme"))

    self.rev_ph_vocab =\
      data_utils.load_vocabulary(os.path.join(self.model_dir, "vocab.phoneme"),
                                 reverse=True)

    self.session = tf.Session()

    # Create model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(len(self.gr_vocab),
                                            len(self.ph_vocab), self._BUCKETS,
                                            size, num_layers, 0, batch_size,
                                            0, 0, forward_only=True)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    # Check for saved models and restore them.
    print("Reading model parameters from %s" % self.model_dir)
    self.model.saver.restore(self.session, os.path.join(self.model_dir,
                                                        "model"))
Example #11
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in xrange(args.num_epochs):
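            # Decay the learning rate exponentially, once per epoch.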
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
Example #12
def get_embeddings(sess):
    embeddings = dict()
    for num_cat_value in np.unique(config.feature_desc):
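        # One embedding matrix exists per distinct category cardinality;
        # look it up by its variable-name prefix.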
        embed_prefix_name = 'embeddings' + str(num_cat_value)
        embed_var = [v for v in tf.all_variables() if v.name.startswith(embed_prefix_name)][0]
        embeddings[num_cat_value] = sess.run(embed_var)
    return embeddings
Example #13
def cluster_feature_analysis(sess, user_ids):
    # Get trained parameters
    lstm_vars = [v for v in tf.all_variables() if v.name.startswith('lstm')]
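    # By cell convention, the first variable is the concatenated gate
    # weight matrix and the second is the gate bias vector.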
    matrix_var = sess.run(lstm_vars[0])
    bias_var = sess.run(lstm_vars[1])
    
    # Split the gates
    matrix_i, matrix_j, matrix_f, matrix_o = sess.run(array_ops.split(1, 4, matrix_var))
    bias_i, bias_j, bias_f, bias_o = sess.run(array_ops.split(0, 4, bias_var))
    
    dict_i, dict_j, dict_f, dict_o = dict(), dict(), dict(), dict()
    for feature in range(len(config.feature_desc)):
        dict_i[feature] = []
        dict_j[feature] = []
        dict_f[feature] = []
        dict_o[feature] = []
    for user_id in user_ids:
        print user_id
        gates_i, gates_j, gates_f, gates_o = feature_importance(sess, user_id, matrix_i, 
                                                                matrix_j, matrix_f, matrix_o, 
                                                                bias_i, bias_j, bias_f, bias_o)
        for feature in range(len(config.feature_desc)):
            dict_i[feature].append(gates_i[feature])
            dict_j[feature].append(gates_j[feature])
            dict_f[feature].append(gates_f[feature])
            dict_o[feature].append(gates_o[feature])                        
    return dict_i, dict_j, dict_f, dict_o
  def testStochasticVariablesWithCallableInitializer(self):
    shape = (10, 20)

    def sigma_init(shape, dtype, partition_info):
      _ = partition_info
      return tf.ones(shape, dtype=dtype) * 2.

    with tf.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusSigma,
            dist_kwargs={"validate_args": True},
            param_initializers={
                "mu": np.ones(
                    shape, dtype=np.float32) * 4.,
                "sigma": sigma_init
            })):
      v = tf.get_variable("sv", shape)

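    # The stochastic-variable getter creates underlying mu and sigma
    # variables behind the single get_variable call; locate them by name.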
    for var in tf.all_variables():
      if "mu" in var.name:
        mu_var = var
      if "sigma" in var.name:
        sigma_var = var

    v = tf.convert_to_tensor(v)
    with self.test_session() as sess:
      sess.run(tf.initialize_all_variables())
      self.assertAllEqual(np.ones(shape) * 4., sess.run(mu_var))
      self.assertAllEqual(np.ones(shape) * 2., sess.run(sigma_var))
      self.assertEqual(shape, sess.run(v).shape)
Example #15
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)
    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)
    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)
    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = {}
    for v in tf.all_variables():
      if v in tf.trainable_variables():
        restore_name = variable_averages.average_name(v)
      else:
        restore_name = v.op.name
      variables_to_restore[restore_name] = v
    saver = tf.train.Saver(variables_to_restore)
    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()
    graph_def = tf.get_default_graph().as_graph_def()
    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                            graph_def=graph_def)
    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
def add_aux_layer(self, aux_attrs):
    layer_name = aux_attrs['layer_name']
    with tf.variable_scope(layer_name):
        init_op = tf.initialize_all_variables()
        saver = tf.train.Saver(tf.all_variables())
        tensors_dict = {'%s_init_op' % layer_name: init_op, '%s_saver_op' % layer_name: saver}
        return tensors_dict
def restore(checkpoint_file='hello.chk'):
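    # validate_shape=False lets restore() overwrite these placeholders
    # with whatever shapes are stored in the checkpoint.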
    x = tf.Variable(-1.0, validate_shape=False, name='x')
    y = tf.Variable(-1.0, validate_shape=False, name='y')
    with tf.Session() as session:
        saver = tf.train.Saver()
        saver.restore(session, checkpoint_file)
        print(session.run(tf.all_variables()))
def train_model(args):
	data_loader = InputHandler(args.data_dir, args.batch_size, args.result_length)
	args.vocabulary_size = data_loader.vocabulary_size

	# Save the original files, so that we can load the model when sampling
	with open(os.path.join(args.snapshots_dir, CONFIGURATION_FILE), 'wb') as f:
		cPickle.dump(args, f)
	with open(os.path.join(args.snapshots_dir, WORDS_VOCABULARY_FILE), 'wb') as f:
		cPickle.dump((data_loader.words, data_loader.vocabulary), f)

	model = RNNModel(args.rnn_size, args.network_depth, args.batch_size, args.result_length,
					 args.vocabulary_size, args.gradient)

	with tf.Session() as session:
		tf.initialize_all_variables().run()
		saver = tf.train.Saver(tf.all_variables())
		for e in range(args.num_epochs):
			session.run(tf.assign(model.lr, args.training_rate * (args.decay_rate ** e)))
			data_loader.set_batch_pointer_to_zero()
			state = model.initial_state.eval()

			for b in range(data_loader.num_batches):
				x, y = data_loader.get_next_batch()
				feed = {model.input_data: x, model.targets: y, model.initial_state: state}
				train_loss, state, _ = session.run([model.cost, model.final_state, model.train_op], feed)
				if (e * data_loader.num_batches + b) % args.snapshot == 0 \
						or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
					snapshot_path = os.path.join(args.snapshots_dir, 'model.ckpt')
					saver.save(session, snapshot_path, global_step = e * data_loader.num_batches + b)
					print("Model snapshot was taken to {}".format(snapshot_path))
Example #19
 def guarantee_initialized_variables(self, session, list_of_variables=None):
     if list_of_variables is None:
         list_of_variables = tf.all_variables()
     # report_uninitialized_variables() returns variable *names*, so map
     # them back to the variable objects by name before initializing.
     uninitialized_names = set(session.run(
             tf.report_uninitialized_variables(list_of_variables)))
     uninitialized_variables = [v for v in list_of_variables
                                if v.op.name in uninitialized_names]
     session.run(tf.initialize_variables(uninitialized_variables))
     return uninitialized_variables
Example #20
    def testGradient(self):
        with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
            batch_size = 1
            cell_size = 3
            input_size = 2

            # Inputs
            x = tf.zeros([batch_size, input_size])
            h = tf.zeros([batch_size, cell_size])
            output = gru_ops.GRUBlockCell(cell_size)(x, h)

            sess.run([tf.initialize_all_variables()])

            all_variables = tf.all_variables()

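            # GRUBlockCell registers its parameters in creation order:
            # reset/update kernel and bias, then candidate kernel and bias.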
            [w_ru, b_ru, w_c, b_c] = all_variables[:4]

            error_x = tf.test.compute_gradient_error(x, (batch_size, input_size), output[0], (batch_size, cell_size))
            error_h = tf.test.compute_gradient_error(h, (batch_size, cell_size), output[0], (batch_size, cell_size))
            error_w_ru = tf.test.compute_gradient_error(
                w_ru, (input_size + cell_size, 2 * cell_size), output[0], (batch_size, cell_size)
            )
            error_w_c = tf.test.compute_gradient_error(
                w_c, (input_size + cell_size, cell_size), output[0], (batch_size, cell_size)
            )
            error_b_ru = tf.test.compute_gradient_error(b_ru, (2 * cell_size,), output[0], (batch_size, cell_size))
            error_b_c = tf.test.compute_gradient_error(b_c, (cell_size,), output[0], (batch_size, cell_size))

        eps = 1e-4
        self.assertLess(error_x, eps)
        self.assertLess(error_h, eps)
        self.assertLess(error_w_ru, eps)
        self.assertLess(error_w_c, eps)
        self.assertLess(error_b_ru, eps)
        self.assertLess(error_b_c, eps)
def run_model_image(checkpoint_file, image):
    """
    Run an image through the trained model and vizualize its activations
    :param checkpoint_file: The saved model parameters for the basic model
    :param image: The supplied image (same dimensions as training).
    """
    with tf.Graph().as_default():
        image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 1])
        image = tf.image.per_image_whitening(image)
        image = tf.reshape(image, [1, IMAGE_SIZE, IMAGE_SIZE, 1])
        image = tf.cast(image, tf.float32)

        relu1, relu2, relu3 = inference(train=False, images=image, visualize=True)

        saver = tf.train.Saver(tf.all_variables())
        sess = tf.Session()
        saver.restore(sess=sess, save_path=checkpoint_file)

        units = relu1.eval(session=sess)
        plotNNFilter(units)

        units = relu2.eval(session=sess)
        plotNNFilter(units)

        units = relu3.eval(session=sess)
        plotNNFilter(units)
	def train(self, data=0, steps=-1, dropout=None, display_step=10, test_step=200, batch_size=10,
	          do_resume=False):  # epochs=-1,
		if data: self.data = data
		steps = 9999999 if steps == -1 else steps
		session = self.session
		# with tf.device(_cpu):

		# import tensorflow.contrib.layers as layers
		# t = tf.verify_tensor_all_finite(t, msg)
		tf.add_check_numerics_ops()
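		# Prefer the TF 1.x API names; fall back to the pre-1.0 ones.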
		try:
			self.summaries = tf.summary.merge_all()
		except:
			self.summaries = tf.merge_all_summaries()
		try:
			self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)  #
		except:
			self.summary_writer = tf.train.SummaryWriter(current_logdir(), session.graph)  #
		if not dropout: dropout = 1.  # keep all
		x = self.x
		y = self.y
		keep_prob = self.keep_prob
		try:
			saver = tf.train.Saver(tf.global_variables())
		except:
			saver = tf.train.Saver(tf.all_variables())
		snapshot = self.name + str(get_last_tensorboard_run_nr())
		checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
		if do_resume and checkpoint:
			print("LOADING " + checkpoint + " !!!")
			saver.restore(session, checkpoint)
		try:
			session.run([tf.global_variables_initializer()])
		except:
			session.run([tf.initialize_all_variables()])
		step = 0  # show first
		while step < steps:
			batch_xs, batch_ys = self.next_batch(batch_size, session)
			# print("step %d \r" % step)# end=' ')

			# tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)
			# Fit training using batch data
			feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
			loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)
			if step % display_step == 0:
				seconds = int(time.time()) - start
				# Calculate batch accuracy, loss
				feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
				acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
				# self.summary_writer.add_summary(summary, step) # only test summaries for smoother curve
				print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(step, loss, acc, seconds), end=' ')
				if str(loss) == "nan": return print("\nLoss gradiant explosion, exiting!!!")  # restore!
			if step % test_step == 0: self.test(step)
			if step % save_step == 0 and step > 0:
				print("SAVING snapshot %s" % snapshot)
				saver.save(session, checkpoint_dir + snapshot + ".ckpt", self.global_step)

			step += 1
		print("\nOptimization Finished!")
		self.test(step, number=10000)  # final test
  def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
    """Return callable for loading a checkpoint into the tensorflow graph.

    Args:
      checkpoint_path: path to checkpoint to restore.
      from_detection_checkpoint: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.

    Returns:
      a callable which takes a tf.Session as input and loads a checkpoint when
        run.
    """
    variables_to_restore = {}
    for variable in tf.all_variables():
      if variable.op.name.startswith(self._extract_features_scope):
        var_name = variable.op.name
        if not from_detection_checkpoint:
          var_name = (
              re.split('^' + self._extract_features_scope + '/', var_name)[-1])
        variables_to_restore[var_name] = variable
    # TODO: Load variables selectively using scopes.
    variables_to_restore = (
        variables_helper.get_variables_available_in_checkpoint(
            variables_to_restore, checkpoint_path))
    saver = tf.train.Saver(variables_to_restore)

    def restore(sess):
      saver.restore(sess, checkpoint_path)
    return restore
Example #24
    def drawGraph(self, n_row, n_latent, n_col):
        with tf.name_scope('matDecomp'):
            self._p = tf.placeholder(tf.float32, shape=[None, n_col])
            self._c = tf.placeholder(tf.float32, shape=[None, n_col])
            self._lambda = tf.placeholder(tf.float32)
            self._index = tf.placeholder(tf.float32, shape=[None, n_row])
            self._A = tf.Variable(tf.truncated_normal([n_row, n_latent]))
            self._B = tf.Variable(tf.truncated_normal([n_latent, n_col]))
            self._h = tf.matmul(tf.matmul(self._index, self._A), self._B) 
            
            weighted_loss = tf.reduce_mean(tf.mul(self._c, tf.squared_difference(self._p, self._h)))
            self._weighted_loss = weighted_loss
            l2_A = tf.reduce_sum(tf.square(self._A))
            l2_B = tf.reduce_sum(tf.square(self._B))
            n_w = tf.constant(n_row * n_latent + n_latent * n_col, tf.float32)
            l2 = tf.truediv(tf.add(l2_A, l2_B), n_w)
            reg_term = tf.mul(self._lambda, l2)
            self._loss = tf.add(weighted_loss, reg_term)
            
            self._mask = tf.placeholder(tf.float32, shape=[n_row, n_col])
            one = tf.constant(1, tf.float32)
            pred = tf.cast(tf.greater_equal(tf.matmul(self._A, self._B), one), tf.float32)
            cor = tf.mul(tf.cast(tf.equal(pred, self._p), tf.float32), self._c)
            self._vali_err = tf.reduce_sum(tf.mul(cor, self._mask))

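            # Save only the variables created inside the 'matDecomp'
            # name scope, identified by substring match on the name.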
            self._saver = tf.train.Saver([v for v in tf.all_variables() if v.name.find('matDecomp') != -1])
            tf.scalar_summary('training_weighted_loss_l2', self._loss)
            tf.scalar_summary('validation_weighted_loss', self._weighted_loss)
            merged = tf.merge_all_summaries()
def predict(args):
    with open(os.path.join(args.save_dir, 'config.pkl')) as f:
        saved_args = cPickle.load(f)
        #saved_args.batch_size = args.batch_size
        saved_args.batch_size = 1
        #saved_args.seq_length = args.seq_length
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl')) as f:
        chars, vocab, idx2classid, classid2idx = cPickle.load(f)
    model = Model(saved_args, infer=False)

    print(args)

    def predict_sentence(sentence):
        return model.predict(sentence, saved_args.seq_length, sess, vocab, idx2classid)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            #print model.predict(sess, chars, vocab, args.n, args.prime)
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            while sentence:
                print(predict_sentence(sentence))
                print("> ", end="")
                sys.stdout.flush()
                sentence = sys.stdin.readline()
    def predict(self,x_test):
        x_test = self.normalize_xtest(x_test)

        X = tf.placeholder("float", [None, self.inputNumber])
        Y = tf.placeholder("float", [None, self.outputNumber])

        W1 = self.init_weight([self.inputNumber, self.layerOne], 'W1')
        B1 = self.init_bias([self.layerOne], 'B1')

        W2 = self.init_weight([self.layerOne, self.layerTwo], 'W2')
        B2 = self.init_bias([self.layerTwo], 'B2')

        W3 = self.init_weight([self.layerTwo,self.outputNumber], 'W3')
        B3 = self.init_bias([self.outputNumber], 'B3')
  
        L2 = self.model(X,  W1, B1)
        L3 = self.model(L2, W2, B2)

        y_out = tf.nn.relu(tf.matmul(L3, W3) + B3)
        cost = tf.reduce_mean(tf.square((Y - y_out)))
        train_op = tf.train.AdamOptimizer(self.learningRate).minimize(cost)
        predict_op = tf.nn.relu(tf.matmul(L3, W3) + B3)

        sess = tf.Session()
        init = tf.initialize_all_variables()
        sess.run(init)

        saver = tf.train.Saver(tf.all_variables())
        saver.restore(sess,self.savePath)

        y_predict = sess.run(predict_op, feed_dict={X: x_test})
        return self.denormalize_ypredict(y_predict)
Example #27
def evaluate (tfrecord_file_paths, theme):
    eval_dir = 'workspace/{}/eval'.format(theme)
    with tf.Graph().as_default() as g:
        images, labels = distorted_inputs(tfrecord_file_paths=tfrecord_file_paths)
        logits = cifar10.inference(tf.image.resize_images(images, cifar10.IMAGE_SIZE, cifar10.IMAGE_SIZE))

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = {}

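        # Restore trainable variables from their moving-average shadow
        # values, and everything else under its own name.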
        for v in tf.all_variables():
            if v in tf.trainable_variables():
                restore_name = variable_averages.average_name(v)
            else:
                restore_name = v.op.name
            variables_to_restore[restore_name] = v

        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(eval_dir, g)

        eval_once(theme, saver, summary_writer, top_k_op, summary_op)
 def testPrepareSessionWithReadyForLocalInitOp(self):
   with tf.Graph().as_default():
     v = tf.Variable(1, name="v")
     w = tf.Variable(
         v,
         trainable=False,
         collections=[tf.GraphKeys.LOCAL_VARIABLES],
         name="w")
     with self.test_session():
       self.assertEqual(False, tf.is_variable_initialized(v).eval())
       self.assertEqual(False, tf.is_variable_initialized(w).eval())
     sm2 = tf.train.SessionManager(
         ready_op=tf.report_uninitialized_variables(),
         ready_for_local_init_op=tf.report_uninitialized_variables(
             tf.all_variables()),
         local_init_op=w.initializer)
     sess = sm2.prepare_session("", init_op=v.initializer)
     self.assertEqual(
         True,
         tf.is_variable_initialized(sess.graph.get_tensor_by_name("v:0")).eval(
             session=sess))
     self.assertEqual(
         True,
         tf.is_variable_initialized(sess.graph.get_tensor_by_name("w:0")).eval(
             session=sess))
     self.assertEquals(1, sess.run(v))
     self.assertEquals(1, sess.run(w))
Example #29
  def _setup_np_inference(self, np_images, checkpoint_path):
    """Sets up and restores inference graph, creates and caches a Session."""
    tf.logging.info('Restoring model weights.')

    # Define inference over an image placeholder.
    _, height, width, _ = np.shape(np_images)
    image_placeholder = tf.placeholder(
        tf.float32, shape=(None, height, width, 3))

    # Preprocess batch.
    preprocessed = self.preprocess_data(image_placeholder, is_training=False)

    # Unscale and jpeg encode preprocessed images for display purposes.
    im_strings = preprocessing.unscale_jpeg_encode(preprocessed)

    # Do forward pass to get embeddings.
    embeddings = self.forward(preprocessed, is_training=False)

    # Create a saver to restore model variables.
    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(tf.all_variables())

    self._image_placeholder = image_placeholder
    self._batch_encoded = embeddings

    self._np_inf_tensor_dict = {
        'embeddings': embeddings,
        'raw_image_strings': im_strings,
    }

    # Create a session and restore model variables.
    self._sess = tf.Session()
    saver.restore(self._sess, checkpoint_path)
  def testWaitForSessionLocalInit(self):
    server = tf.train.Server.create_local_server()
    with tf.Graph().as_default() as graph:
      v = tf.Variable(1, name="v")
      w = tf.Variable(
          v,
          trainable=False,
          collections=[tf.GraphKeys.LOCAL_VARIABLES],
          name="w")
      sm = tf.train.SessionManager(
          graph=graph,
          ready_op=tf.report_uninitialized_variables(),
          ready_for_local_init_op=tf.report_uninitialized_variables(
              tf.all_variables()),
          local_init_op=w.initializer)

      # Initialize v but not w
      s = tf.Session(server.target, graph=graph)
      s.run(v.initializer)

      sess = sm.wait_for_session(server.target, max_wait_secs=3)
      self.assertEqual(
          True,
          tf.is_variable_initialized(sess.graph.get_tensor_by_name("v:0")).eval(
              session=sess))
      self.assertEqual(
          True,
          tf.is_variable_initialized(sess.graph.get_tensor_by_name("w:0")).eval(
              session=sess))
      self.assertEquals(1, sess.run(v))
      self.assertEquals(1, sess.run(w))
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Create a variable to count the number of train() calls. This equals the
    # number of batches processed * FLAGS.num_gpus.
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)
    # Calculate the learning rate schedule.
    num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                             FLAGS.batch_size)
    decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)
    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    cifar10.LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    # Create an optimizer that performs gradient descent.
    opt = tf.train.GradientDescentOptimizer(lr)
    # Calculate the gradients for each model tower.
    tower_grads = []
    for i in xrange(FLAGS.num_gpus):
      with tf.device('/gpu:%d' % i):
        with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
          # Calculate the loss for one tower of the CIFAR model. This function
          # constructs the entire CIFAR model but shares the variables across
          # all towers.
          loss = tower_loss(scope)
          # Reuse variables for the next tower.
          tf.get_variable_scope().reuse_variables()
          # Retain the summaries from the final tower.
          summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
          # Calculate the gradients for the batch of data on this CIFAR tower.
          grads = opt.compute_gradients(loss)
          # Keep track of the gradients across all towers.
          tower_grads.append(grads)
    # We must calculate the mean of each gradient. Note that this is the
    # synchronization point across all towers.
    grads = average_gradients(tower_grads)
    # Add a summary to track the learning rate.
    summaries.append(tf.scalar_summary('learning_rate', lr))
    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        summaries.append(
            tf.histogram_summary(var.op.name + '/gradients', grad))
    # Apply the gradients to adjust the shared variables.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      summaries.append(tf.histogram_summary(var.op.name, var))
    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # Group all updates to into a single train op.
    train_op = tf.group(apply_gradient_op, variables_averages_op)
    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())
    # Build the summary operation from the last tower summaries.
    summary_op = tf.merge_summary(summaries)
    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()
    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration / FLAGS.num_gpus
        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))
      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)
      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
Example #32
def main(_):
    parser = argparse.ArgumentParser(description='TransE.')
    parser.add_argument('--data',
                        dest='data_dir',
                        type=str,
                        help="Data folder",
                        default='./data/FB15k/')
    parser.add_argument('--lr',
                        dest='lr',
                        type=float,
                        help="Learning rate",
                        default=1e-2)
    parser.add_argument("--dim",
                        dest='dim',
                        type=int,
                        help="Embedding dimension",
                        default=256)
    parser.add_argument("--batch",
                        dest='batch',
                        type=int,
                        help="Batch size",
                        default=32)
    parser.add_argument("--worker",
                        dest='n_worker',
                        type=int,
                        help="Evaluation worker",
                        default=3)
    parser.add_argument("--generator",
                        dest='n_generator',
                        type=int,
                        help="Data generator",
                        default=10)
    parser.add_argument("--eval_batch",
                        dest="eval_batch",
                        type=int,
                        help="Evaluation batch size",
                        default=32)
    parser.add_argument("--save_dir",
                        dest='save_dir',
                        type=str,
                        help="Model path",
                        default='./transE')
    parser.add_argument("--load_model",
                        dest='load_model',
                        type=str,
                        help="Model file",
                        default="")
    parser.add_argument("--save_per",
                        dest='save_per',
                        type=int,
                        help="Save per x iteration",
                        default=1)
    parser.add_argument("--eval_per",
                        dest='eval_per',
                        type=int,
                        help="Evaluate every x iteration",
                        default=5)
    parser.add_argument("--max_iter",
                        dest='max_iter',
                        type=int,
                        help="Max iteration",
                        default=30)
    parser.add_argument("--summary_dir",
                        dest='summary_dir',
                        type=str,
                        help="summary directory",
                        default='./transE_summary/')
    parser.add_argument("--keep",
                        dest='drop_out',
                        type=float,
                        help="Keep prob (1.0 keep all, 0. drop all)",
                        default=0.5)
    parser.add_argument("--optimizer",
                        dest='optimizer',
                        type=str,
                        help="Optimizer",
                        default='gradient')
    parser.add_argument("--prefix",
                        dest='prefix',
                        type=str,
                        help="model_prefix",
                        default='DEFAULT')
    parser.add_argument("--loss_weight",
                        dest='loss_weight',
                        type=float,
                        help="Weight on parameter loss",
                        default=1e-2)
    parser.add_argument("--neg_weight",
                        dest='neg_weight',
                        type=float,
                        help="Sampling weight on negative examples",
                        default=0.5)
    parser.add_argument("--save_per_batch",
                        dest='save_per_batch',
                        type=int,
                        help='evaluate and save after every x batches',
                        default=1000)
    parser.add_argument(
        "--outfile_prefix",
        dest='outfile_prefix',
        type=str,
        help='The filename of output file is outfile_prefix.txt',
        default='test_output')
    parser.add_argument("--neg_sample",
                        dest='neg_sample',
                        type=int,
                        help='No. of neg. samples per (h,r) or (t,r) pair',
                        default=5)
    parser.add_argument(
        "--fanout_thresh",
        dest='fanout_thresh',
        type=int,
        help='threshold on fanout of entities to be considered',
        default=2)
    parser.add_argument('--annoy_n_trees',
                        dest='annoy_n_trees',
                        type=int,
                        help='builds a forest of n_trees trees',
                        default=10)
    parser.add_argument(
        '--annoy_search_k',
        dest='annoy_search_k',
        type=int,
        help='During the query it will inspect up to search_k nodes',
        default=-1)
    parser.add_argument('--eval_after',
                        dest='eval_after',
                        type=int,
                        help='Evaluate after this many no. of epochs',
                        default=4)

    args = parser.parse_args()

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print(args)

    model = TransE(args.data_dir,
                   embed_dim=args.dim,
                   fanout_thresh=args.fanout_thresh,
                   eval_batch=args.eval_batch)

    train_pos_neg_list, \
    train_loss, train_op = train_ops(model, learning_rate=args.lr,
                                     optimizer_str=args.optimizer,
                                     regularizer_weight=args.loss_weight)

    get_embedding_op = embedding_ops(model)

    # test_input, test_head, test_tail = test_ops(model)
    f1 = open('%s/%s.txt' % (args.save_dir, args.outfile_prefix), 'w')

    with tf.Session() as session:
        tf.global_variables_initializer().run()

        all_var = tf.all_variables()
        print 'printing all', len(all_var), ' TF variables:'
        for var in all_var:
            print var.name, var.get_shape()

        saver = tf.train.Saver(restore_sequentially=True)

        iter_offset = 0

        if args.load_model is not None and os.path.exists(args.load_model):
            saver.restore(session, args.load_model)
            iter_offset = int(
                args.load_model.split('.')[-2].split('_')[-1]) + 1
            f1.write("Load model from %s, iteration %d restored.\n" %
                     (args.load_model, iter_offset))

        total_inst = model.n_train
        best_filtered_mean_rank = float("inf")

        f1.write("preparing training data...\n")
        nbatches_count = 0
        # training_data_list = []
        training_data_pos_neg_list = []

        for dat in model.raw_training_data(batch_size=args.batch):
            # raw_training_data_queue.put(dat)
            # training_data_list.append(dat)
            ps_list = data_generator_func(dat, model.tr_h, model.hr_t,
                                          model.n_entity, args.neg_sample,
                                          model.n_relation)
            assert ps_list is not None
            training_data_pos_neg_list.append(ps_list)
            nbatches_count += 1
        f1.write("training data prepared.\n")
        f1.write("No. of batches : %d\n" % nbatches_count)
        f1.close()

        start_time = timeit.default_timer()

        for n_iter in range(iter_offset, args.max_iter):
            accu_loss = 0.
            ninst = 0
            # f1.close()

            for batch_id in range(nbatches_count):
                f1 = open('%s/%s.txt' % (args.save_dir, args.outfile_prefix),
                          'a')

                pos_neg_list = training_data_pos_neg_list[batch_id]
                #print data_e
                l, _ = session.run([train_loss, train_op],
                                   {train_pos_neg_list: pos_neg_list})

                accu_loss += l
                ninst += len(pos_neg_list)

                # print('len(pos_neg_list) = %d\n' % len(pos_neg_list))

                if ninst % 5000 == 0:
                    f1.write('[%d sec](%d/%d) : %.2f -- loss : %.5f \n' %
                             (timeit.default_timer() - start_time, ninst,
                              total_inst, float(ninst) / total_inst, l))
                f1.close()

            f1 = open('%s/%s.txt' % (args.save_dir, args.outfile_prefix), 'a')
            f1.write("")
            f1.write("iter %d avg loss %.5f, time %.3f\n" %
                     (n_iter, accu_loss / ninst,
                      timeit.default_timer() - start_time))

            save_path = saver.save(
                session,
                os.path.join(
                    args.save_dir, "TransE_" + str(args.prefix) + "_" +
                    str(n_iter) + ".ckpt"))
            f1.write("Model saved at %s\n" % save_path)

            with tf.device('/cpu'):
                if n_iter > args.eval_after and (n_iter % args.eval_per == 0 or
                                                 n_iter == args.max_iter - 1):

                    t = AnnoyIndex(model.embed_dim, metric='euclidean')

                    ent_embedding, rel_embedding = session.run(
                        get_embedding_op, {train_pos_neg_list: pos_neg_list})
                    # sess = tf.InteractiveSession()
                    # with sess.as_default():
                    #     ent_embedding = model.ent_embeddings.eval()
                    print np.asarray(ent_embedding).shape
                    print np.asarray(rel_embedding).shape

                    # print ent_embedding[10,:]
                    # print rel_embedding[10,:]
                    print 'Index creation started'

                    for i in xrange(model.n_entity):
                        v = ent_embedding[i, :]
                        t.add_item(i, v)
                    t.build(args.annoy_n_trees)

                    print 'Index creation completed'

                    # n = int(0.0005 * model.n_entity)
                    n = 1000
                    # search_k = int(n * args.annoy_n_trees/100.0)
                    search_k = 1000

                    print 'No. of items = %d' % t.get_n_items()
                    print sum(t.get_item_vector(0))
                    print sum(ent_embedding[0, :])
                    assert sum(t.get_item_vector(0)) == sum(
                        ent_embedding[0, :])

                    eval_dict = zip([model.raw_training_data], ['TRAIN'])

                    for data_func, test_type in eval_dict:
                        accu_mean_rank_h = list()
                        accu_mean_rank_t = list()
                        accu_filtered_mean_rank_h = list()
                        accu_filtered_mean_rank_t = list()

                        evaluation_count = 0
                        evaluation_batch = []
                        batch_id = 0
                        for testing_data in data_func(
                                batch_size=args.eval_batch):
                            batch_id += 1
                            print 'test_type: %s, batch id: %d' % (test_type,
                                                                   batch_id)
                            head_ids = list()
                            tail_ids = list()

                            for i in xrange(testing_data.shape[0]):
                                # try:
                                # print (ent_embedding[testing_data[i,0],:] + rel_embedding[testing_data[i,2],:])
                                tail_ids.append(
                                    t.get_nns_by_vector(
                                        (ent_embedding[testing_data[i, 0], :] +
                                         rel_embedding[testing_data[i, 2], :]),
                                        n, search_k))
                                head_ids.append(
                                    t.get_nns_by_vector(
                                        (ent_embedding[testing_data[i, 1], :] -
                                         rel_embedding[testing_data[i, 2], :]),
                                        n, search_k))
                                # except:
                                #     print 'i = %d' % i
                                #     print 'testing_data[i,0] = %d' % testing_data[i,0]
                                #     print 'testing_data[i,1] = %d' % testing_data[i,1]
                                #     print 'testing_data[i,2] = %d' % testing_data[i,2]

                            # print head_ids
                            # print tail_ids
                            evaluation_batch.append(
                                (testing_data, head_ids, tail_ids))
                            evaluation_count += 1
                            if batch_id > 52662:
                                break

                        while evaluation_count > 0:
                            evaluation_count -= 1

                            # (mrh, fmrh), (mrt, fmrt) = result_queue.get()
                            (mrh, fmrh), (mrt, fmrt) = worker_func(
                                evaluation_batch[evaluation_count - 1],
                                model.hr_t, model.tr_h)
                            accu_mean_rank_h += mrh
                            accu_mean_rank_t += mrt
                            accu_filtered_mean_rank_h += fmrh
                            accu_filtered_mean_rank_t += fmrt

                        f1.write(
                            "[%s] ITER %d [HEAD PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f\n"
                            % (test_type, n_iter, np.mean(accu_mean_rank_h),
                               np.mean(accu_filtered_mean_rank_h),
                               np.mean(
                                   np.asarray(accu_mean_rank_h, dtype=np.int32)
                                   < 10),
                               np.mean(
                                   np.asarray(accu_filtered_mean_rank_h,
                                              dtype=np.int32) < 10)))

                        f1.write(
                            "[%s] ITER %d [TAIL PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f\n"
                            % (test_type, n_iter, np.mean(accu_mean_rank_t),
                               np.mean(accu_filtered_mean_rank_t),
                               np.mean(
                                   np.asarray(accu_mean_rank_t, dtype=np.int32)
                                   < 10),
                               np.mean(
                                   np.asarray(accu_filtered_mean_rank_t,
                                              dtype=np.int32) < 10)))

            f1.close()
Example #33
    def __init__(self,
                 phase,
                 visualize,
                 output_dir,
                 batch_size,
                 initial_learning_rate,
                 steps_per_checkpoint,
                 model_dir,
                 target_embedding_size,
                 attn_num_hidden,
                 attn_num_layers,
                 clip_gradients,
                 max_gradient_norm,
                 session,
                 load_model,
                 gpu_id,
                 use_gru,
                 use_distance=True,
                 max_image_width=160,
                 max_image_height=60,
                 max_prediction_length=50,
                 channels=1,
                 reg_val=0):

        self.use_distance = use_distance

        # We need resized width, not the actual width
        max_resized_width = 1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT

        self.max_original_width = max_image_width
        self.max_width = int(math.ceil(max_resized_width))
        self.max_label_length = max_prediction_length
        self.encoder_size = int(math.ceil(1. * self.max_width / 4))
        self.decoder_size = max_prediction_length + 2
        self.buckets = [(self.encoder_size, self.decoder_size)]

        if gpu_id >= 0:
            device_id = '/gpu:' + str(gpu_id)
        else:
            device_id = '/cpu:0'
        self.device_id = device_id

        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

        if phase == 'test':
            batch_size = 1

        logging.info('phase: %s', phase)
        logging.info('model_dir: %s', model_dir)
        logging.info('load_model: %s', load_model)
        logging.info('output_dir: %s', output_dir)
        logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
        logging.info('batch_size: %d', batch_size)
        logging.info('learning_rate: %f', initial_learning_rate)
        logging.info('reg_val: %d', reg_val)
        logging.info('max_gradient_norm: %f', max_gradient_norm)
        logging.info('clip_gradients: %s', clip_gradients)
        logging.info('max_image_width %f', max_image_width)
        logging.info('max_prediction_length %f', max_prediction_length)
        logging.info('channels: %d', channels)
        logging.info('target_embedding_size: %f', target_embedding_size)
        logging.info('attn_num_hidden: %d', attn_num_hidden)
        logging.info('attn_num_layers: %d', attn_num_layers)
        logging.info('visualize: %s', visualize)

        if use_gru:
            logging.info('using GRU in the decoder.')

        self.reg_val = reg_val
        self.sess = session
        self.steps_per_checkpoint = steps_per_checkpoint
        self.model_dir = model_dir
        self.output_dir = output_dir
        self.batch_size = batch_size
        self.max_label_lengthc = int(self.max_label_length / 4)
        self.global_step = tf.Variable(0, trainable=False)
        self.phase = phase
        self.visualize = visualize
        self.learning_rate = initial_learning_rate
        self.clip_gradients = clip_gradients
        self.channels = channels

        if phase == 'train':
            self.forward_only = False
        else:
            self.forward_only = True

        with tf.device(device_id):

            self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
            self.height_float = tf.constant(DataGen.IMAGE_HEIGHT,
                                            dtype=tf.float64)

            self.img_pl = tf.placeholder(tf.string,
                                         name='input_image_as_bytes')
            self.labels = tf.placeholder(tf.int32,
                                         shape=(self.batch_size,
                                                self.max_label_lengthc),
                                         name="input_labels_as_bytes")
            #self.label_data = tf.placeholder(tf.string, shape=[None,self.max_label_length], name="input_labels_as_bs")
            self.img_data = tf.cond(tf.less(tf.rank(self.img_pl), 1),
                                    lambda: tf.expand_dims(self.img_pl, 0),
                                    lambda: self.img_pl)
            self.img_data = tf.map_fn(self._prepare_image,
                                      self.img_data,
                                      dtype=tf.float32)
            num_images = tf.shape(self.img_data)[0]

            # TODO: create a mask depending on the image/batch size
            self.encoder_masks = []
            for i in xrange(self.encoder_size + 1):
                self.encoder_masks.append(tf.tile([[1.]], [num_images, 1]))

            self.decoder_inputs = []
            self.target_weights = []
            for i in xrange(self.decoder_size + 1):
                self.decoder_inputs.append(tf.tile([1], [num_images]))
                if i < self.decoder_size:
                    self.target_weights.append(tf.tile([1.], [num_images]))
                else:
                    self.target_weights.append(tf.tile([0.], [num_images]))

            cnn_model = CNN(self.img_data, not self.forward_only)
            self.conv_output = cnn_model.tf_output()
            self.perm_conv_output = tf.transpose(self.conv_output,
                                                 perm=[1, 0, 2])
            self.attention_decoder_model = Seq2SeqModel(
                encoder_masks=self.encoder_masks,
                encoder_inputs_tensor=self.perm_conv_output,
                labels=self.labels,
                decoder_inputs=self.decoder_inputs,
                target_weights=self.target_weights,
                batch_size=self.batch_size,
                target_vocab_size=len(DataGen.CHARMAP),
                buckets=self.buckets,
                target_embedding_size=target_embedding_size,
                attn_num_layers=attn_num_layers,
                attn_num_hidden=attn_num_hidden,
                forward_only=self.forward_only,
                use_gru=use_gru)

            table = tf.contrib.lookup.MutableHashTable(
                key_dtype=tf.int64,
                value_dtype=tf.string,
                default_value="",
                checkpoint=True,
            )

            insert = table.insert(
                tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
                tf.constant(DataGen.CHARMAP),
            )

            with tf.control_dependencies([insert]):
                num_feed = []
                prb_feed = []

                for line in xrange(len(self.attention_decoder_model.output)):
                    guess = tf.argmax(
                        self.attention_decoder_model.output[line], axis=1)
                    proba = tf.reduce_max(tf.nn.softmax(
                        self.attention_decoder_model.output[line]),
                                          axis=1)
                    num_feed.append(guess)
                    prb_feed.append(proba)

                # Join the predictions into a single output string.
                trans_output = tf.transpose(num_feed)
                trans_output = tf.map_fn(
                    lambda m: tf.foldr(
                        lambda a, x: tf.cond(
                            tf.equal(x, DataGen.EOS_ID),
                            lambda: '',
                            lambda: table.lookup(x) + a  # pylint: disable=undefined-variable
                        ),
                        m,
                        initializer=''),
                    trans_output,
                    dtype=tf.string)

                # Calculate the total probability of the output string.
                trans_outprb = tf.transpose(prb_feed)
                trans_outprb = tf.gather(trans_outprb,
                                         tf.range(tf.size(trans_output)))
                trans_outprb = tf.map_fn(
                    lambda m: tf.foldr(
                        lambda a, x: tf.multiply(tf.cast(x, tf.float64), a),
                        m,
                        initializer=tf.cast(1, tf.float64)),
                    trans_outprb,
                    dtype=tf.float64)

                self.prediction = tf.cond(
                    tf.equal(tf.shape(trans_output)[0], 1),
                    lambda: trans_output[0],
                    lambda: trans_output,
                )
                self.probability = tf.cond(
                    tf.equal(tf.shape(trans_outprb)[0], 1),
                    lambda: trans_outprb[0],
                    lambda: trans_outprb,
                )

                self.prediction = tf.identity(self.prediction,
                                              name='prediction')
                self.probability = tf.identity(self.probability,
                                               name='probability')

            if not self.forward_only:  # train
                self.updates = []
                self.summaries_by_bucket = []

                params = tf.trainable_variables()
                opt = tf.train.AdadeltaOptimizer(
                    learning_rate=initial_learning_rate)
                loss_op = self.attention_decoder_model.loss

                if self.reg_val > 0:
                    reg_losses = tf.get_collection(
                        tf.GraphKeys.REGULARIZATION_LOSSES)
                    logging.info('Adding %s regularization losses',
                                 len(reg_losses))
                    logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                    loss_op = self.reg_val * tf.reduce_sum(
                        reg_losses) + loss_op

                gradients, params = list(
                    zip(*opt.compute_gradients(loss_op, params)))
                if self.clip_gradients:
                    gradients, _ = tf.clip_by_global_norm(
                        gradients, max_gradient_norm)

                # Summaries for loss, variables, gradients, gradient norms and total gradient norm.
                summaries = [
                    tf.summary.scalar("loss", loss_op),
                    tf.summary.scalar("total_gradient_norm",
                                      tf.global_norm(gradients))
                ]
                all_summaries = tf.summary.merge(summaries)
                self.summaries_by_bucket.append(all_summaries)

                # update op - apply gradients
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    self.updates.append(
                        opt.apply_gradients(list(zip(gradients, params)),
                                            global_step=self.global_step))

        self.saver_all = tf.train.Saver(tf.all_variables())
        self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and load_model:
            # pylint: disable=no-member
            logging.info("Reading model parameters from %s",
                         ckpt.model_checkpoint_path)
            self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            logging.info("Created model with fresh parameters.")
            self.sess.run(tf.initialize_all_variables())
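
The id-to-string decoding above (a MutableHashTable lookup folded right-to-left, truncating at EOS) can be exercised in isolation. A minimal sketch assuming a TF1 runtime with tf.contrib available; the toy CHARMAP and EOS_ID here are illustrative:

import tensorflow as tf

CHARMAP = ['', '', 'a', 'b', 'c']  # index 1 stands in for EOS below
EOS_ID = 1

table = tf.contrib.lookup.MutableHashTable(
    key_dtype=tf.int64, value_dtype=tf.string, default_value="")
insert = table.insert(
    tf.constant(list(range(len(CHARMAP))), dtype=tf.int64),
    tf.constant(CHARMAP))

ids = tf.constant([2, 3, 4, 1, 0], dtype=tf.int64)  # 'abc', then EOS
with tf.control_dependencies([insert]):
    # Fold from the right; hitting EOS resets the accumulator, so only
    # the characters left of the first EOS survive.
    decoded = tf.foldr(
        lambda a, x: tf.cond(tf.equal(x, EOS_ID),
                             lambda: tf.constant(''),
                             lambda: table.lookup(x) + a),
        ids, initializer='')

with tf.Session() as sess:
    print(sess.run(decoded))  # b'abc'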
Beispiel #34
    def __init__(self, g_net, d_net, z_sampler, data, model,
                 sigma, digit, reg, lr, cilpc, batch_size,
                 num_batches, plot_size, save_size, d_iters,
                 data_name, data_path, path_output):  # changed
        self.model = model
        self.data = data
        self.g_net = g_net
        self.d_net = d_net
        self.z_sampler = z_sampler
        self.x_dim = self.d_net.x_dim
        self.z_dim = self.g_net.z_dim
        self.sigma = sigma
        self.digit = digit
        self.regc = reg
        self.lr = lr
        self.cilpc = cilpc
        self.batch_size = batch_size
        self.num_batches = num_batches
        self.plot_size = plot_size
        self.save_size = save_size
        self.d_iters = d_iters
        self.data_name = data_name
        self.data_path = data_path
        self.path_output = path_output

        (self.data_td, self.label_td), (_, _) = mnist_db.load_data()
        self.data_td = np.reshape(self.data_td, (-1, 784))

        self.data_td = normlization(self.data_td)

        self.x = tf.placeholder(tf.float32, [None, self.x_dim], name='x')  # [None, 784]
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        self.x_ = self.g_net(self.z)

        self.d = self.d_net(self.x, reuse=False)
        self.d_ = self.d_net(self.x_)

        self.g_loss = tf.reduce_mean(self.d_)
        self.d_loss = tf.reduce_mean(self.d) - tf.reduce_mean(self.d_)

        self.reg = tc.layers.apply_regularization(
            tc.layers.l1_regularizer(self.regc),
            weights_list=[var for var in tf.all_variables() if 'weights' in var.name]
        )
        self.g_loss_reg = self.g_loss + self.reg
        self.d_loss_reg = self.d_loss + self.reg

        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self.d_rmsprop = tf.train.RMSPropOptimizer(learning_rate=self.lr)  # DP case
            grads_and_vars = self.d_rmsprop.compute_gradients(-1 * self.d_loss_reg, var_list=self.d_net.vars)
            dp_grads_and_vars = []  # noisy version
            for gv in grads_and_vars:  # for each (gradient, variable) pair
                # The gradient tensor, e.g. Tensor("gradients/AddN_37:0", shape=(4, 4, 1, 64), dtype=float32)
                g = gv[0]
                if g is not None:  # skip variables with no gradient
                    g = self.dpnoise(g, self.batch_size)
                dp_grads_and_vars.append((g, gv[1]))
            self.d_rmsprop_new = self.d_rmsprop.apply_gradients(dp_grads_and_vars)  # should assign to a new optimizer
            # self.d_rmsprop = tf.train.RMSPropOptimizer(learning_rate=self.lr) \
            #     .minimize(-1*self.d_loss_reg, var_list=self.d_net.vars) # non-DP case
            self.g_rmsprop = tf.train.RMSPropOptimizer(learning_rate=self.lr) \
                .minimize(-1 * self.g_loss_reg, var_list=self.g_net.vars)

        self.d_clip = [v.assign(tf.clip_by_value(v, -1 * self.cilpc, self.cilpc)) for v in self.d_net.vars]
        # Track the effect of noise on the norm of the D-net gradient vector; drop None entries.
        self.d_net_var_grad = [i for i in tf.gradients(self.d_loss_reg, self.d_net.vars)
                               if i is not None]
        self.norm_d_net_var_grad = []
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.g_loss_store = []  # store loss of generator
        self.d_loss_store = []  # store loss of discriminator
        self.wdis_store = []  # store Wasserstein distance, new added
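
The dpnoise helper used above is not part of this excerpt; in the DP-WGAN setting it is typically additive Gaussian noise on each gradient, scaled down by the batch size. One plausible sketch (the sigma noise multiplier mirrors self.sigma and is an assumption):

import tensorflow as tf

def dpnoise(tensor, batch_size, sigma=1.0):
    # Add iid Gaussian noise to a gradient tensor; dividing the stddev by
    # batch_size matches gradients that are averaged over the batch.
    noise = tf.random_normal(tf.shape(tensor), mean=0.0,
                             stddev=sigma / batch_size, dtype=tf.float32)
    return tensor + noise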
Beispiel #35
B=100
C=3
batch_size=128
data_size=2000

x_dim=(40,40)
u_dim=2

k=.1
A=int(k*data_size) # number of samples we gather on each cycle

policy_eval=PlanePolicy(1, x_dim, u_dim, "epolicy")
policy_batch=PlanePolicy(batch_size, x_dim, u_dim, "epolicy", share=True)

e2c = E2CPlaneModel(policy_batch.u, batch_size)
for v in tf.all_variables():
  print("%s : %s" % (v.name, v.get_shape()))
e2c.buildLoss(lambd=.25)

policy_batch.set_reward(e2c.loss) # drive towards area where prediction is weak
sess=tf.InteractiveSession()

# save both policy and E2C variables
saver = tf.train.Saver(max_to_keep=num_episodes)

def getXs(D,idx):
  p0s = D[idx,0:2].reshape((batch_size,-1))
  u0v = D[idx,2:4]
  p1s = D[idx,4:6].reshape((batch_size,-1))

  x0v = np.zeros((batch_size,1600))
Beispiel #36
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=len(onehot_label),
                      vocab_size=len(vocabulary),
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=list(map(int, FLAGS.num_filters.split(","))),
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = 0
        train_op = tf.train.AdamOptimizer(0.001).minimize(cnn.loss)

        saver = tf.train.Saver(tf.all_variables())

        # Initialize all variables
        sess.run(tf.initialize_all_variables())

        if FLAGS.use_word2vec:
            # initial matrix with random uniform
            if os.path.isfile(FLAGS.w2v_data_path):
                initE = np.load(FLAGS.w2v_data_path)
            else:
                initE = np.random.uniform(
                    -1.0, 1.0, (len(vocabulary), FLAGS.embedding_dim))

                # load any vectors from the word2vec
                print("Loading word2vec file {}\n".format(FLAGS.word2vec))
                word_vectors = KeyedVectors.load_word2vec_format(
Beispiel #37
    def __init__(self, source_vocab_size, target_vocab_size, en_de_seq_len, hidden_size, num_layers,
                 batch_size, learning_rate, num_samples=1024,
                 forward_only=False, beam_search=True, beam_size=10):
        '''
        Initialize and build the model.
        :param source_vocab_size: vocab size of the encoder input
        :param target_vocab_size: vocab size of the decoder input (same as the source here)
        :param en_de_seq_len: maximum lengths of the source and target sequences
        :param hidden_size: number of hidden units in the RNN
        :param num_layers: number of stacked RNN layers
        :param batch_size: batch size
        :param learning_rate: learning rate
        :param num_samples: number of samples for sampled softmax when computing the loss
        :param forward_only: set to True at inference time
        :param beam_search: whether inference uses greedy search or beam search
        :param beam_size: beam width for beam search
        '''
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.en_de_seq_len = en_de_seq_len
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.num_samples = num_samples
        self.forward_only = forward_only
        self.beam_search = beam_search
        self.beam_size = beam_size
        self.global_step = tf.Variable(0, trainable=False)

        output_projection = None
        softmax_loss_function = None
        # Define the sampled loss function, passed to sequence_loss_by_example later on
        if num_samples > 0 and num_samples < self.target_vocab_size:
            w = tf.get_variable('proj_w', [hidden_size, self.target_vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable('proj_b', [self.target_vocab_size])
            output_projection = (w, b)
            # Call sampled_softmax_loss to compute a sampled loss, which saves computation
            def sample_loss(logits, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, labels=labels, inputs=logits, num_sampled=num_samples, num_classes=self.target_vocab_size)
            softmax_loss_function = sample_loss

        self.keep_drop = tf.placeholder(tf.float32)
        # Define the multi-layer dropout RNN cells used by the encoder and decoder
        def create_rnn_cell():
            encoDecoCell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
            encoDecoCell = tf.contrib.rnn.DropoutWrapper(encoDecoCell, input_keep_prob=1.0, output_keep_prob=self.keep_drop)
            return encoDecoCell
        encoCell = tf.contrib.rnn.MultiRNNCell([create_rnn_cell() for _ in range(num_layers)])

        # Define the input placeholders, kept as lists
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.decoder_targets = []
        self.target_weights = []
        for i in range(en_de_seq_len[0]):
            self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None, ], name="encoder{0}".format(i)))
        for i in range(en_de_seq_len[1]):
            self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None, ], name="decoder{0}".format(i)))
            self.decoder_targets.append(tf.placeholder(tf.int32, shape=[None, ], name="target{0}".format(i)))
            self.target_weights.append(tf.placeholder(tf.float32, shape=[None, ], name="weight{0}".format(i)))

        # Inference mode: feed the previous step's output as the next step's input
        if forward_only:
            if beam_search:  # For beam search, call our own embedding_attention_seq2seq instead of the legacy_seq2seq version
                self.beam_outputs, _, self.beam_path, self.beam_symbol = embedding_attention_seq2seq(
                    self.encoder_inputs, self.decoder_inputs, encoCell, num_encoder_symbols=source_vocab_size,
                    num_decoder_symbols=target_vocab_size, embedding_size=hidden_size,
                    output_projection=output_projection, feed_previous=True)
            else:
                decoder_outputs, _ = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                    self.encoder_inputs, self.decoder_inputs, encoCell, num_encoder_symbols=source_vocab_size,
                    num_decoder_symbols=target_vocab_size, embedding_size=hidden_size,
                    output_projection=output_projection, feed_previous=True)
                # The seq2seq model does not apply output_projection internally, so apply it to the outputs ourselves
                if output_projection is not None:
                    self.outputs = tf.matmul(decoder_outputs, output_projection[0]) + output_projection[1]
        else:
            # The output is not fed back as the next step's input, so output_projection is not needed
            decoder_outputs, _ = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                self.encoder_inputs, self.decoder_inputs, encoCell, num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size, embedding_size=hidden_size, output_projection=output_projection,
                feed_previous=False)
            self.loss = tf.contrib.legacy_seq2seq.sequence_loss(
                decoder_outputs, self.decoder_targets, self.target_weights, softmax_loss_function=softmax_loss_function)

            # Initialize the optimizer
            opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08)
            self.optOp = opt.minimize(self.loss)

        self.saver = tf.train.Saver(tf.all_variables())
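
The sampled-softmax wiring above is easy to get backwards, since tf.nn.sampled_softmax_loss expects the projection weights transposed to [vocab, hidden]. A standalone sketch with toy sizes (variable names are illustrative):

import tensorflow as tf

hidden_size, vocab_size, num_samples = 64, 10000, 256

w = tf.get_variable('proj_w_demo', [hidden_size, vocab_size])
b = tf.get_variable('proj_b_demo', [vocab_size])

def sample_loss(logits, labels):
    # 'logits' here are the raw RNN outputs of shape [batch, hidden_size].
    labels = tf.reshape(labels, [-1, 1])
    return tf.nn.sampled_softmax_loss(tf.transpose(w), b, labels=labels,
                                      inputs=logits, num_sampled=num_samples,
                                      num_classes=vocab_size)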
Beispiel #38
def initialize():
    new_variables = set(tf.all_variables()) - ALREADY_INITIALIZED
    get_session().run(tf.initialize_variables(new_variables))
    ALREADY_INITIALIZED.update(new_variables)
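
Used incrementally, the idiom above initializes only variables created since the previous call, leaving restored or already-trained values untouched. A runnable two-phase sketch:

import tensorflow as tf

ALREADY_INITIALIZED = set()
sess = tf.Session()

w = tf.get_variable('w', [4])
new_vars = set(tf.all_variables()) - ALREADY_INITIALIZED
sess.run(tf.initialize_variables(list(new_vars)))  # initializes only w
ALREADY_INITIALIZED.update(new_vars)

b = tf.get_variable('b', [4])
new_vars = set(tf.all_variables()) - ALREADY_INITIALIZED
sess.run(tf.initialize_variables(list(new_vars)))  # initializes only b; w keeps its value
ALREADY_INITIALIZED.update(new_vars)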
Beispiel #39
def main():
    s = {
        'nh1': 450,
        'nh2': 450,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.0001,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,  # 50 training epochs in total
        'batch_size': 16,  # batch_size=16
        'keep_prob': 1.0,
        'check_dir':
        './checkpoints/kp20k_mycps_multisize_CNN_LSTM_attention_Adam_0.0001_16_again',
        'display_test_per': 1,
        'lr_decay_per': 5
    }

    # load the dataset
    # data_set_file = 'CNTN/data/inspec_wo_stem/data_set.pkl'
    # emb_file = 'CNTN/data/inspec_wo_stem/embedding.pkl'
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/ACL2017_t_a_embedding.pkl'
    # train_set, test_set, dic, embedding = load.atisfold(data_set_file, emb_file)
    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    test_lex, test_y, test_z = test_set

    y_nclasses = 2
    z_nclasses = 5

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
        allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        my_model = mymodel.myModel(nh1=s['nh1'],
                                   nh2=s['nh2'],
                                   ny=y_nclasses,
                                   nz=z_nclasses,
                                   de=s['emb_dimension'],
                                   lr=s['lr'],
                                   lr_decay=s['lr_decay'],
                                   embedding=embedding,
                                   max_gradient_norm=s['max_grad_norm'],
                                   batch_size=s['batch_size'],
                                   rnn_model_cell='lstm')

        checkpoint_dir = s['check_dir']
        logfile = open(str(s['check_dir']) + '/predict_log.txt',
                       'a',
                       encoding='utf-8')
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # print(ckpt.all_model_checkpoint_paths[4])
            print(ckpt.model_checkpoint_path)
            logfile.write(str(ckpt.model_checkpoint_path) + '\n')
            saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            feed = {
                my_model.cnn_input_x: cwords,
                my_model.keep_prob: s['keep_prob']
            }
            fetches = my_model.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        predictions_test = []
        groundtruth_test = []
        start_num = 0
        steps = len(test_lex) // s['batch_size']
        # for batch in tl.iterate.minibatches(test_lex, test_z, batch_size=s['batch_size']):
        print('testing............')
        for step in range(steps):
            # batch = batch_putin(test_lex, test_z, start_num=start_num, batch_size=s['batch_size'])
            x, z = test_batch_putin(test_lex,
                                    test_z,
                                    start_num=start_num,
                                    batch_size=s['batch_size'])
            # x, z = batch
            x = load.pad_sentences(x)
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)
            start_num += s['batch_size']
            if step % 100 == 0:
                print('tested %d batch......' % (step // 100))

        print("测试结果:")
        logfile.write("测试结果:\n")
        res_test = tools.conlleval(predictions_test, groundtruth_test)
        print('all: ', res_test)
        logfile.write('all: ' + str(res_test) + '\n')
        res_test_top5 = tools.conlleval_top(predictions_test, groundtruth_test,
                                            5)
        print('top5: ', res_test_top5)
        logfile.write('top5: ' + str(res_test_top5) + '\n')
        res_test_top10 = tools.conlleval_top(predictions_test,
                                             groundtruth_test, 10)
        print('top10: ', res_test_top10)
        logfile.write('top10: ' + str(res_test_top10) + '\n')
    logfile.close()
Beispiel #40
def gen_head_poetry(heads, type):
    if type != 5 and type != 7:
        print('The second para has to be 5 or 7!')
        return

    def to_word(weights):
        t = np.cumsum(weights)
        s = np.sum(weights)
        sample = int(np.searchsorted(t, np.random.rand(1) * s))
        return words[sample]

    _, last_state, probs, cell, initial_state = neural_network()
    Session_config = tf.ConfigProto(allow_soft_placement=True)
    Session_config.gpu_options.allow_growth = True

    with tf.Session(config=Session_config) as sess:
        with tf.device('/gpu:1'):

            sess.run(tf.global_variables_initializer())  # tf.initialize_all_variables() in older TF

            saver = tf.train.Saver(tf.all_variables())
            saver.restore(sess, 'model/poetry.module-99')
            poem = ''
            for head in heads:
                flag = True
                while flag:

                    state_ = sess.run(cell.zero_state(1, tf.float32))

                    x = np.array([list(map(word_num_map.get, u'['))])
                    [probs_, state_] = sess.run([probs, last_state],
                                                feed_dict={
                                                    input_data: x,
                                                    initial_state: state_
                                                })

                    sentence = head

                    x = np.zeros((1, 1))
                    x[0, 0] = word_num_map[sentence]
                    [probs_, state_] = sess.run([probs, last_state],
                                                feed_dict={
                                                    input_data: x,
                                                    initial_state: state_
                                                })
                    word = to_word(probs_)
                    sentence += word

                    while word != u'。':
                        x = np.zeros((1, 1))
                        print(x)
                        x[0, 0] = word_num_map[word]
                        print(x)
                        [probs_, state_] = sess.run([probs, last_state],
                                                    feed_dict={
                                                        input_data: x,
                                                        initial_state: state_
                                                    })
                        word = to_word(probs_)
                        sentence += word

                    if len(sentence) == 2 + 2 * type:
                        sentence += u'\n'
                        poem += sentence
                        flag = False

            return poem
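
The to_word helper above draws from the output distribution with a cumulative-sum trick rather than np.random.choice. The same sampler in isolation, with illustrative weights:

import numpy as np

def sample_index(weights):
    # Draw one index with probability proportional to its weight.
    t = np.cumsum(weights)
    s = np.sum(weights)
    return int(np.searchsorted(t, np.random.rand(1) * s))

probs = np.array([0.1, 0.6, 0.3])
counts = np.bincount([sample_index(probs) for _ in range(10000)], minlength=3)
print(counts / 10000.0)  # roughly [0.1, 0.6, 0.3]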
Beispiel #41
def main(_):
  if FLAGS.checkpoints:
    # Get the checkpoints list from flags and run some basic checks.
    checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")]
    checkpoints = [c for c in checkpoints if c]
    if not checkpoints:
      raise ValueError("No checkpoints provided for averaging.")
    if FLAGS.prefix:
      checkpoints = [FLAGS.prefix + c for c in checkpoints]
  else:
    assert FLAGS.num_last_checkpoints >= 1, "Must average at least one model"
    assert FLAGS.prefix, ("Prefix must be provided when averaging last"
                          " N checkpoints")
    checkpoint_state = tf.train.get_checkpoint_state(
        os.path.dirname(FLAGS.prefix))
    # Checkpoints are ordered from oldest to newest.
    checkpoints = checkpoint_state.all_model_checkpoint_paths[
        -FLAGS.num_last_checkpoints:]

  checkpoints = [c for c in checkpoints if checkpoint_exists(c)]
  if not checkpoints:
    if FLAGS.checkpoints:
      raise ValueError(
          "None of the provided checkpoints exist. %s" % FLAGS.checkpoints)
    else:
      raise ValueError("Could not find checkpoints at %s" %
                       os.path.dirname(FLAGS.prefix))

  # Read variables from all checkpoints and average them.
  tf.logging.info("Reading variables and averaging checkpoints:")
  for c in checkpoints:
    tf.logging.info("%s ", c)
  var_list = tf.contrib.framework.list_variables(checkpoints[0])
  var_values, var_dtypes = {}, {}
  for (name, shape) in var_list:
    if not name.startswith("global_step"):
      var_values[name] = np.zeros(shape)
  for checkpoint in checkpoints:
    reader = tf.contrib.framework.load_checkpoint(checkpoint)
    for name in var_values:
      tensor = reader.get_tensor(name)
      var_dtypes[name] = tensor.dtype
      var_values[name] += tensor
    tf.logging.info("Read from checkpoint %s", checkpoint)
  for name in var_values:  # Average.
    var_values[name] /= len(checkpoints)

  tf_vars = [
      tf.get_variable(v, shape=var_values[v].shape, dtype=var_dtypes[v])
      for v in var_values
  ]
  placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
  assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]
  global_step = tf.Variable(
      0, name="global_step", trainable=False, dtype=tf.int64)
  saver = tf.train.Saver(tf.all_variables())

  # Build a model consisting only of variables, set them to the average values.
  with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for p, assign_op, (name, value) in zip(placeholders, assign_ops,
                                           six.iteritems(var_values)):
      sess.run(assign_op, {p: value})
    # Use the built saver to save the averaged checkpoint.
    saver.save(sess, FLAGS.output_path, global_step=global_step)

  tf.logging.info("Averaged checkpoints saved in %s", FLAGS.output_path)
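
The averaging itself is plain accumulate-and-divide over per-variable arrays; a numpy sketch of the same arithmetic using two hypothetical checkpoints:

import numpy as np

ckpt_a = {'w': np.array([1.0, 2.0]), 'b': np.array([0.5])}  # illustrative values
ckpt_b = {'w': np.array([3.0, 4.0]), 'b': np.array([1.5])}

var_values = {name: np.zeros_like(val) for name, val in ckpt_a.items()}
for ckpt in (ckpt_a, ckpt_b):
    for name in var_values:
        var_values[name] += ckpt[name]
for name in var_values:
    var_values[name] /= 2  # len(checkpoints)

print(var_values['w'])  # [2. 3.]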
Beispiel #42
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs_2()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference1(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
Beispiel #43
def test(load_version, sift_test_list, iss_test_list, submap_id, cam_id,
         submap_image_id):
    print('-----------------  START to test  -----------------')

    sift_test_list = sift_test_list[submap_id - 1][submap_image_id - 1]
    iss_test_list = iss_test_list[submap_id - 1]

    # record test_list for checking
    with open('sift_test_list.txt', 'w') as file:
        for i in range(len(sift_test_list)):
            file.write('%s\n' % sift_test_list[i])

    with open('iss_test_list.txt', 'w') as file:
        for i in range(len(iss_test_list)):
            file.write('%s\n' % iss_test_list[i])

    # define placeholder
    image_pl = tf.placeholder(tf.float32,
                              shape=[batch_size, image_size, image_size, 3])
    pos_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])
    neg_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])

    is_training = tf.placeholder(tf.bool)

    # build model
    print('build model')
    with tf.device('/gpu:1'):  # use gpu 1 to forward
        with tf.variable_scope('image_branch') as scope:
            image_feature = vgg16(image_pl,
                                  is_training=True,
                                  output_dim=image_feature_dim,
                                  bn_decay=None)

        with tf.variable_scope('pointcloud_branch') as scope:
            pos_pcl_feature, _ = pointNet(pos_pcl_pl,
                                          pcl_feature_dim,
                                          is_training=is_training,
                                          use_bn=False,
                                          bn_decay=None)
            scope.reuse_variables()
            neg_pcl_feature, _ = pointNet(neg_pcl_pl,
                                          pcl_feature_dim,
                                          is_training=is_training,
                                          use_bn=False,
                                          bn_decay=None)

    saver = tf.train.Saver(tf.all_variables(),
                           max_to_keep=None)  # tf.global_variables

    # run model
    print('run model...')
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    with tf.Session(config=config) as sess:

        print('initialise model...')
        sess.run(tf.global_variables_initializer())
        print('   load model...')
        save_path = 'model/' + 'v1' + '/' + load_version + '_model.ckpt'
        saver.restore(sess, save_path)
        #restore_tf_model(sess)
        print("   Model loaded from: %s" % save_path)

        # -------------------- evaluate model ---------------------
        print('**** Validate ...')
        print('   Compute image and pcl descriptors')

        # test list
        sift_batch_num = len(sift_test_list) // batch_size
        sift_test_num = sift_batch_num * batch_size
        iss_batch_num = len(iss_test_list) // batch_size
        iss_test_num = iss_batch_num * batch_size

        img_feature = np.zeros([sift_test_num, image_feature_dim])
        pcl_feature = np.zeros([iss_test_num, pcl_feature_dim])

        # feed sift test list into the network
        batch_counter = 0
        print('---------- test sift ----------')
        for i in range(sift_batch_num):
            print("  *** sift progress: %d/%d" % (i, sift_batch_num))
            img_batch = getSIFTTestBatch(sift_test_list, i)
            #print img_batch.shape
            feed_dict = {image_pl: img_batch, is_training: False}
            img_batch_feature = sess.run(image_feature, feed_dict=feed_dict)
            #print type(img_batch_feature)
            img_feature[batch_counter:batch_counter +
                        img_batch_feature.shape[0], :] = img_batch_feature
            batch_counter += img_batch_feature.shape[0]

        # feed iss test list into the network
        batch_counter = 0
        print('-------- test iss --------------')
        for i in range(iss_batch_num):
            print("  *** iss progress: %d/%d" % (i, iss_batch_num))
            pcl_batch = getISSTestBatch(iss_test_list, i)
            feed_dict = {pos_pcl_pl: pcl_batch, is_training: False}
            pcl_batch_feature = sess.run(pos_pcl_feature, feed_dict=feed_dict)
            pcl_feature[batch_counter:batch_counter +
                        pcl_batch_feature.shape[0], :] = pcl_batch_feature
            batch_counter += pcl_batch_feature.shape[0]

        # compute distance array between img_feature and pcl_feature
        img_vec = np.sum(np.multiply(img_feature, img_feature),
                         axis=1,
                         keepdims=True)
        pcl_vec = np.sum(np.multiply(pcl_feature, pcl_feature),
                         axis=1,
                         keepdims=True)
        dist_array = img_vec + np.transpose(pcl_vec) - 2 * np.matmul(
            img_feature, np.transpose(pcl_feature))
        print("  image patch num: %d, submap pcl num: %d" %
              (dist_array.shape[0], dist_array.shape[1]))

        # find correspondences and record
        img_pcl_correspondences = []
        txt_file_path = "%s/%03d_cam%d_%03d.txt" % (
            sift_iss_correspond_dir, submap_id, cam_id, submap_image_id)
        with open(txt_file_path, "w") as file:
            for i in range(dist_array.shape[0]):
                min_dist_id = np.argmin(dist_array[i, :])
                min_dist_val = dist_array[i, min_dist_id]
                #print min_dist_val
                if min_dist_val <= thresh_dist:
                    img_pcl_correspondences.append(
                        [sift_test_list[i], iss_test_list[min_dist_id]])
                    file.write('%d %d %s %s\n' %
                               ((i + 1), (min_dist_id + 1), sift_test_list[i],
                                iss_test_list[min_dist_id]))
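
The dist_array expression above is the usual expansion of squared Euclidean distance, ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, evaluated for all image/pointcloud pairs at once. A small numpy check of the identity:

import numpy as np

img_feature = np.random.randn(5, 8)  # toy descriptors
pcl_feature = np.random.randn(7, 8)

img_vec = np.sum(img_feature ** 2, axis=1, keepdims=True)  # (5, 1)
pcl_vec = np.sum(pcl_feature ** 2, axis=1, keepdims=True)  # (7, 1)
dist = img_vec + pcl_vec.T - 2 * img_feature.dot(pcl_feature.T)

direct = ((img_feature[:, None, :] - pcl_feature[None, :, :]) ** 2).sum(-1)
assert np.allclose(dist, direct)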
logging.debug("Initializing...")

logging.debug("Agent...")
agent = Agent(saved_execution_times_prefix, adjacency_matrix_filename, feature_matrix_filename, benchmark, execution_features, output_schedule_filename=output_schedule_filename, adjacency_is_sparse=SPARSE_ADJ, num_repeats=NUM_REPEATS)

logging.debug("Replay Memory...")
num_update_transitions = len(agent.all_nodes_to_allocate)*NUM_EPISODES_PER_UPDATE
replay_memory = ReplayMemory(num_update_transitions)

logging.debug("Policy Network...")
with tf.variable_scope('policy_network'):
	flags.DEFINE_string('method_type', "reinforce_policy", "'value' or 'policy' based policy_network")
	flags.DEFINE_float('learning_rate', POLICY_LR, 'Initial learning rate.')
	policy_network = ReinforcePolicyNetwork_SimpleMLP(agent.undirected_adj, agent.feature_matrix.shape[0], agent.feature_matrix.shape[1], agent.actions_vector, include_partial_solution_feature=INCLUDE_PARTIAL_SOLUTION_FEATURE, zero_non_included_nodes=ZERO_NON_INCLUDED_NODES, variable_support=VARIABLE_SUPPORT)
	policy_saver = tf.train.Saver([v for v in tf.all_variables() if 'policy_network' in v.name])
	
logging.debug("Value Network...")
tf.flags.FLAGS.__delattr__('method_type')
tf.flags.FLAGS.__delattr__('learning_rate')
flags.DEFINE_string('method_type', "batched_statevalue", "'value' or 'policy' or 'statevalue' network")
flags.DEFINE_float('learning_rate', STATE_LR, 'Initial learning rate.')
with tf.variable_scope('statevalue_network'):
	value_network = BatchedStateValueNetwork(agent.undirected_adj, agent.feature_matrix.shape[0], agent.feature_matrix.shape[1], agent.actions_vector)
	value_saver = tf.train.Saver([v for v in tf.all_variables() if 'statevalue_network' in v.name])

logging.debug("Tensorflow session...")
session_config = tf.ConfigProto()
session_config.gpu_options.allow_growth=True
sess = tf.Session(config=session_config)
sess.run(tf.global_variables_initializer())
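
The per-network savers above rely only on name filtering over tf.all_variables(), so each sub-network can be checkpointed and restored independently. A minimal standalone sketch of the pattern:

import tensorflow as tf

with tf.variable_scope('policy_network'):
    tf.get_variable('w', [4, 4])
with tf.variable_scope('statevalue_network'):
    tf.get_variable('w', [4, 4])

policy_saver = tf.train.Saver(
    [v for v in tf.all_variables() if 'policy_network' in v.name])
value_saver = tf.train.Saver(
    [v for v in tf.all_variables() if 'statevalue_network' in v.name])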
Beispiel #45
def main(argv=None):
    # Keep simple ops on the CPU; only the network training itself runs on the GPUs.
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Define the basic training setup
        x, y_ = get_input()
        regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                                   global_step,
                                                   60000 / BATCH_SIZE,
                                                   LEARNING_RATE_DECAY)

        opt = tf.train.GradientDescentOptimizer(learning_rate)

        tower_grads = []
        reuse_variables = False
        # Run the network optimization on the different GPUs.
        for i in range(N_GPU):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope(
                        'GPU_%d' %
                        i) as scope:  # name_scope does not affect get_variable's namespace
                    cur_loss = get_loss(
                        x, y_, regularizer, scope,
                        reuse_variables)  # 4 variables in total: 2 weights and 2 biases
                    reuse_variables = True
                    grads = opt.compute_gradients(
                        cur_loss
                    )  # A list of (gradient, variable) pairs: [(grad1, var1), (grad2, var2), (grad3, var3), (grad4, var4)]
                    # There are 4 gradients because there are 4 variables (weight1, bias1, weight2, bias2); this is independent of the number of GPUs
                    tower_grads.append(grads)

        # Compute the average gradient for each variable.
        # The variables are shared, so sum and average the gradients from all GPUs.
        grad_and_vars = average_gradients(
            tower_grads
        )  # [GPU 1: [(g1, v1), (g2, v2), (g3, v3), (g4, v4)], GPU 2: [(g1, v1), (g2, v2), (g3, v3), (g4, v4)], ...]
        for grad, var in grad_and_vars:
            if grad is not None:
                tf.summary.histogram('gradients_on_average/%s' % var.op.name,
                                     grad)

        # Apply the averaged gradients to update the parameters.
        apply_gradient_op = opt.apply_gradients(grad_and_vars,
                                                global_step=global_step)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        # Maintain moving averages of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)
        # Each training step updates the variable values and their moving averages.
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        #sys.exit(-1)
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.summary.merge_all()
        init = tf.initialize_all_variables()
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=True)) as sess:
            # Initialize all variables and start the input queues.
            init.run()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(MODEL_SAVE_PATH, sess.graph)

            for step in range(TRAINING_STEPS):
                # Run one training step and record how long it takes.
                start_time = time.time()
                _, loss_value = sess.run([train_op, cur_loss])
                duration = time.time() - start_time

                # Periodically report training progress and measure throughput.
                if step != 0 and step % 10 == 0:
                    # Count the training examples processed in this step.
                    num_examples_per_step = BATCH_SIZE * N_GPU
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / N_GPU

                    # Print training info.
                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                    )
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))

                    # Visualize the training process with TensorBoard.
                    summary = sess.run(summary_op)
                    summary_writer.add_summary(summary, step)

                # Periodically save the current model.
                if step % 1000 == 0 or (step + 1) == TRAINING_STEPS:
                    checkpoint_path = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)
                    saver.save(sess, checkpoint_path, global_step=step)

            coord.request_stop()
            coord.join(threads)
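
average_gradients is not included in this excerpt; the canonical CIFAR-10 multi-GPU implementation averages each variable's gradient across towers, roughly as follows:

import tensorflow as tf

def average_gradients(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per GPU.
    # Returns a single list with gradients averaged over the towers.
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0_gpu0, var0), (grad0_gpu1, var0), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads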
Beispiel #46
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Prepare to export model to: %s', FLAGS.export_path)

    with tf.Graph().as_default():
        image, image_size, resized_image_size = _create_input_tensors()

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: FLAGS.num_classes},
            crop_size=FLAGS.crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.inference_scales) == (1.0, ):
            tf.logging.info('Exported model performs single-scale inference.')
            predictions = model.predict_labels(
                image,
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Exported model performs multi-scale inference.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                image,
                model_options=model_options,
                eval_scales=FLAGS.inference_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        raw_predictions = tf.identity(
            tf.cast(predictions[common.OUTPUT_TYPE], tf.float32),
            _RAW_OUTPUT_NAME)
        raw_probabilities = tf.identity(
            predictions[common.OUTPUT_TYPE + model.PROB_SUFFIX],
            _RAW_OUTPUT_PROB_NAME)

        # Crop the valid regions from the predictions.
        semantic_predictions = raw_predictions[:, :resized_image_size[0], :
                                               resized_image_size[1]]
        semantic_probabilities = raw_probabilities[:, :resized_image_size[0], :
                                                   resized_image_size[1]]

        # Resize back the prediction to the original image size.
        def _resize_label(label, label_size):
            # Expand dimension of label to [1, height, width, 1] for resize operation.
            label = tf.expand_dims(label, 3)
            resized_label = tf.image.resize_images(
                label,
                label_size,
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=True)
            return tf.cast(tf.squeeze(resized_label, 3), tf.int32)

        semantic_predictions = _resize_label(semantic_predictions, image_size)
        semantic_predictions = tf.identity(semantic_predictions,
                                           name=_OUTPUT_NAME)

        semantic_probabilities = tf.image.resize_bilinear(
            semantic_probabilities,
            image_size,
            align_corners=True,
            name=_OUTPUT_PROB_NAME)

        if FLAGS.quantize_delay_step >= 0:
            contrib_quantize.create_eval_graph()

        saver = tf.train.Saver(tf.all_variables())

        dirname = os.path.dirname(FLAGS.export_path)
        tf.gfile.MakeDirs(dirname)
        graph_def = tf.get_default_graph().as_graph_def(add_shapes=True)
        freeze_graph.freeze_graph_with_def_protos(
            graph_def,
            saver.as_saver_def(),
            FLAGS.checkpoint_path,
            _OUTPUT_NAME + ',' + _OUTPUT_PROB_NAME,
            restore_op_name=None,
            filename_tensor_name=None,
            output_graph=FLAGS.export_path,
            clear_devices=True,
            initializer_nodes=None)

        if FLAGS.save_inference_graph:
            tf.train.write_graph(graph_def, dirname, 'inference_graph.pbtxt')
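
Once frozen, the exported graph can be loaded back for inference with standard TF1 calls. A minimal sketch; the path and tensor name below are placeholders that would follow the _OUTPUT_NAME convention above:

import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('/path/to/frozen_inference_graph.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')
    # Tensor name is hypothetical; use whatever _OUTPUT_NAME was at export time.
    output = graph.get_tensor_by_name('semantic_predictions:0')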
Beispiel #47
def question_gen_run(argv):
    #parser = argparse.ArgumentParser()
    #parser.add_argument('--model_prefix', type=str, required=True, help='Prefix to the models.')
    #parser.add_argument('--in_path', type=str, required=True, help='The path to the test file.')
    #parser.add_argument('--out_path', type=str, required=True, help='The path to the output file.')
    #parser.add_argument('--mode', type=str, required=True, help='Can be `greedy` or `beam`')

    #args, unparsed = parser.parse_known_args()

    #model_prefix = args.model_prefix
    #in_path = args.in_path
    #out_path = args.out_path
    #mode = args.mode
    print(sys.argv)
    model_prefix = argv[0]
    in_path = argv[1]
    out_path = argv[2]
    mode = argv[3]

    print("CUDA_VISIBLE_DEVICES " + os.environ['CUDA_VISIBLE_DEVICES'])

    # load the configuration file
    print('Loading configurations from ' + model_prefix + ".config.json")
    FLAGS = namespace_utils.load_namespace(model_prefix + ".config.json")
    FLAGS = NP2P_trainer.enrich_options(FLAGS)

    # load vocabs
    print('Loading vocabs.')
    word_vocab = char_vocab = POS_vocab = NER_vocab = None
    if FLAGS.with_word:
        word_vocab = Vocab(FLAGS.word_vec_path, fileformat='txt2')
        print('word_vocab: {}'.format(word_vocab.word_vecs.shape))
    if FLAGS.with_char:
        char_vocab = Vocab(model_prefix + ".char_vocab", fileformat='txt2')
        print('char_vocab: {}'.format(char_vocab.word_vecs.shape))
    if FLAGS.with_POS:
        POS_vocab = Vocab(model_prefix + ".POS_vocab", fileformat='txt2')
        print('POS_vocab: {}'.format(POS_vocab.word_vecs.shape))
    if FLAGS.with_NER:
        NER_vocab = Vocab(model_prefix + ".NER_vocab", fileformat='txt2')
        print('NER_vocab: {}'.format(NER_vocab.word_vecs.shape))

    print('Loading test set.')
    if FLAGS.infile_format == 'fof':
        testset, _ = NP2P_data_stream.read_generation_datasets_from_fof(
            in_path, isLower=FLAGS.isLower)
    elif FLAGS.infile_format == 'plain':
        testset, _ = NP2P_data_stream.read_all_GenerationDatasets(
            in_path, isLower=FLAGS.isLower)
    else:
        testset, _ = NP2P_data_stream.read_all_GQA_questions(
            in_path, isLower=FLAGS.isLower, switch=FLAGS.switch_qa)
    print('Number of samples: {}'.format(len(testset)))

    print('Build DataStream ... ')
    batch_size = -1
    if mode.find('beam') >= 0: batch_size = 1
    devDataStream = NP2P_data_stream.QADataStream(testset,
                                                  word_vocab,
                                                  char_vocab,
                                                  POS_vocab,
                                                  NER_vocab,
                                                  options=FLAGS,
                                                  isShuffle=False,
                                                  isLoop=False,
                                                  isSort=True,
                                                  batch_size=batch_size)
    print('Number of instances in testDataStream: {}'.format(
        devDataStream.get_num_instance()))
    print('Number of batches in testDataStream: {}'.format(
        devDataStream.get_num_batch()))

    best_path = model_prefix + ".best.model"
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-0.01, 0.01)
        with tf.name_scope("Valid"):
            with tf.variable_scope("Model",
                                   reuse=False,
                                   initializer=initializer):
                valid_graph = ModelGraph(word_vocab=word_vocab,
                                         char_vocab=char_vocab,
                                         POS_vocab=POS_vocab,
                                         NER_vocab=NER_vocab,
                                         options=FLAGS,
                                         mode="decode")

        ## remove word_embedding from the variables to restore
        vars_ = {}
        for var in tf.all_variables():
            if "word_embedding" in var.name: continue
            if not var.name.startswith("Model"): continue
            vars_[var.name.split(":")[0]] = var
        saver = tf.train.Saver(vars_)

        initializer = tf.global_variables_initializer()
        #gpu_fraction = 0.1
        #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess = tf.Session()
        sess.run(initializer)

        saver.restore(sess, best_path)  # restore the model

        total = 0
        correct = 0
        if mode.endswith('evaluate'):
            ref_outfile = open(out_path + ".ref", 'wt')
            pred_outfile = open(out_path + ".pred", 'wt')
        else:
            outfile = open(out_path, 'wt')
        total_num = devDataStream.get_num_batch()
        devDataStream.reset()
        for i in range(total_num):
            cur_batch = devDataStream.get_batch(i)
            if mode == 'pointwise':
                (sentences, prediction_lengths, generator_input_idx,
                 generator_output_idx) = search(sess,
                                                valid_graph,
                                                word_vocab,
                                                cur_batch,
                                                FLAGS,
                                                decode_mode=mode)
                for j in xrange(cur_batch.batch_size):
                    cur_total = cur_batch.answer_lengths[j]
                    cur_correct = 0
                    for k in xrange(cur_total):
                        if generator_output_idx[
                                j, k] == cur_batch.in_answer_words[j, k]:
                            cur_correct += 1.0
                    total += cur_total
                    correct += cur_correct
                    outfile.write(
                        cur_batch.instances[j][1].tokText.encode('utf-8') +
                        "\n")
                    outfile.write(sentences[j].encode('utf-8') + "\n")
                    outfile.write("========\n")
                outfile.flush()
                print('Current dev accuracy is %d/%d=%.2f' %
                      (correct, total, correct / float(total) * 100))
            elif mode in ['greedy', 'multinomial']:
                print('Batch {}'.format(i))
                (sentences, prediction_lengths, generator_input_idx,
                 generator_output_idx) = search(sess,
                                                valid_graph,
                                                word_vocab,
                                                cur_batch,
                                                FLAGS,
                                                decode_mode=mode)
                for j in xrange(cur_batch.batch_size):
                    outfile.write(
                        cur_batch.instances[j][1].ID_num.encode('utf-8') +
                        "\n")
                    outfile.write(
                        cur_batch.instances[j][1].tokText.encode('utf-8') +
                        "\n")
                    outfile.write(sentences[j].encode('utf-8') + "\n")
                    outfile.write("========\n")
                outfile.flush()
            elif mode == 'greedy_evaluate':
                print('Batch {}'.format(i))
                (sentences, prediction_lengths, generator_input_idx,
                 generator_output_idx) = search(sess,
                                                valid_graph,
                                                word_vocab,
                                                cur_batch,
                                                FLAGS,
                                                decode_mode="greedy")
                for j in xrange(cur_batch.batch_size):
                    ref_outfile.write(
                        cur_batch.instances[j][1].tokText.encode('utf-8') +
                        "\n")
                    pred_outfile.write(sentences[j].encode('utf-8') + "\n")
                ref_outfile.flush()
                pred_outfile.flush()
            elif mode == 'beam_evaluate':
                print('Instance {}'.format(i))
                ref_outfile.write(
                    cur_batch.instances[0][1].tokText.encode('utf-8') + "\n")
                ref_outfile.flush()
                hyps = run_beam_search(sess, valid_graph, word_vocab,
                                       cur_batch, FLAGS)
                cur_passage = cur_batch.instances[0][0]
                cur_id2phrase = None
                if FLAGS.with_phrase_projection:
                    (cur_phrase2id, cur_id2phrase) = cur_batch.phrase_vocabs[0]
                cur_sent = hyps[0].idx_seq_to_string(cur_passage,
                                                     cur_id2phrase, word_vocab,
                                                     FLAGS)
                pred_outfile.write(cur_sent.encode('utf-8') + "\n")
                pred_outfile.flush()
            else:  # beam search
                print('Instance {}'.format(i))
                hyps = run_beam_search(sess, valid_graph, word_vocab,
                                       cur_batch, FLAGS)
                outfile.write(
                    "Input: " +
                    cur_batch.instances[0][0].tokText.encode('utf-8') + "\n")
                outfile.write(
                    "Truth: " +
                    cur_batch.instances[0][1].tokText.encode('utf-8') + "\n")
                for j in xrange(len(hyps)):
                    hyp = hyps[j]
                    cur_passage = cur_batch.instances[0][0]
                    cur_id2phrase = None
                    if FLAGS.with_phrase_projection:
                        (cur_phrase2id,
                         cur_id2phrase) = cur_batch.phrase_vocabs[0]
                    cur_sent = hyp.idx_seq_to_string(cur_passage,
                                                     cur_id2phrase, word_vocab,
                                                     FLAGS)
                    outfile.write("Hyp-{}: ".format(j) +
                                  cur_sent.encode('utf-8') +
                                  " {}".format(hyp.avg_log_prob()) + "\n")
                #outfile.write("========\n")
                outfile.flush()
        if mode.endswith('evaluate'):
            ref_outfile.close()
            pred_outfile.close()
        else:
            outfile.close()
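# Side note: the dev metric above is plain token-level accuracy. A minimal
# standalone sketch of the same computation (names are illustrative):
def token_accuracy(pred_idx, gold_idx, lengths):
    correct, total = 0.0, 0
    for pred, gold, n in zip(pred_idx, gold_idx, lengths):
        correct += sum(1 for k in range(n) if pred[k] == gold[k])
        total += n
    return correct / float(total)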
Example #48
def main(argv=None):  # pylint: disable=unused-argument
    if FLAGS.model_name:
        subdir = FLAGS.model_name
        preload_model = True
    else:
        subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
        preload_model = False
    log_dir = os.path.join(os.path.expanduser(FLAGS.logs_base_dir), subdir)
    model_dir = os.path.join(os.path.expanduser(FLAGS.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.mkdir(model_dir)

    np.random.seed(seed=FLAGS.seed)
    dataset = facenet.get_dataset(FLAGS.data_dir)
    train_set, validation_set = facenet.split_dataset(dataset,
                                                      FLAGS.train_set_fraction,
                                                      FLAGS.split_mode)

    print('Model directory: %s' % model_dir)

    with tf.Graph().as_default():
        tf.set_random_seed(FLAGS.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for input images
        images_placeholder = tf.placeholder(tf.float32,
                                            shape=(FLAGS.batch_size,
                                                   FLAGS.image_size,
                                                   FLAGS.image_size, 3),
                                            name='input')

        # Placeholder for phase_train
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        # Build the inference graph
        embeddings = facenet.inference_nn4_max_pool_96(
            images_placeholder, phase_train=phase_train_placeholder)

        # Split example embeddings into anchor, positive and negative
        anchor, positive, negative = tf.split(0, 3, embeddings)

        # Calculate triplet loss
        loss = facenet.triplet_loss(anchor, positive, negative)

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op, _ = facenet.train(loss, global_step)

        # Create a saver
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=0)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(log_dir, sess.graph)

        with sess.as_default():

            if preload_model:
                ckpt = tf.train.get_checkpoint_state(model_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    raise ValueError('Checkpoint not found')

            # Training and validation loop
            for epoch in range(FLAGS.max_nrof_epochs):
                # Train for one epoch
                step = train(sess, train_set, epoch, images_placeholder,
                             phase_train_placeholder, global_step, embeddings,
                             loss, train_op, summary_op, summary_writer)
                # Validate epoch
                validate(sess, validation_set, epoch, images_placeholder,
                         phase_train_placeholder, global_step, embeddings,
                         loss, train_op, summary_op, summary_writer)

                # Save the model checkpoint after each epoch
                print('Saving checkpoint')
                checkpoint_path = os.path.join(model_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                graphdef_dir = os.path.join(model_dir, 'graphdef')
                graphdef_filename = 'graph_def.pb'
                if (not os.path.exists(
                        os.path.join(graphdef_dir, graphdef_filename))):
                    print('Saving graph definition')
                    tf.train.write_graph(sess.graph_def, graphdef_dir,
                                         graphdef_filename, False)
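# facenet.triplet_loss is defined elsewhere; a minimal sketch of the usual
# formulation (an assumption, following the FaceNet paper, with margin alpha):
import tensorflow as tf

def triplet_loss_sketch(anchor, positive, negative, alpha=0.2):
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), 1)   # ||a - p||^2
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), 1)   # ||a - n||^2
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))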
Example #49
    def __init__(self,
                 vocab_size,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 num_samples=512,
                 forward_only=False,
                 max_dialog_length=10,
                 max_answer_length=20):

        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        self.max_dialog_length = max_dialog_length
        self.max_answer_length = max_answer_length

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None

        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.vocab_size:
            with tf.device("/cpu:0"):
                w = tf.get_variable("proj_w", [size, self.vocab_size])
                w_t = tf.transpose(w)
                b = tf.get_variable("proj_b", [self.vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                with tf.device("/cpu:0"):
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                      num_samples,
                                                      self.vocab_size)

            softmax_loss_function = sampled_loss
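            # Note: in this TF version's (weights, biases, inputs, labels, ...)
            # argument order, tf.nn.sampled_softmax_loss expects weights of
            # shape [vocab_size, size]; w_t above provides that, while w
            # itself ([size, vocab_size]) serves as the output projection.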

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
        cell = single_cell
        if num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return dialog_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                vocab_size,
                output_projection=output_projection,
                feed_previous=do_decode)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []

        for i in range(0, max_dialog_length):
            one_turn_encoder_inputs = []
            one_turn_decoder_inputs = []
            one_turn_target_weights = []
            for j in range(0, max_answer_length):
                one_turn_encoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="encoder{0}_{1}".format(i, j)))

            for j in range(0, max_answer_length + 1):
                one_turn_decoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="decoder{0}_{1}".format(i, j)))
                one_turn_target_weights.append(
                    tf.placeholder(tf.float32,
                                   shape=[None],
                                   name="weight{0}_{1}".format(i, j)))

            self.encoder_inputs.append(list(one_turn_encoder_inputs))
            self.decoder_inputs.append(list(one_turn_decoder_inputs))
            self.target_weights.append(list(one_turn_target_weights))

        # Our targets are decoder inputs shifted by one.
        targets = []
        for i in range(0, max_dialog_length):
            targets.append([
                self.decoder_inputs[i][j + 1]
                for j in xrange(len(self.decoder_inputs[i]) - 1)
            ])
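        # e.g. with decoder inputs [GO, w1, w2, EOS] the targets are
        # [w1, w2, EOS]: each step is trained to predict the next symbol.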

        # Training outputs and losses.
        if forward_only:
            self.outputs, _ = seq2seq_f(self.encoder_inputs,
                                        self.decoder_inputs, True)

            self.loss = 0
            for i in range(0, max_dialog_length):
                self.loss += sequence_loss(
                    self.outputs[i][:-1],
                    targets[i],
                    self.target_weights[i][:-1],
                    softmax_loss_function=softmax_loss_function)

            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                # Project each per-turn, per-step decoder output back into
                # the full vocabulary space.
                self.outputs = [
                    [tf.matmul(output, output_projection[0]) + output_projection[1]
                     for output in turn_outputs]
                    for turn_outputs in self.outputs]
        else:
            self.outputs, _ = seq2seq_f(self.encoder_inputs,
                                        self.decoder_inputs, False)

            self.loss = 0
            for i in range(0, max_dialog_length):
                self.loss += sequence_loss(
                    self.outputs[i][:-1],
                    targets[i],
                    self.target_weights[i][:-1],
                    softmax_loss_function=softmax_loss_function)

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not forward_only:
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)

            gradients = tf.gradients(self.loss, params)
            clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
                gradients, max_gradient_norm)
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
Example #50
def create_model(session, forward_only, beam_search):
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(
            FLAGS.post_vocab_size,
            FLAGS.response_vocab_size,
            _buckets,
            FLAGS.size,
            FLAGS.num_layers,
            FLAGS.max_gradient_norm,
            FLAGS.batch_size,
            FLAGS.learning_rate,
            FLAGS.learning_rate_decay_factor,
            embedding_size=FLAGS.embedding_size,
            forward_only=forward_only,
            beam_search=beam_search,
            beam_size=FLAGS.beam_size,
            category=FLAGS.category,
            use_emb=FLAGS.use_emb,
            use_imemory=FLAGS.use_imemory,
            use_ememory=FLAGS.use_ememory,
            emotion_size=FLAGS.emotion_size,
            imemory_size=FLAGS.imemory_size,
            dtype=dtype)
    see_variable = True
    if see_variable:
        for i in tf.global_variables():
            print(i.name, i.get_shape())
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    pre_ckpt = tf.train.get_checkpoint_state(FLAGS.pretrain_dir)
    if ckpt: #and tf.gfile.Exists(ckpt.model_checkpoint_path+".index"):
        if FLAGS.load_model == 0:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            path = ckpt.model_checkpoint_path[:ckpt.model_checkpoint_path.find('-')+1]+str(FLAGS.load_model)
            print("Reading model parameters from %s" % path)
            model.saver.restore(session, path)
    else:
        if pre_ckpt:
            session.run(tf.initialize_variables(model.initial_var))
            if FLAGS.pretrain > -1:
                path = pre_ckpt.model_checkpoint_path[:pre_ckpt.model_checkpoint_path.find('-')+1]+str(FLAGS.pretrain)
                print("Reading pretrain model parameters from %s" % path)
                model.pretrain_saver.restore(session, path)
            else:
                print("Reading pretrain model parameters from %s" % pre_ckpt.model_checkpoint_path)
                model.pretrain_saver.restore(session, pre_ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            session.run(tf.global_variables_initializer())
            vec_post, vec_response = data_utils.get_data(FLAGS.data_dir, FLAGS.post_vocab_size, FLAGS.response_vocab_size)
            initvec_post = tf.constant(vec_post, dtype=dtype, name='init_wordvector_post')
            initvec_response = tf.constant(vec_response, dtype=dtype, name='init_wordvector_response')
            embedding_post = [x for x in tf.trainable_variables() if x.name == 'embedding_attention_seq2seq/rnn/embedding_wrapper/embedding:0'][0] #bug1: RNN-rnn, EmbeddingWrapper-embedding_wrapper
            embedding_response = [x for x in tf.trainable_variables() if x.name == 'embedding_attention_seq2seq/embedding_attention_decoder/embedding:0'][0]
            session.run(embedding_post.assign(initvec_post))
            session.run(embedding_response.assign(initvec_response))
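            # The randomly initialised encoder/decoder embeddings are
            # overwritten with the pre-trained word vectors before training.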
        if FLAGS.use_ememory:
            vec_ememory = data_utils.get_ememory(FLAGS.data_dir, FLAGS.response_vocab_size)
            initvec_ememory = tf.constant(vec_ememory, dtype=dtype, name='init_ememory')
            ememory = [x for x in tf.all_variables() if x.name == 'embedding_attention_seq2seq/embedding_attention_decoder/external_memory:0'][0]
            session.run(ememory.assign(initvec_ememory))
    return model
Example #51
    def train(self, train_images, model_path, logs_path, learning_rate=1e-4, beta1=0.9, train_epochs=100,
              batch_size=128):
        # divide trainable variables into a group for D and a group for G
        t_vars = tf.trainable_variables()
        D_vars = [var for var in t_vars if 'd_' in var.name]
        G_vars = [var for var in t_vars if 'g_' in var.name]

        trainD_op = tf.train.AdamOptimizer(learning_rate, beta1).minimize(self.d_loss, var_list=D_vars)
        trainG_op = tf.train.AdamOptimizer(learning_rate, beta1).minimize(self.g_loss, var_list=G_vars)

        tf.get_variable_scope().reuse_variables()

        """ Summary """
        d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
        g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)

        # final summary operations
        g_sum_op = tf.summary.merge([g_loss_sum])
        d_sum_op = tf.summary.merge([d_loss_sum])
        '''
        TensorFlow Session
        '''
        # start TensorFlow session
        init = tf.initialize_all_variables()
        saver = tf.train.Saver(tf.all_variables())
        sess = tf.InteractiveSession()
        logdir = logs_path + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
        summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)
        sess.run(init)

        DISPLAY_STEP = 10
        index_in_epoch = 0

        # Pre-train discriminator
        for i in range(30):
            z_batch = np.random.normal(0, 1, size=[batch_size, self.z_dim]).astype(np.float32)
            batch_xs, index_in_epoch = _next_batch(train_images, batch_size, index_in_epoch)
            sess.run([trainD_op], feed_dict={self.X: batch_xs, self.Z: z_batch, self.phase: 1})
        # Train generator and discriminator together
        for i in range(train_epochs):
            # get new batch
            z_batch = np.random.normal(0, 1, size=[batch_size, self.z_dim]).astype(np.float32)
            batch_xs, index_in_epoch = _next_batch(train_images, batch_size, index_in_epoch)
            # train on batch
            # Train discriminator on both real and fake images
            _, summaryD = sess.run([trainD_op, d_sum_op],
                                   feed_dict={self.X: batch_xs, self.Z: z_batch, self.phase: 1})
            summary_writer.add_summary(summaryD, i)
            # Train generator
            _, summaryG = sess.run([trainG_op, g_sum_op], feed_dict={self.X: batch_xs, self.Z: z_batch, self.phase: 1})
            summary_writer.add_summary(summaryG, i)
            # check progress on every 1st,2nd,...,10th,20th,...,100th... step
            if i % DISPLAY_STEP == 0 or (i + 1) == train_epochs:
                dLoss, gLoss = sess.run([self.d_loss, self.g_loss],
                                        feed_dict={self.X: batch_xs, self.Z: z_batch, self.phase: 1})
                print("=========== updating G&D ==========")
                print("iteration:", i)
                print("gen loss:", gLoss)
                print("dis loss:", dLoss)

                outimage = self.Gen.eval(feed_dict={self.Z: z_batch, self.phase: 1}, session=sess)

                for index in range(3):
                    result = (outimage[index].astype(np.float32)) * 255.
                    result = np.clip(result, 0, 255).astype('uint8')
                    result = np.reshape(result, (28, 28))
                    cv2.imwrite("out" + str(index + 1) + ".bmp", result)

                if i % (DISPLAY_STEP * 10) == 0 and i:
                    DISPLAY_STEP *= 10

        summary_writer.close()

        save_path = saver.save(sess, model_path)
        print("Model saved in file:", save_path)
Example #52
    def train(self, train_images, model_path, logs_path, learning_rate=1e-4, train_epochs=100):
        # divide trainable variables into a group for D and a group for G
        t_vars = tf.trainable_variables()
        D_vars = [var for var in t_vars if 'd_' in var.name]
        G_vars = [var for var in t_vars if 'g_' in var.name]

        trainD_op = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.9).minimize(self.d_loss, var_list=D_vars)
        trainG_op = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.9).minimize(self.g_loss, var_list=G_vars)

        tf.get_variable_scope().reuse_variables()

        """ Summary """
        d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
        g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)

        # final summary operations
        g_sum_op = tf.summary.merge([g_loss_sum])
        d_sum_op = tf.summary.merge([d_loss_sum])
        '''
        TensorFlow Session
        '''
        # start TensorFlow session
        init = tf.initialize_all_variables()
        saver = tf.train.Saver(tf.all_variables())
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
        logdir = logs_path + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
        summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)
        sess.run(init)

        DISPLAY_STEP = 10
        index_in_epoch = 0
        start_time = time.time()

        # Train generator and discriminator together
        for i in range(train_epochs):
            d_iters = 5
            # get new batch
            batch_xs, index_in_epoch = _next_batch(train_images, self.batch_size, index_in_epoch)
            for _ in range(0, d_iters):
                z_batch = np.random.normal(0, 1, size=[self.batch_size, self.z_dim]).astype(np.float32)
                # train on batch
                # Train discriminator on both real and fake images
                _, summaryD, dLoss = sess.run([trainD_op, d_sum_op, self.d_loss],
                                              feed_dict={self.X: batch_xs, self.Z: z_batch, self.phase: 1})
                summary_writer.add_summary(summaryD, i)
            # Train generator
            z_batch = np.random.normal(0, 1, size=[self.batch_size, self.z_dim]).astype(np.float32)
            _, summaryG, gLoss = sess.run([trainG_op, g_sum_op, self.g_loss],
                                          feed_dict={self.Z: z_batch, self.phase: 1})
            summary_writer.add_summary(summaryG, i)
            # check progress on every 1st,2nd,...,10th,20th,...,100th... step
            if i % DISPLAY_STEP == 0 or (i + 1) == train_epochs:
                print("=========== updating G&D ==========")
                print('Time: %.2f' % (time.time() - start_time))
                print("iteration:", i)
                print("gen loss:", gLoss)
                print("dis loss:", dLoss)
                z_batch = np.random.normal(0, 1, size=[self.batch_size, self.z_dim]).astype(np.float32)
                outimage = self.Gen.eval(feed_dict={self.Z: z_batch, self.phase: 1}, session=sess)
                save_images(outimage, [8, 8], 'img/' + 'sample_%d_epoch.png' % (i))

                if i % (DISPLAY_STEP * 10) == 0 and i:
                    DISPLAY_STEP *= 10

        summary_writer.close()

        save_path = saver.save(sess, model_path)
        print("Model saved in file:", save_path)
Example #53
def train(trainpath,  ckptfile):
    immm = loadimg(trainpath)
    print ('testing() called')
    V = 64
    vz = 32
    margin = 32
    re_filed = 0
#    data_size = get_datasize(trainpath)
    
#    print ('training size:', data_size)


    with tf.Graph().as_default():
                 
        # placeholders for graph input
        view_ = tf.placeholder('float32', shape=(None, V, V, vz,1), name='im0')
#        y_ = tf.placeholder('int64', shape=(None, V-16, V-16, V-16), name='y')
        keep_prob_ = tf.placeholder('float32')

        # graph outputs
        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, keep_prob_)
#        pr=tf.nn.softmax(fc8)
#        loss = model.loss(fc8, y_)
#        train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)




        # must be after merge_all_summaries
#        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
#        validation_summary = tf.summary.scalar('validation_loss', validation_loss)
#        validation_acc = tf.placeholder('float32', shape=(), name='validation_accuracy')
#        validation_acc_summary = tf.summary.scalar('validation_accuracy', validation_acc)

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement))
        
        saver.restore(sess, ckptfile)
        print ('restore variables done')


        immm = padding(immm,margin)
#        zmmm = np.zeros(immm.shape)
        imum = get_cen(immm.shape,V,V,vz,margin,re_filed)
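        # get_cen yields the centre points of the sliding windows; each
        # V x V x vz crop around a centre is segmented below and the
        # predicted mask is written back into the padded volume.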
        
        for num in range(len(imum)):
                print(num)
                center_point = imum[num]
#                center_point[0] = center_point[0] +margin
#                center_point[1] = center_point[1] +margin
#
#                center_point[2] = center_point[2] + margin

                #print('center point:', center_point)
                image = immm[center_point[0]-V//2:center_point[0]+V//2,
                             center_point[1]-V//2:center_point[1]+V//2,
                             center_point[2]-vz//2:center_point[2]+vz//2]
                if image.shape !=(V,V,vz):
                    break
    
                image = np.expand_dims(image, axis = 0)
                image = np.expand_dims(image, axis = 4)
                        
#                        start_time = time.time()
                feed_dict = {view_: image,
                             keep_prob_: 0.5 }
        #                feed_dict_1 = {view_: batch_x,
        #                             keep_prob_: 0.5 }
        #                p_fc,p_softmax = sess.run(
        #                        [fc8,pr],
        #                        feed_dict=feed_dict_1)
        #                print(p_fc,p_softmax)
                        
                pred = sess.run(
                        prediction,
                        feed_dict=feed_dict)
#                pred = np.argmax(pred,-1)
                pred = pred[:,:,:,:,1]
                pred = np.array(pred)
                pred = np.squeeze(pred)
                bnn = np.argwhere(pred)
                pred = pred>0.5
                print(bnn.shape[0])
                immm[center_point[0]-V//2:center_point[0]+V//2,
                     center_point[1]-V//2:center_point[1]+V//2,
                     center_point[2]-vz//2:center_point[2]+vz//2] = pred
        immm = depadding(immm,margin)
        immm[immm>1] = 0
        immm = immm*255
        writetiff3d('/media/ttt/Elements/TanYinghui/TP/5_7.tif',immm.astype('uint8'))
Example #54
    def test(self, model):
        self.mode = 'test'

        test_input_path = './dataset/Xu et al.\'s dataset/TEST/INPUT/'
        test_gt_path = './dataset/Xu et al.\'s dataset/TEST/GT/'
        save_path = './dataset/Xu et al.\'s dataset/M0/JointFinetune/'
        if not os.path.exists(save_path):
            os.mkdir(save_path)

        test_input_list = [im for im in os.listdir(test_input_path) if im.endswith('.png')]
        test_gt_list = [im for im in os.listdir(test_gt_path) if im.endswith('.png')]
        
        test_num = len(test_input_list)
        print('Num. of test patches: ', test_num)

        edge_psnr_file = np.zeros(test_num)
        edge_ssim_file = np.zeros(test_num)
        psnr_file = np.zeros(test_num)
        ssim_file = np.zeros(test_num)

        test_size = 200
        test_down_size = test_size // self.sr_scale

        with tf.Graph().as_default():
            EPCNN_input = tf.placeholder(shape=[None, self.train_down_size, self.train_down_size, 4], dtype=tf.float32)
            GRCNN_input = tf.placeholder(shape=[None, self.train_down_size, self.train_down_size, 3], dtype=tf.float32)
            Tar_edge = tf.placeholder(shape=[None, self.train_size, self.train_size, 1], dtype=tf.float32)
            Tar_image = tf.placeholder(shape=[None, self.train_size, self.train_size, 3], dtype=tf.float32)

            EPCNN_output = self.EPCNN_inference(EPCNN_input)
            EPCNN_output = tf.clip_by_value(EPCNN_output, 0.0, 255.0)
            GRCNN_output = self.GRCNN_inference(GRCNN_input, EPCNN_output)
            GRCNN_output = tf.clip_by_value(GRCNN_output, 0.0, 255.0)

            para_num = np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
            print('Num. of Parameters: ', para_num)

            var_list = [v for v in tf.all_variables() if v.name.startswith('EPCNN') or v.name.startswith('GRCNN')]
            saver = tf.train.Saver(var_list)

            with tf.Session() as sess:
                saver.restore(sess, os.path.join(self.model_path, model))

                for i in range(test_num):
                    ep_input, gr_input, target_edge, target_image = im2tfrecord.generatingSyntheticEdge(os.path.join(test_input_path, test_input_list[i]),
                                                                                                        os.path.join(test_gt_path, test_gt_list[i]))
                    ep_input = ep_input.astype(np.float32)
                    gr_input = gr_input.astype(np.float32)
                    target_edge = target_edge.astype(np.float32)
                    target_image = target_image.astype(np.float32)
                    ep_input = np.expand_dims(ep_input, axis=0)
                    gr_input = np.expand_dims(gr_input, axis=0)
                    target_edge = np.expand_dims(target_edge, axis=0)
                    target_edge = np.expand_dims(target_edge, axis=3)
                    target_image = np.expand_dims(target_image, axis=0)

                    ep_output, gr_output = sess.run([EPCNN_output, GRCNN_output], feed_dict={EPCNN_input: ep_input, GRCNN_input: gr_input, 
                                                               Tar_edge: target_edge, Tar_image: target_image})
                    ep_output = np.squeeze(ep_output)
                    gr_output = np.squeeze(gr_output)
                    target_edge = np.squeeze(target_edge)
                    target_image = np.squeeze(target_image)
                    
                    ep_output = ep_output.astype('uint8')
                    gr_output = gr_output.astype('uint8')
                    target_edge = target_edge.astype('uint8')
                    target_image = target_image.astype('uint8')

                    edge_psnr_file[i] = psnr(ep_output, target_edge)
                    edge_ssim_file[i] = ssim(ep_output, target_edge)
                    psnr_file[i] = psnr(gr_output, target_image)
                    ssim_file[i] = ssim(gr_output, target_image)

                    save_name = test_input_list[i].split('.')[0][:-5]
                    cv2.imwrite(os.path.join(save_path, save_name + '_output_edge.png'), ep_output)
                    cv2.imwrite(os.path.join(save_path, save_name + '_output.png'), gr_output)

                print('JointFinetune: ', model)
                print('Edge PSNR: ', str(np.mean(edge_psnr_file)))
                print('Edge SSIM: ', str(np.mean(edge_ssim_file)))
                print('PSNR: ', str(np.mean(psnr_file)))
                print('SSIM: ', str(np.mean(ssim_file)))
Example #55
def test(load_version, sift_test_list, iss_test_list, submap_id):
    print ('-----------------  START to test  -----------------')
        
    #sift_test_list = sift_test_list[submap_id-1][submap_image_id-1]
    iss_test_list = iss_test_list[submap_id-1]
    iss_test_file = "iss_test_list_txt/%03d.txt" % submap_id        
    with open(iss_test_file, 'w') as file:
        for i in range(len(iss_test_list)):
            file.write('%s\n' % iss_test_list[i])
    
    # define placeholder
    image_pl   = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 3])
    pos_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])
    neg_pcl_pl = tf.placeholder(tf.float32, shape=[batch_size, pcl_size, 3])
    
    is_training = tf.placeholder(tf.bool)
    
    # build model
    print ('build model')
    with tf.device('/gpu:1'):   # use GPU 1 for the forward pass
        with tf.variable_scope('image_branch') as scope:
            image_feature = vgg16(image_pl, is_training=True, output_dim=image_feature_dim,
                                  bn_decay=None)
        
        with tf.variable_scope('pointcloud_branch') as scope:
            pos_pcl_feature,_ = pointNet(pos_pcl_pl, pcl_feature_dim, is_training=is_training, 
                                       use_bn=False, bn_decay=None)
            scope.reuse_variables()
            neg_pcl_feature,_ = pointNet(neg_pcl_pl, pcl_feature_dim, is_training=is_training, 
                                       use_bn=False, bn_decay=None)

    saver = tf.train.Saver(tf.all_variables(), max_to_keep=None)  # tf.global_variables

    # run model
    print('run model...')
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    with tf.Session(config=config) as sess:
        
        print('initialise model...')
        sess.run(tf.global_variables_initializer())
        print('   load model...')
        save_path = 'model/' + 'v1' + '/' + load_version +'_model.ckpt'
        saver.restore(sess, save_path)
        #restore_tf_model(sess)
        print("   Model loaded from: %s" % save_path)
                    
        # -------------------- evaluate model ---------------------
        print('**** Validate ...')
        print('   Compute image and pcl descriptors')
        

        iss_batch_num = len(iss_test_list) // batch_size 
        iss_test_num = iss_batch_num * batch_size      
        
        pcl_feature = np.zeros([iss_test_num, pcl_feature_dim])      
        
        # feed iss test list into the network
        batch_counter = 0
        print('-------- test iss --------------')
        for i in range(iss_batch_num):
            print("  *** iss progress: %d/%d" % (i, iss_batch_num))
            pcl_batch = getISSTestBatch(iss_test_list,i)
            feed_dict = {pos_pcl_pl:pcl_batch, is_training: False}
            pcl_batch_feature = sess.run(pos_pcl_feature, feed_dict=feed_dict)
            pcl_feature[batch_counter: batch_counter+pcl_batch_feature.shape[0],:] = pcl_batch_feature
            batch_counter += pcl_batch_feature.shape[0] 
            
        print('---------- test sift ----------')
        sift_submap_test_list = sift_test_list[submap_id-1]     # all images
        for k in range(len(sift_submap_test_list)):
            sift_test_list = sift_submap_test_list[k]       # image id: k+1
            cam_id = sift_test_list[0].split('/')[-2]   # expected 'cam1_xxx'
            # record test_list for checking
            sift_test_file = "sift_test_list_txt/%03d_%s.txt" % (submap_id, cam_id)
            with open(sift_test_file, 'w') as file:
                for i in range(len(sift_test_list)):
                    file.write('%s\n' % sift_test_list[i])
            
            # test the patches from one image in the submap
            sift_batch_num = len(sift_test_list) // batch_size
            sift_test_num = sift_batch_num * batch_size
            img_feature = np.zeros([sift_test_num, image_feature_dim])
            
            # feed sift test list into the network
            batch_counter = 0
            print("  *** image id: %d/%d" % (k,len(sift_submap_test_list)))
            for i in range(sift_batch_num):
                #print("  *** image id: %d/%d, batch id: %d/%d" % (k, len(sift_submap_test_list), i, sift_batch_num))
                img_batch = getSIFTTestBatch(sift_test_list, i)
                #print img_batch.shape
                feed_dict = {image_pl:img_batch, is_training: False}
                img_batch_feature = sess.run(image_feature, feed_dict=feed_dict)
                #print type(img_batch_feature)
                img_feature[batch_counter: batch_counter+img_batch_feature.shape[0],:] = img_batch_feature
                batch_counter += img_batch_feature.shape[0]          
            
            # compute distance array between img_feature and pcl_feature
            img_vec = np.sum(np.multiply(img_feature, img_feature), axis=1, keepdims=True)
            pcl_vec = np.sum(np.multiply(pcl_feature, pcl_feature), axis=1, keepdims=True)
            dist_array = img_vec + np.transpose(pcl_vec) - 2*np.matmul(img_feature, np.transpose(pcl_feature))
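            # Pairwise squared distances in one shot, using
            # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b;
            # rows index image patches, columns index point clouds.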
            print("  image patch num: %d, submap pcl num: %d" % (dist_array.shape[0], dist_array.shape[1]))
        
            # find correspondences and record
            # img_pcl_correspondences = [];
            cam_id = sift_test_list[0].split('/')[-2]
            txt_folder = "%s/%03d" % (sift_iss_correspond_dir, submap_id)
            if not os.path.exists(txt_folder):
                os.makedirs(txt_folder)
            txt_file_path = "%s/%s.txt" % (txt_folder, cam_id)
            top_k = 10
            with open(txt_file_path, "w") as file:
                for i in range(dist_array.shape[0]):
                    #min_dist_id = np.argmin(dist_array[i,:])
                    min_dist_id  = np.argsort(dist_array[i,:])[:top_k]
                    idx = np.concatenate((np.array([i+1]), min_dist_id+1))
                    #print(idx)
                    idx=idx.reshape(1, idx.shape[0])
                    np.savetxt(file, idx,fmt='%d')
Example #56
def train_and_eval():
    with tf.Graph().as_default():
        # common part on cpu
        with tf.device('/cpu:0'):
            # train/test phase indicator
            phase_train = tf.placeholder(tf.bool, name='phase_train')

            # learning rate is manually set
            learning_rate = tf.placeholder(tf.float32, name='learning_rate')

            # global step
            global_step = tf.Variable(0, trainable=False, name='global_step')

            # optimizer
            learning_rate_weights = learning_rate
            learning_rate_biases = 2.0 * learning_rate  # double learning rate for biases
            optim_weights = tf.train.MomentumOptimizer(learning_rate_weights,
                                                       0.9)
            optim_biases = tf.train.MomentumOptimizer(learning_rate_biases,
                                                      0.9)

        gpu_grads = []
        # per gpu
        for i in xrange(FLAGS.num_gpu):
            print('Initialize the {0}th gpu'.format(i))
            with tf.device('/gpu:{0}'.format(i)):
                with tf.name_scope('gpu_{0}'.format(i)) as scope:
                    if i > 0:
                        m.add_to_collection = False

                    # only one GPU's info is printed out, but all GPUs are
                    # summarized; multi-GPU execution is driven by train_op
                    # (average_gradients acts as the sync barrier), and at
                    # test time only one GPU is used
                    loss, accuracy, logits = loss_and_accuracy_per_gpu(
                        phase_train, scope)

                    # Reuse variables
                    tf.get_variable_scope().reuse_variables()

                    weights, biases = tf.get_collection(
                        'weights'), tf.get_collection('biases')
                    assert (len(weights) + len(biases) == len(
                        tf.trainable_variables()))

                    params = weights + biases
                    gradients = tf.gradients(loss, params, name='gradients')
                    gpu_grads.append(gradients)
        # add summary for all the entropy_losses and weight_l2_loss
        m.summary_losses()

        with tf.device('/cpu:0'):
            # set up train_op
            weights, biases = tf.get_collection('weights'), tf.get_collection(
                'biases')
            averaged_grads = average_gradients(gpu_grads)
            weights_grads = averaged_grads[:len(weights)]
            biases_grads = averaged_grads[len(weights):]
            apply_weights_op = optim_weights.apply_gradients(
                zip(weights_grads, weights), global_step=global_step)
            apply_biases_op = optim_biases.apply_gradients(
                zip(biases_grads, biases), global_step=global_step)
            train_op = tf.group(apply_weights_op, apply_biases_op)

            # saver
            saver = tf.train.Saver(tf.all_variables())

            # start session
            sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                                    allow_soft_placement=True))

            # summary
            summary_op = tf.merge_all_summaries()
            summary_writer = tf.train.SummaryWriter(FLAGS.log_dir,
                                                    graph_def=sess.graph_def)
            for var in tf.trainable_variables():
                tf.histogram_summary('params/' + var.op.name, var)

            # initialization
            init_op = tf.initialize_all_variables()

        if FLAGS.restore_path is None:
            print('Initializing...')
            sess.run(init_op, {phase_train.name: True})
        else:
            print('Restore variable from %s' % FLAGS.restore_path)
            saver.restore(sess, FLAGS.restore_path)

        # train loop
        tf.train.start_queue_runners(sess=sess)
        curr_lr = 0.0
        lr_scale = 1.0
        # NOTE: the interval should be a multiple of num_gpu
        for step in xrange(0, FLAGS.max_steps, FLAGS.num_gpu):
            # set learning rate manually
            if step <= 32000:
                _lr = lr_scale * 1e-1
            elif step <= 48000:
                _lr = lr_scale * 1e-2
            else:
                _lr = lr_scale * 1e-3
            if curr_lr != _lr:
                curr_lr = _lr
                print('Learning rate set to %f' % curr_lr)

            fetches = [train_op, loss]
            if step % FLAGS.summary_interval == 0:
                fetches += [accuracy, summary_op]
            sess_outputs = sess.run(fetches, {
                phase_train.name: True,
                learning_rate.name: curr_lr
            })

            if step % FLAGS.summary_interval == 0:
                train_loss_value, train_acc_value, summary_str = sess_outputs[
                    1:]
                print(
                    '[%s] Iteration %d, train loss = %f, train accuracy = %f' %
                    (datetime.now(), step, train_loss_value, train_acc_value))
                summary_writer.add_summary(summary_str, step)

            if step > 0 and step % FLAGS.save_interval == 0:
                checkpoint_path = os.path.join(FLAGS.log_dir, 'checkpoint')
                saver.save(sess, checkpoint_path, global_step=step)
                print('Checkpoint saved at %s' % checkpoint_path)
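# average_gradients is referenced above but defined elsewhere; a minimal
# sketch (an assumption, in the spirit of the TF multi-GPU CIFAR tutorial)
# that averages each variable's gradient across the per-GPU lists:
import tensorflow as tf

def average_gradients_sketch(gpu_grads):
    return [tf.reduce_mean(tf.stack(per_var_grads), 0)
            for per_var_grads in zip(*gpu_grads)]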
Example #57
def train():
    print('[Dataset Configuration]')
    print('\tCIFAR-100 dir: %s' % FLAGS.data_dir)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of test images: %d' % FLAGS.num_test_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    #print('\tResidual blocks per group: %d' % FLAGS.num_residual_units)
    #print('\tNetwork width multiplier: %d' % FLAGS.k)

    print('[Testing Configuration]')
    print('\tCheckpoint path: %s' % FLAGS.ckpt_path)
    print('\tDataset: %s' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tNumber of testing iterations: %d' % FLAGS.test_iter)
    print('\tOutput path: %s' % FLAGS.output)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    with tf.Graph().as_default():
        # The CIFAR-100 dataset
        with tf.variable_scope('test_image'):
            test_images, test_labels = data_input.inputs(
                not FLAGS.train_data, FLAGS.data_dir, FLAGS.batch_size)

        # The class labels
        with open(os.path.join(FLAGS.data_dir, 'batches.meta.txt')) as fd:
            classes = [temp.strip() for temp in fd.readlines()]

        # Build a Graph that computes the predictions from the inference model.
        images = tf.placeholder(tf.float32, [
            FLAGS.batch_size, data_input.IMAGE_SIZE, data_input.IMAGE_SIZE, 3
        ])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Build model
        decay_step = FLAGS.lr_step_epoch * FLAGS.num_train_instance / FLAGS.batch_size
        hp = wrinc.HParams(batch_size=FLAGS.batch_size,
                           num_classes=FLAGS.num_classes,
                           initial_lr=FLAGS.initial_lr,
                           decay_step=decay_step,
                           lr_decay=FLAGS.lr_decay,
                           momentum=FLAGS.momentum)
        network = wrinc.wrinc(hp, images, labels, None)
        network.build_model()
        # network.build_train_op()  # NO training op

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        if os.path.isdir(FLAGS.ckpt_path):
            ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_path)
            # Restores from checkpoint
            if ckpt and ckpt.model_checkpoint_path:
                print('\tRestore from %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found in the dir [%s]' %
                      FLAGS.ckpt_path)
                sys.exit(1)
        elif os.path.isfile(FLAGS.ckpt_path):
            print('\tRestore from %s' % FLAGS.ckpt_path)
            saver.restore(sess, FLAGS.ckpt_path)
        else:
            print('No checkpoint file found in the path [%s]' %
                  FLAGS.ckpt_path)
            sys.exit(1)

        # Start queue runners
        tf.train.start_queue_runners(sess=sess)

        # Testing!
        result_ll = [[0, 0] for _ in range(FLAGS.num_classes)]  # Correct/wrong counts for each class
        test_loss = 0.0
        for i in range(FLAGS.test_iter):
            test_images_val, test_labels_val = sess.run(
                [test_images, test_labels])
            preds_val, loss_value, acc_value = sess.run(
                [network.preds, network.loss, network.acc],
                feed_dict={
                    network.is_train: False,
                    images: test_images_val,
                    labels: test_labels_val
                })
            test_loss += loss_value
            for j in range(FLAGS.batch_size):
                correct = 0 if test_labels_val[j] == preds_val[j] else 1
                result_ll[test_labels_val[j] % FLAGS.num_classes][correct] += 1
        test_loss /= FLAGS.test_iter

        # Summary display & output
        acc_list = [float(r[0]) / float(r[0] + r[1]) for r in result_ll]
        result_total = np.sum(np.array(result_ll), axis=0)
        acc_total = float(result_total[0]) / np.sum(result_total)

        print('Class    \t\t\tT\tF\tAcc.')
        format_str = '%-31s %7d %7d %.5f'
        for i in range(FLAGS.num_classes):
            print(format_str % (classes[i], result_ll[i][0], result_ll[i][1],
                                acc_list[i]))
        print(format_str %
              ('(Total)', result_total[0], result_total[1], acc_total))

        # Output to file(if specified)
        if FLAGS.output.strip():
            with open(FLAGS.output, 'w') as fd:
                fd.write('Class    \t\t\tT\tF\tAcc.\n')
                format_str = '%-31s %7d %7d %.5f'
                for i in range(FLAGS.num_classes):
                    t, f = result_ll[i]
                    format_str = '%-31s %7d %7d %.5f\n'
                    fd.write(format_str %
                             (classes[i].replace(' ', '-'), t, f, acc_list[i]))
                fd.write(
                    format_str %
                    ('(Total)', result_total[0], result_total[1], acc_total))
Example #58
def train():
    print("Building training graph ...")
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-FLAGS.init_scale,
                                                    FLAGS.init_scale)
        with tf.variable_scope("char-rnn", initializer=initializer):
            keep_prob = tf.placeholder(dtype=tf.float32,
                                       shape=[],
                                       name='keep_prob')
            cell = model.build_cell(keep_prob)

            inputs = tf.placeholder(dtype=tf.int32,
                                    shape=[FLAGS.batch_size, FLAGS.num_steps],
                                    name='inputs')
            targets = tf.placeholder(dtype=tf.int32,
                                     shape=[FLAGS.batch_size, FLAGS.num_steps],
                                     name='targets')
            lr = tf.placeholder(dtype=tf.float32,
                                shape=[],
                                name='learning_rate')
            initial_state = tf.placeholder(
                dtype=tf.float32,
                shape=[FLAGS.batch_size, cell.state_size],
                name='initial_state')

            logits, final_state = model.predict(inputs, cell, initial_state,
                                                keep_prob)
            loss = model.loss(logits, targets)
            train_op = model.train_batch(loss, lr)

        # create saver and summary
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()

        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        # load data
        print("Loading data ...")
        reader = text_input.TextReader(
            os.path.join(FLAGS.data_dir, FLAGS.data_file))
        reader.prepare_data()
        train_loader = text_input.DataLoader(
            os.path.join(FLAGS.data_dir, 'train.cPickle'), FLAGS.batch_size,
            FLAGS.num_steps)
        test_loader = text_input.DataLoader(
            os.path.join(FLAGS.data_dir, 'test.cPickle'), FLAGS.batch_size,
            FLAGS.num_steps)

        total_steps = FLAGS.num_epochs * train_loader.num_batch
        save_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        zero_state = cell.zero_state(FLAGS.batch_size,
                                     dtype=tf.float32).eval(session=sess)
        global_step = 0

        def eval(sess, loader, state):
            test_loss = 0.
            for _ in xrange(loader.num_batch):
                x_batch, y_batch = loader.next_batch()
                feed = {
                    inputs: x_batch,
                    targets: y_batch,
                    keep_prob: 1.,
                    initial_state: state
                }
                state, loss_value = sess.run([final_state, loss],
                                             feed_dict=feed)
                test_loss += loss_value
            return test_loss / loader.num_batch

        # training
        for epoch in xrange(FLAGS.num_epochs):
            current_lr = FLAGS.init_lr * (FLAGS.lr_decay**(max(
                epoch - FLAGS.decay_after + 1, 0)))
            state = zero_state
            training_loss = 0.
            for _ in xrange(train_loader.num_batch):
                global_step += 1
                start_time = time.time()
                x_batch, y_batch = train_loader.next_batch()
                feed = {
                    inputs: x_batch,
                    targets: y_batch,
                    keep_prob: (1. - FLAGS.dropout),
                    lr: current_lr,
                    initial_state: state
                }
                state, loss_value, _ = sess.run([final_state, loss, train_op],
                                                feed_dict=feed)
                duration = time.time() - start_time
                training_loss += loss_value

                if global_step % FLAGS.log_steps == 0:
                    format_str = (
                        '%s: step %d/%d (epoch %d/%d), loss = %.2f (%.3f sec/batch), lr: %.5f'
                    )
                    print(format_str %
                          (datetime.now(), global_step, total_steps, epoch + 1,
                           FLAGS.num_epochs, loss_value, duration, current_lr))

                if global_step % FLAGS.summary_steps == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, global_step)

            if epoch % FLAGS.save_epochs == 0:
                saver.save(sess, save_path, global_step)
            train_loader.reset_pointer()

            # epoch summary
            training_loss /= train_loader.num_batch
            summary_writer.add_summary(
                _summary_for_scalar('training_loss', training_loss),
                global_step)
            test_loss = eval(sess, test_loader, zero_state)
            test_loader.reset_pointer()
            summary_writer.add_summary(
                _summary_for_scalar('test_loss', test_loss), global_step)
            print("Epoch %d: training_loss = %.2f, test_loss = %.2f" %
                  (epoch + 1, training_loss, test_loss))
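# _summary_for_scalar is defined elsewhere; a minimal sketch (an assumed
# helper) wrapping a Python float in a Summary proto for add_summary:
import tensorflow as tf

def _summary_for_scalar_sketch(name, value):
    return tf.Summary(
        value=[tf.Summary.Value(tag=name, simple_value=float(value))])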
Example #59
                    #print tf.cast(predicted_labels, tf.int32),labels_ph
                    correct_prediction = tf.equal(predicted,
                                                  labels_ph)  # compare predictions with labels
                    accuracy = tf.reduce_mean(
                        tf.cast(correct_prediction, tf.float32))
                    print("Training image accuracy: {0}".format(
                        accuracy.eval({
                            images_ph: sample_images,
                            labels_ph: sample_labels
                        })))
                    #a = accuracy.eval({images_ph: sample_images, labels_ph: sample_labels})
                    #print 'Final test accuracy: {0}'.format(accuracy)
                    #print 'Final test accuracy: {0}'.format(predicted)

                    # save the variables on disk
                    variables = tf.all_variables()
                    saver = tf.train.Saver(variables)
                    saver.save(sess, "data/data.ckpt")

                    # save the model to file and wo can use it predict sth like images
                    # tf.train.write_graph(sess.graph_def, 'graph', 'model.ph', False)
                    '''
                node_seq = {}  # Keyed by node name.
                seq = 4
                for node in g.as_graph_def().node:

                    seq += 1
                print g.as_graph_def().node[22]
                print seq
                '''
                    #a ="Accuracy/predicted_labels".split('.')
Example #60
    def __init__(self,
                 phase,
                 visualize,
                 data_path,
                 output_dir,
                 batch_size,
                 initial_learning_rate,
                 num_epoch,
                 steps_per_checkpoint,
                 target_vocab_size,
                 model_dir,
                 target_embedding_size,
                 attn_num_hidden,
                 attn_num_layers,
                 clip_gradients,
                 max_gradient_norm,
                 session,
                 load_model,
                 gpu_id,
                 use_gru,
                 use_distance=True,
                 max_image_width=160,
                 max_image_height=60,
                 max_prediction_length=8,
                 reg_val=0):

        self.use_distance = use_distance

        # We need resized width, not the actual width
        self.max_original_width = max_image_width
        self.max_width = int(math.ceil(1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT))

        self.encoder_size = int(math.ceil(1. * self.max_width / 4))
        self.decoder_size = max_prediction_length + 2
        self.buckets = [(self.encoder_size, self.decoder_size)]

        gpu_device_id = '/gpu:' + str(gpu_id)
        self.gpu_device_id = gpu_device_id
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        logging.info('loading data')
        # load data
        if phase == 'train':
            self.s_gen = DataGen(data_path, self.buckets, epochs=num_epoch, max_width=self.max_original_width)
        else:
            batch_size = 1
            self.s_gen = DataGen(data_path, self.buckets, epochs=1, max_width=self.max_original_width)

        logging.info('phase: %s' % phase)
        logging.info('model_dir: %s' % (model_dir))
        logging.info('load_model: %s' % (load_model))
        logging.info('output_dir: %s' % (output_dir))
        logging.info('steps_per_checkpoint: %d' % (steps_per_checkpoint))
        logging.info('batch_size: %d' % (batch_size))
        logging.info('num_epoch: %d' % num_epoch)
        logging.info('learning_rate: %f' % initial_learning_rate)
        logging.info('reg_val: %f' % (reg_val))
        logging.info('max_gradient_norm: %f' % max_gradient_norm)
        logging.info('clip_gradients: %s' % clip_gradients)
        logging.info('max_image_width: %d' % max_image_width)
        logging.info('max_prediction_length: %d' % max_prediction_length)
        logging.info('target_vocab_size: %d' % target_vocab_size)
        logging.info('target_embedding_size: %d' % target_embedding_size)
        logging.info('attn_num_hidden: %d' % attn_num_hidden)
        logging.info('attn_num_layers: %d' % attn_num_layers)
        logging.info('visualize: %s' % visualize)

        if use_gru:
            logging.info('using GRU in the decoder.')

        self.reg_val = reg_val
        self.sess = session
        self.steps_per_checkpoint = steps_per_checkpoint
        self.model_dir = model_dir
        self.output_dir = output_dir
        self.batch_size = batch_size
        self.num_epoch = num_epoch
        self.global_step = tf.Variable(0, trainable=False)
        self.phase = phase
        self.visualize = visualize
        self.learning_rate = initial_learning_rate
        self.clip_gradients = clip_gradients

        if phase == 'train':
            self.forward_only = False
        elif phase == 'test':
            self.forward_only = True
        else:
            assert False, phase

        with tf.device(gpu_device_id):

            self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
            self.height_float = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.float64)

            self.img_pl = tf.placeholder(tf.string, name='input_image_as_bytes')
            self.img_data = tf.cond(
                tf.less(tf.rank(self.img_pl), 1),
                lambda: tf.expand_dims(self.img_pl, 0),
                lambda: self.img_pl
            )
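            # the tf.cond above wraps a single rank-0 image string into a
            # batch of one, so the rest of the graph always sees a batch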
            self.img_data = tf.map_fn(self._prepare_image, self.img_data, dtype=tf.float32)
            num_images = tf.shape(self.img_data)[0]

            # TODO: create a mask depending on the image/batch size
            self.encoder_masks = []
            for i in xrange(self.encoder_size + 1):
                self.encoder_masks.append(
                    tf.tile([[1.]], [num_images, 1])
                )
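            # until the TODO above is addressed, every encoder position is
            # unmasked: a constant weight of 1.0 per image in the batch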

            self.decoder_inputs = []
            self.target_weights = []
            for i in xrange(self.decoder_size + 1):
                self.decoder_inputs.append(
                    tf.tile([0], [num_images])
                )
                if i < self.decoder_size:
                    self.target_weights.append(tf.tile([1.], [num_images]))
                else:
                    self.target_weights.append(tf.tile([0.], [num_images]))
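            # the decoder is wired with dummy all-zero input ids (presumably
            # the GO symbol) and unit target weights for every real step; the
            # final step gets weight 0 so it does not contribute to the loss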

            # TODO: not 2, 2 is static (???)

            self.zero_paddings = tf.zeros([num_images, 2, 512], dtype=tf.float32)
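            # two zero time-steps get appended to the CNN output below (the
            # hard-coded 2 that the TODO above refers to)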

            cnn_model = CNN(self.img_data, True)
            self.conv_output = cnn_model.tf_output()
            self.concat_conv_output = tf.concat(axis=1, values=[self.conv_output, self.zero_paddings])
            self.perm_conv_output = tf.transpose(self.concat_conv_output, perm=[1, 0, 2])
            self.attention_decoder_model = Seq2SeqModel(
                encoder_masks=self.encoder_masks,
                encoder_inputs_tensor=self.perm_conv_output,
                decoder_inputs=self.decoder_inputs,
                target_weights=self.target_weights,
                target_vocab_size=target_vocab_size,
                buckets=self.buckets,
                target_embedding_size=target_embedding_size,
                attn_num_layers=attn_num_layers,
                attn_num_hidden=attn_num_hidden,
                forward_only=self.forward_only,
                use_gru=use_gru)

            table = tf.contrib.lookup.MutableHashTable(
                key_dtype=tf.int64,
                value_dtype=tf.string,
                default_value="",
                checkpoint=True,
            )
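            # the table is mutable so it can be filled at runtime: the insert
            # op below loads the id -> character mapping from DataGen.CHARMAP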

            insert = table.insert(
                tf.constant(range(len(DataGen.CHARMAP)), dtype=tf.int64),
                tf.constant(DataGen.CHARMAP),
            )

            with tf.control_dependencies([insert]):

                num_feed = []

                for l in xrange(len(self.attention_decoder_model.output)):
                    guess = tf.argmax(self.attention_decoder_model.output[l], axis=1)
                    num_feed.append(guess)

                trans_output = tf.transpose(num_feed)
                trans_output = tf.map_fn(
                    lambda m: tf.foldr(
                        lambda a, x: tf.cond(
                            tf.equal(x, DataGen.EOS_ID),
                            lambda: '',
                            lambda: table.lookup(x) + a
                        ),
                        m,
                        initializer=''
                    ),
                    trans_output,
                    dtype=tf.string
                )
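                # tf.foldr above walks each id sequence right to left,
                # resetting the accumulated string to '' at every EOS and
                # otherwise prepending the looked-up character, so the decoded
                # text is truncated at the first EOS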

                self.prediction = tf.cond(
                    tf.equal(tf.shape(trans_output)[0], 1),
                    lambda: trans_output[0],
                    lambda: trans_output
                )
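                # a single input image yields a scalar string prediction; a
                # batch yields a 1-D tensor of strings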

            if not self.forward_only:  # train
                self.updates = []
                self.summaries_by_bucket = []

                params = tf.trainable_variables()
                opt = tf.train.AdadeltaOptimizer(learning_rate=initial_learning_rate)

                if self.reg_val > 0:
                    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                    logging.info('Adding %s regularization losses', len(reg_losses))
                    logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                    loss_op = self.reg_val * tf.reduce_sum(reg_losses) + self.attention_decoder_model.loss
                else:
                    loss_op = self.attention_decoder_model.loss

                gradients, params = zip(*opt.compute_gradients(loss_op, params))
                if self.clip_gradients:
                    gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
                # Add summaries for loss, variables, gradients, gradient norms and total gradient norm.
                summaries = []
                summaries.append(tf.summary.scalar("loss", loss_op))
                summaries.append(tf.summary.scalar("total_gradient_norm", tf.global_norm(gradients)))
                all_summaries = tf.summary.merge(summaries)
                self.summaries_by_bucket.append(all_summaries)
                # update op - apply gradients
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    self.updates.append(opt.apply_gradients(zip(gradients, params), global_step=self.global_step))
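                    # running apply_gradients under a dependency on UPDATE_OPS
                    # makes any collected update ops (typically batch-norm
                    # moving-average updates) run with every training step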


        self.saver_all = tf.train.Saver(tf.all_variables())
        self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and load_model:
            logging.info("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            logging.info("Created model with fresh parameters.")
            self.sess.run(tf.initialize_all_variables())
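
        # Hypothetical usage sketch (file name and variable names assumed, not
        # from the original): with phase='test' and load_model=True, a decoded
        # string can be fetched by feeding raw image bytes to the graph:
        #
        #   with open('sample.png', 'rb') as f:   # hypothetical path
        #       raw_bytes = f.read()
        #   text = self.sess.run(self.prediction,
        #                        feed_dict={self.img_pl: raw_bytes})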