Example #1
    def __call__(self, flow=None):
        """Constructs the layer in `Tensorflow` graph.

        Args:
            flow: This argument is ignored. (Default value = None)

        Returns:
            Output of this layer.

        """

        with tf.variable_op_scope([flow], self.name, 'Embedding', reuse=self.reuse):
            if not self.reuse:
                self._table_loader = tf.placeholder(tf.float32, shape=self._init_values.shape, name='loader')
                self._lookup_table = tf.get_variable(
                    'lookup_table',
                    initializer=self._table_loader,
                    trainable=self.trainable)
                self.params.append(self._lookup_table)
                tf.initialize_variables(self.params).run(feed_dict={self._table_loader: self._init_values})
                self.reuse = True

            flow = tf.placeholder(tf.int64, [None] + self._input_shape, 'input')
            tf.add_to_collection(GraphKeys.MODEL_INPUTS, flow)
            flow = tf.nn.embedding_lookup(self._lookup_table, flow)

        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, flow)
        return flow
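
A minimal standalone sketch of the placeholder-initializer pattern used above, which keeps large constant values (e.g. a pretrained embedding table) out of the serialized graph definition; shapes and names here are hypothetical:

import numpy as np
import tensorflow as tf

init_values = np.random.rand(5000, 128).astype(np.float32)  # pretrained table
loader = tf.placeholder(tf.float32, shape=init_values.shape, name='loader')
table = tf.get_variable('lookup_table', initializer=loader, trainable=False)

with tf.Session() as sess:
    # The values travel through the feed_dict once, at initialization time.
    sess.run(tf.initialize_variables([table]), feed_dict={loader: init_values})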
Example #2
  def testInitializeFromValue(self):
    with self.test_session() as sess:
      init = tf.constant(0.1)
      w = tf.get_variable("v", initializer=init)
      sess.run(tf.initialize_variables([w]))
      self.assertAllClose(w.eval(), 0.1)

      with self.assertRaisesRegexp(ValueError, "shape"):
        # We disallow explicit shape specification when initializer is constant.
        tf.get_variable("u", [1], initializer=init)

      with tf.variable_scope("foo", initializer=init):
        # Constant initializer can be passed through scopes if needed.
        v = tf.get_variable("v")
        sess.run(tf.initialize_variables([v]))
        self.assertAllClose(v.eval(), 0.1)

      # Check that non-float32 initializer creates a non-float32 variable.
      init = tf.constant(1, dtype=tf.int32)
      t = tf.get_variable("t", initializer=init)
      self.assertEqual(t.dtype.base_dtype, tf.int32)

      # Raise error if `initializer` dtype and `dtype` are not identical.
      with self.assertRaisesRegexp(ValueError, "don't match"):
        tf.get_variable("s", initializer=init, dtype=tf.float64)
Example #3
 def testVarScopeRegularizer(self):
   with self.test_session() as sess:
     init = tf.constant_initializer(0.3)
     def regularizer1(v):
       return tf.reduce_mean(v) + 0.1
     def regularizer2(v):
       return tf.reduce_mean(v) + 0.2
     with tf.variable_scope("tower", regularizer=regularizer1) as tower:
       with tf.variable_scope("foo", initializer=init):
         v = tf.get_variable("v", [])
         sess.run(tf.initialize_variables([v]))
         losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
         self.assertEqual(1, len(losses))
         self.assertAllClose(losses[0].eval(), 0.4)
       with tf.variable_scope(tower, initializer=init) as vs:
         u = tf.get_variable("u", [])
         vs.set_regularizer(regularizer2)
         w = tf.get_variable("w", [])
          # The next 3 variables are not regularized, to test disabling regularization.
         x = tf.get_variable("x", [], regularizer=tf.no_regularizer)
         with tf.variable_scope("baz", regularizer=tf.no_regularizer):
           y = tf.get_variable("y", [])
         vs.set_regularizer(tf.no_regularizer)
         z = tf.get_variable("z", [])
         # Check results.
         losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
         self.assertEqual(3, len(losses))
         sess.run(tf.initialize_variables([u, w, x, y, z]))
         self.assertAllClose(losses[0].eval(), 0.4)
         self.assertAllClose(losses[1].eval(), 0.4)
         self.assertAllClose(losses[2].eval(), 0.5)
       with tf.variable_scope("foo", reuse=True):
         v = tf.get_variable("v", [])  # "v" is alredy there, reused
         losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
         self.assertEqual(3, len(losses))  # No new loss added.
Example #4
  def evaluate_model(self, accuracy, num_steps, feed_vars=(), feed_data=None,
                     summary_tag=None, print_every=0):
    """Evaluates the given model.

    Args:
      accuracy: The metric that is being evaluated.
      num_steps: The number of steps to run in the evaluator.
      feed_vars: A list or tuple of the variables that will be fed.
      feed_data: A generator that produces tuples of the same length as
        feed_vars.
      summary_tag: If provided, the final result of running the model will be
        published to this tag.
      print_every: Print a summary every so many steps, use 0 to disable.
    Returns:
      The accuracy.
    """
    test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
    if test_vars:
      tf.initialize_variables(test_vars).run()
    result = self.run_model([accuracy],
                            num_steps,
                            feed_vars=feed_vars,
                            feed_data=feed_data,
                            print_every=print_every,
                            allow_initialize=False)
    if summary_tag and self._summary_writer:
      summary = tf.Summary(
          value=[tf.Summary.Value(tag=summary_tag,
                                  simple_value=float(result[1]))])
      event = tf.Event(wall_time=time.time(),
                       summary=summary,
                       step=int(result[0]))
      self._summary_writer.add_event(event)
    return result[1]
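
A hedged usage sketch of evaluate_model (all names are hypothetical; it assumes an accuracy tensor and input placeholders built elsewhere):

accuracy_value = runner.evaluate_model(
    accuracy, num_steps=100,
    feed_vars=(images_ph, labels_ph),
    feed_data=test_batches(),        # generator of (images, labels) tuples
    summary_tag='test_accuracy',
    print_every=10)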
Example #5
    def train(self, session, text, num_steps):
        """Train embeddings on the given text."""
        generator = bigram_batch.SkipgramGenerator(
            text, self._batch_size, self._num_skips)

        is_own = lambda x: x.name.startswith(self._scope_name)
        tf.initialize_variables(filter(is_own, tf.all_variables())).run()
        print('Initialized')
        average_loss = 0
        step = 0
        while step < num_steps:
            batches_labels = zip(*generator.next())
            for step, (batch, label) in enumerate(batches_labels, step):
                feed_dict = {self._train_dataset: batch,
                             self._train_labels: label.reshape(label.shape[0], 1)}

                _, l = session.run(
                    [self._optimizer, self._loss], feed_dict=feed_dict)
                average_loss += l
                if step % 2000 == 0:
                    if step > 0:
                        average_loss = average_loss / 2000
                    # The average loss is an estimate of the loss over the last
                    # 2000 batches.
                    print('Average loss at step %d: %f' % (step, average_loss))
                    average_loss = 0
Example #6
 def test_variable(self):
   with self.test_session() as sess:
     x = tf.Variable(2.0, name="CustomName")
     y = tf.constant(3.0)
     z = x * y
     z_new = copy(z)
     tf.initialize_variables([x]).run()
     self.assertEqual(z_new.eval(), 6.0)
Example #7
 def test_tensor_variable(self):
   with self.test_session() as sess:
     x = tf.constant(2.0)
     y = tf.constant(3.0)
     z = x * y
     qx = tf.Variable(4.0, name="CustomName")
     z_new = copy(z, {x: qx})
     tf.initialize_variables([qx]).run()
     self.assertEqual(z_new.eval(), 12.0)
Example #8
File: hp2trend.py Project: ahangchen/NN
 def init_vars(self, init_hp, session, reset_hp=False):
     print(init_hp)
     init_feed = dict()
     init_feed[self.ph_hypers] = init_hp
     if os.path.exists(self.save_path):
         # Restore variables from disk.
         self.saver.restore(session, self.save_path)
         if reset_hp:
             tf.initialize_variables(var_list=self.reset_vars).run(feed_dict=init_feed)
     else:
         tf.initialize_all_variables().run(feed_dict=init_feed)
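
A note on the pattern above: tf.initialize_all_variables().run(feed_dict=...) works because some variables' initializers depend on a placeholder (here ph_hypers), and the feed supplies its value at initialization time. A minimal sketch (names hypothetical):

ph = tf.placeholder(tf.float32, shape=[3])
v = tf.Variable(ph * 2.0)  # this variable's initializer consumes the placeholder
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables(), feed_dict={ph: [1., 2., 3.]})
    print(sess.run(v))  # [2. 4. 6.]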
Example #9
def var_collection_example():
    g1 = tf.Graph()
    with g1.as_default():
        with tf.name_scope('scope1') as scope1:
            a = tf.Variable(tf.constant(1.0, shape=[1]), name='a')
            b = tf.Variable(tf.constant(2.0, shape=[1]), name='b')
        with tf.name_scope('scope2') as scope2:
            c = tf.Variable(tf.constant(3.0, shape=[1]), name='c')

    g2 = tf.Graph()
    with g2.as_default():
        with tf.name_scope('scope1') as scope1:
            a = tf.Variable(tf.constant(4.0, shape=[1]), name='a')
            b = tf.Variable(tf.constant(5.0, shape=[1]), name='b')
        with tf.name_scope('scope2') as scope2:
            c = tf.Variable(tf.constant(6.0, shape=[1]), name='c')

    vars_g1 = var_collect.collect_all(graph=g1)
    vars_g1_scope1 = var_collect.collect_scope('scope1', graph=g1)
    var_g1_scope1_a = var_collect.collect_name('scope1/a', graph=g1)

    vars_g2 = var_collect.collect_all(graph=g2)
    vars_g2_dict = var_collect.collect_list(
        ['scope1/a', 'scope1/b', 'scope2/c'],
        graph=g2)

    sess = tf.Session(graph=g1)
    sess.run(tf.initialize_variables(vars_g1))
    y_hat = [var.eval(sess)[0] for var in vars_g1]
    y = [1.0, 2.0, 3.0]
    print 'Graph g1: '
    print 'y: [' + ', '.join([str(l) for l in y]) + ']'
    print 'y_hat: [' + ', '.join([str(l) for l in y_hat]) + ']'
    sess.close()

    sess = tf.Session(graph=g2)
    sess.run(tf.initialize_variables(vars_g2))
    y_hat = [var.eval(sess)[0] for var in vars_g2]
    y = [4.0, 5.0, 6.0]
    print 'Graph g2: '
    print 'y: [' + ', '.join([str(l) for l in y]) + ']'
    print 'y_hat: [' + ', '.join([str(l) for l in y_hat]) + ']'

    var_collect.print_var_list(vars_g1, name='vars_g1')
    var_collect.print_var_list(vars_g2, name='vars_g2')
    var_collect.print_var_list(vars_g1_scope1, name='vars_g1_scope1')
    var_collect.print_var_list([var_g1_scope1_a], name='vars_g1_scope1_a')

    print 'vars_g2_dict = {'
    for key, value in vars_g2_dict.items():
        print '    {}: {},'.format(key, value.eval(sess)[0])
    print '}'
    sess.close()
Example #10
    def testInitFromNonTensorValue(self):
        with self.test_session() as sess:
            v = tf.get_variable("v", initializer=4, dtype=tf.int32)
            sess.run(tf.initialize_variables([v]))
            self.assertAllClose(v.eval(), 4)

            w = tf.get_variable("w", initializer=numpy.array([1, 2, 3]), dtype=tf.int32)
            sess.run(tf.initialize_variables([w]))
            self.assertAllClose(w.eval(), [1, 2, 3])

            with self.assertRaises(TypeError):
                tf.get_variable("x", initializer={})
Example #11
 def testVarScopeIntializer(self):
   with self.test_session() as sess:
     init = tf.constant_initializer(0.3)
     with variable_scope.variable_scope("tower") as tower:
       with variable_scope.variable_scope("foo", initializer=init):
         v = variable_scope.get_variable("v", [])
         sess.run(tf.initialize_variables([v]))
         self.assertAllClose(v.eval(), 0.3)
       with variable_scope.variable_scope(tower, initializer=init):
         w = variable_scope.get_variable("w", [])
         sess.run(tf.initialize_variables([w]))
         self.assertAllClose(w.eval(), 0.3)
Example #12
File: enc_dec.py Project: amoliu/deeprl
    def __init__(self, settings, session):
        self.s = session

        self.action_type = settings["action"]["type"]
        if self.action_type == "discrete":
            self.num_actions = settings["action"]["num_actions"]
        else:
            assert False, "Unknown action type: %s" % (self.action_type,)

        self.create_variables(settings)
        self.s.run(tf.initialize_variables(self.variables()))
        self.s.run(tf.initialize_variables(self.gradients()))
Example #13
 def test_local_variable(self):
   with self.test_session() as sess:
     self.assertEquals([], tf.local_variables())
     value0 = 42
     tf.contrib.framework.local_variable(value0)
     value1 = 43
     tf.contrib.framework.local_variable(value1)
     variables = tf.local_variables()
     self.assertEquals(2, len(variables))
     self.assertRaises(tf.OpError, sess.run, variables)
     tf.initialize_variables(variables).run()
     self.assertAllEqual(set([value0, value1]), set(sess.run(variables)))
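
Local variables live in a separate collection that tf.initialize_all_variables() does not cover, which is why the test initializes them explicitly. A minimal sketch of the same pattern (assuming the old contrib API used above):

import tensorflow as tf

counter = tf.contrib.framework.local_variable(0, name='counter')
with tf.Session() as sess:
    sess.run(tf.initialize_variables(tf.local_variables()))
    print(sess.run(counter))  # 0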
Example #14
    def __call__(self, flow):
        """Applies this layer to the input `Tensor` and returns the output `Tensor`.

        Args:
            flow: The input `Tensor`.

        Returns:
            Output of this layer.

        """

        with tf.variable_op_scope([flow], self.name, 'Conv', reuse=self.reuse):
            if not self.reuse:
                full_shape = self._filter_shape + [flow.get_shape()[-1].value, self._n_output_channels]
                self.filter = tf.get_variable(
                    'filter',
                    full_shape,
                    initializer=self._weight_init,
                    regularizer=self._weight_regularizer,
                    trainable=self.trainable)
                self.params.append(self.filter)
                tf.add_to_collection(tf.GraphKeys.WEIGHTS, self.filter)

                if self._has_bias:
                    self.bias = tf.get_variable(
                        'bias',
                        self._n_output_channels,
                        initializer=self._bias_init,
                        regularizer=self._bias_regularizer,
                        trainable=self.trainable)
                    self.params.append(self.bias)
                    tf.add_to_collection(tf.GraphKeys.BIASES, self.bias)

                tf.initialize_variables(self.params).run()
                self.reuse = True

            flow = tf.nn.conv2d(
                flow,
                self.filter,
                [1] + self._strides + [1],
                self._padding,
                self._use_cudnn_on_gpu)

            if self._has_bias:
                flow = tf.nn.bias_add(flow, self.bias)

            if self._activation_fn is not None:
                flow = self._activation_fn(flow)

        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, flow)
        return flow
Example #15
def create_model(session, forward_only, batch_size=FLAGS["batch_size"], model_name = SAVE_NAME):
	"""Create translation model and initialize or load parameters in session."""
	model = seq2seq_model.Seq2SeqModel(
			FLAGS["source_vocab_size"], FLAGS["target_vocab_size"], _buckets,
			FLAGS["size"], FLAGS["num_layers"], FLAGS["max_gradient_norm"], batch_size,
			FLAGS["learning_rate"], FLAGS["learning_rate_decay_factor"],
			forward_only=forward_only)
	ckpt = tf.train.get_checkpoint_state("src/model/forex_trader/"+model_name)
	if ckpt:
	# if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
		print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
		model.saver.restore(session, ckpt.model_checkpoint_path)

		if not forward_only:
			# set new learning rate
			print("Old Learning Rate: ",model.learning_rate.eval(session=session))
			new_learning_rate = tf.Variable(float(FLAGS["learning_rate"]), trainable=False)
			op = tf.assign(model.learning_rate, new_learning_rate)
			op_init = tf.initialize_variables([new_learning_rate])
			session.run([op_init])
			session.run([op])
			print("New Learning Rate: ",model.learning_rate.eval(session=session))

	else:
		print("Creating model with fresh parameters.")
		session.run(tf.initialize_all_variables())

	

	return model
Example #16
    def _create_state(self):
        """Prepare stateful variables modified during the recurrence."""

        # Both the queue and the stack are flattened stack_size * batch_size
        # tensors. `stack_size` many blocks of `batch_size` values
        stack_shape = (self.stack_size * self.batch_size, self.model_dim)
        self.stack = tf.Variable(tf.zeros(stack_shape, dtype=tf.float32),
                                 trainable=False, name="stack")
        self.queue = tf.Variable(tf.zeros((self.stack_size * self.batch_size,), dtype=tf.float32),
                                 trainable=False, name="queue")

        self.buff_cursors = tf.Variable(tf.zeros((self.batch_size,), dtype=tf.float32),
                                          trainable=False, name="buff_cursors")
        self.cursors = tf.Variable(tf.ones((self.batch_size,), dtype=tf.float32) * - 1,
                                   trainable=False, name="cursors")

        # TODO make parameterizable
        self.tracking_value = tf.Variable(tf.zeros((self.batch_size, self.tracking_dim), dtype=tf.float32),
                                          trainable=False, name="tracking_value")

        # Create an Op which will (re-)initialize the auxiliary variables
        # declared above.
        self._aux_vars = [self.stack, self.queue, self.buff_cursors, self.cursors,
                          self.tracking_value]
        self.variable_initializer = tf.initialize_variables(self._aux_vars)
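
A hedged usage note: because the init op is built once here, the recurrence state can be reset between batches without touching trained parameters, along the lines of:

# hypothetical call site; `model` is an instance of the class above
sess.run(model.variable_initializer)  # zero the stack/queue/cursors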
Example #17
    def __init__(self, settings):
        self.settings       = update_settings(DEFAULT_SETTINGS, settings)

        # network and training
        self.q_network = parse_block(settings["model"])
        self.optimizer = parse_optimizer(settings["optimizer"])

        out_sh = self.q_network.output_shape()
        assert len(out_sh) == 2 and out_sh[0] is None, \
                "Output of the Discrete DeepQ must be (None, num_actions), where None corresponds to batch_size"
        self.num_actions      = out_sh[1]
        self.minibatch_size   = self.settings["minibatch_size"]

        self.train_every_nth              = self.settings['train_every_nth']
        self.discount_rate    = self.settings["discount_rate"]

        self.transitions_so_far        = 0
        self.exploration_period        = self.settings['exploration_period']
        self.random_action_probability = self.settings['random_action_probability']

        self.replay_buffer                = deque()
        self.store_every_nth              = self.settings['store_every_nth']
        self.replay_buffer_size           = self.settings['replay_buffer_size']

        self.target_network_update_rate   = self.settings['target_network_update_rate']

        self.summary_writer = None

        self.s = tf.Session()

        self.create_variables()
        self.s.run(tf.initialize_variables(
                self.q_network.variables() + self.target_q_network.variables()))
Example #18
 def init_gradients(self, loss, var_train):
     if self.play_mode:
         return
     
     with tf.device(self.args.device):
         var_refs = [v.ref() for v in var_train]
         train_gradients = tf.gradients(
             loss, var_refs,
             gate_gradients=False,
             aggregation_method=None,
             colocate_gradients_with_ops=False)
 
         acc_gradient_list = []
         train_step_list = []
         new_grad_vars = []
         self.grad_list = []
         var_list = []
         for grad, var in zip(train_gradients, self.global_vars):
             acc_gradient = tf.Variable(tf.zeros(grad.get_shape()), trainable=False)
             acc_gradient_list.append(acc_gradient)
             train_step_list.append(acc_gradient.assign_add(grad))
             new_grad_vars.append((tf.convert_to_tensor(acc_gradient, dtype=tf.float32), var))
             self.grad_list.append(acc_gradient)
             var_list.append(var)
         
         self.train_step = tf.group(*train_step_list)                
         
         self.reset_acc_gradients = tf.initialize_variables(acc_gradient_list)        
         self.apply_grads = self.global_optimizer.apply_gradients(new_grad_vars)
 
         sync_list = []
         for i in range(0, len(self.global_vars)):
             sync_list.append(var_train[i].assign(self.global_vars[i]))
         self.sync = tf.group(*sync_list)
Example #19
    def test_tf_resize_new_values(self):
        var = tf.Variable(range(20))
        self.session.run(tf.initialize_variables([var]))

        tf_resize(self.session, var, new_values=np.array(range(10)))

        self.assertEqual(len(self.session.run(var)), 10)
Example #20
 def initialize_op(self):
   """Returns an op for initializing tensorflow variables."""
   all_vars = self._row_factors + self._col_factors
   if self._row_weights is not None:
     assert self._col_weights is not None
     all_vars.extend(self._row_weights + self._col_weights)
   return tf.initialize_variables(all_vars)
Example #21
 def _run_init_test_vars_op(self):
     test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
     if test_vars:
         if test_vars != self._test_vars:
             self._test_vars = list(test_vars)
             self._test_var_init_op = tf.initialize_variables(test_vars)
         return self._test_var_init_op.run()
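
Each call to tf.initialize_variables adds a fresh init op to the graph, which is why the snippet above caches the op and rebuilds it only when the TEST_VARIABLES collection changes. A minimal sketch of the same caching idea (class name hypothetical):

class InitOpCache(object):
    """Rebuild the init op only when the tracked variable list changes."""
    def __init__(self):
        self._vars = None
        self._init_op = None

    def run(self, sess, variables):
        if variables != self._vars:
            self._vars = list(variables)
            self._init_op = tf.initialize_variables(self._vars)
        sess.run(self._init_op)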
Example #22
 def __init__(self, placeholder, session, noise_std=1.0, name='NoisyInput'):
     super(NoisyInputLayer, self).__init__(placeholder, name)
     self._noise_std = noise_std
     self._session = session
     with self.name_scope():
         self._predict = tf.Variable(noise_std, name='predict')
         self._session.run(tf.initialize_variables([self._predict]))
Example #23
 def fit(self, dataset, n_epochs = {1:10,3:10,5:10}, n_mini_batch = 1, learning_rate=0.01):
     """ learn parameters by performing n_epochs loop """
     self.data = dataset
     user_placeholder, M_placeholder = placeholder_inputs()
     self.sess.run(tf.initialize_all_variables()) 
     
     tot_epochs = 0
     for k in n_epochs.keys():
         print("perform CD",k)
         
         for epochs in range(n_epochs[k]):
             tot_epochs+=1
             print("epochs",tot_epochs)
             user_batchs = chunks(range(self.num_users),n_mini_batch)
             
             b = 0
             for batch in user_batchs:
                 b+=1
                 print("batch :",b,"/",n_mini_batch)
                  # the learning rate is divided by the batch size;
                  # the last batch does not necessarily have the same size as
                  # the others, so we have to init train_op here
                 train_op = self.train(user_placeholder,M_placeholder,k,learning_rate/len(batch))
                 update_op = self.update_weight()
                 
                 # re-initialize the gradient
                 self.sess.run(tf.initialize_variables([self.delta_w,self.delta_vb,self.delta_hb]))
                 for u in batch:
                     feed_dict = fill_feed_dict(self.data, u, user_placeholder, M_placeholder)
                     # update the gradient
                     self.sess.run(train_op, feed_dict = feed_dict)
                 # update the weight for this mini-batch
                 self.sess.run(update_op)
Example #24
File: driver.py Project: bentzinir/Buffe
    def reset_module(self, module):

        temp = set(tf.all_variables())

        module.backward(module.loss)

        self.sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))
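
The same snapshot-and-diff trick appears in Example #25 below to initialize optimizer slot variables. A minimal standalone sketch (names hypothetical):

before = set(tf.all_variables())
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)  # creates slot variables
sess.run(tf.initialize_variables(set(tf.all_variables()) - before))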
Example #25
def train_dnn(data_folder, model_file): 
    # Output of dnn using input x
    y = DNN(x)
    
    print "Loading training pickles..."  
    train_set = import_data.load_dataset(data_folder + '/train_data.pickle', 
                                         data_folder + '/train_labels.pickle',
                                         context_frames=context_frames)      
        
    # Create the dir for the model
    if not os.path.isdir('%s/models/%s'%(save_loc,start_date)):
        try:
            os.makedirs('%s/models/%s'%(save_loc,start_date))
        except OSError:
            if not os.path.isdir('%s/models/%s'%(save_loc,start_date)):
                raise
    
    # Create the session
    global sess
    sess = tf.InteractiveSession()    
    global summary_op
    global train_writer
    global saver
    saver = tf.train.Saver()
        
    # Op for merging all summaries
    summary_op = tf.merge_all_summaries()
    # Summary Writer
    train_writer = tf.train.SummaryWriter('%ssummaries/%s'%(save_loc, start_date), sess.graph)
        
    # Cost function
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
    # Optimizer
    # For gradient descent, learning rate = 0.002 (see Hinton et al.)
    # For AdamOptimizer, learning rate = 0.0001 (better than default (exp 1.2))
    if (optimizer_name == 'Adam'):
        # Hacky solution for always making sure that the beta2_power var
        # is always initialized
        temp = set(tf.all_variables())
        optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost)
        sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))
    else:
        optimizer = tf.train.GradientDescentOptimizer(0.02).minimize(cost)
    
    if model_file:
        saver.restore(sess, model_file)
        print "Model restored"
    else:
        # Initialization
        init_op = tf.initialize_all_variables()
        sess.run(init_op)    
    
    print("Training network. Date: %s" % start_date)
    train(train_set, y, cost, optimizer)
    
    save_path = saver.save(sess, "%s/models/%s/model.ckpt"%(save_loc, start_date))
    print("Model saved in file: %s" % save_path)
    print("Summaries written to summaries/%s" % start_date)
    
    evaluate_dnn(data_folder, y)
Example #26
    def __init__(self, session, optimizer_critic, optimizer_actor, critic_network, actor_network, gamma_lmbda,
                 state_dim, num_actions, summary_writer=None, summary_every=5):

        self.session = session
        self.summary_writer = summary_writer
        self.optimizer_critic = optimizer_critic
        self.optimizer_actor = optimizer_actor

        self.actor_network = actor_network
        self.critic_network = critic_network

        self.state_dim = state_dim
        self.num_actions = num_actions
        self.gamma_lmbda = tf.constant(gamma_lmbda)

        # initialize the graph on tensorflow
        self.create_variables()
        var_lists = tf.get_collection(tf.GraphKeys.VARIABLES)
        self.session.run(tf.initialize_variables(var_lists))

        # make sure the variables in graph are initialized
        self.session.run(tf.assert_variables_initialized())

        if self.summary_writer is not None:
            self.summary_writer.add_graph(self.session.graph)
            self.summary_every = summary_every
Example #27
 def guarantee_initialized_variables(self, session, list_of_variables = None):
     if list_of_variables is None:
         list_of_variables = tf.all_variables()
     # report_uninitialized_variables() returns variable *names* (as bytes),
     # so map them back to the variable objects before initializing;
     # tf.get_variable(name) would not retrieve them outside their scope.
     uninitialized_names = set(
         session.run(tf.report_uninitialized_variables(list_of_variables)))
     uninitialized_variables = [v for v in list_of_variables
                                if v.op.name.encode('utf-8') in uninitialized_names]
     session.run(tf.initialize_variables(uninitialized_variables))
     return uninitialized_variables
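
Hedged usage sketch (assumes an open session; returns whichever variables were still uninitialized, e.g. after a partial checkpoint restore):

newly_initialized = model.guarantee_initialized_variables(sess)  # names hypothetical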
Example #28
  def testIntializeFromValue(self):
    with self.test_session() as sess:
      init = tf.constant(0.1)
      w = tf.get_variable("v", initializer=init)
      sess.run(tf.initialize_variables([w]))
      self.assertAllClose(w.eval(), 0.1)

      with self.assertRaisesRegexp(ValueError, "shape"):
        # We disallow explicit shape specification when initializer is constant.
        tf.get_variable("u", [1], initializer=init)

      with tf.variable_scope("foo", initializer=init):
        # Constant initializer can be passed through scopes if needed.
        v = tf.get_variable("v")
        sess.run(tf.initialize_variables([v]))
        self.assertAllClose(v.eval(), 0.1)
Example #29
 def restoreBaseReg(self,sess,restore):
     outfilename = os.path.join(self.conf.cachedir,self.conf.baseregoutname)
     traindatafilename = os.path.join(self.conf.cachedir,self.conf.baseregdataname)
     latest_ckpt = tf.train.get_checkpoint_state(self.conf.cachedir,
                                         latest_filename = self.conf.baseregckptname)
     if not latest_ckpt or not restore:
         self.baseregstartat = 0
         self.baseregtrainData = {'train_err':[], 'val_err':[], 'step_no':[],
                               'train_dist':[], 'val_dist':[] }
         sess.run(tf.initialize_variables(PoseTools.get_vars('base')))
         print("Not loading base variables. Initializing them")
         return False
     else:
         self.baseregsaver.restore(sess,latest_ckpt.model_checkpoint_path)
         matchObj = re.match(outfilename + '-(\d*)',latest_ckpt.model_checkpoint_path)
         self.baseregstartat = int(matchObj.group(1))+1
         with open(traindatafilename,'rb') as tdfile:
             inData = pickle.load(tdfile)
             if not isinstance(inData,dict):
                 self.baseregtrainData, loadconf = inData
                 print('Parameters that dont match for base:')
                 PoseTools.compare_conf(self.conf, loadconf)
             else:
                 print("No config was stored for base. Not comparing conf")
                 self.baseregtrainData = inData
         print("Loading base variables from %s"%latest_ckpt.model_checkpoint_path)
         return True
Example #30
def main(_):
  """Load a word embedding."""
  if not FLAGS.model_file or not FLAGS.vocab_file:
    print("--model_file --vocab_file and must be specified.")
    sys.exit(1)

  # get the word to id mapping
  word2id = {}
  with open(FLAGS.vocab_file, "r") as file:
    for i, line in enumerate(reader(file, delimiter=" ")):
      word2id[line[0]] = i

  # load word embeddings
  with tf.Graph().as_default(), tf.Session() as session:
    #with tf.device("/cpu:0"):
    
    w_in = tf.Variable(tf.zeros([len(word2id), FLAGS.embedding_size]), 
      trainable=False, name="w_in")
    saver = tf.train.Saver({"w_in": w_in})
    saver.restore(session, FLAGS.model_file)

    tensor = tf.concat(0, [w_in.value(), tf.zeros([2, FLAGS.embedding_size])])
    embeddings = tf.Variable(tensor, trainable=True, name="embeddings")

    word_ids = tf.constant([[0, 1, 2], [3, 4, 71291]])
    word_emb = tf.nn.embedding_lookup(embeddings, word_ids)

    #word_emb = tf.Print(word_emb, [word_emb[0]])

    init = tf.initialize_variables([embeddings])
    session.run(init)

    word_emb = session.run(word_emb)
    print(word_emb)
Example #31
    what do I need to do:

        - take charid (not 1-hot vector) => create new 'bigram' class which uses BatchGenerator to create batches of charids
        - graph requirement: 1 TF train_input placeholder, has shape = [ batch_size * (num_unrollings + 1)  ]

    '''
        _, l, predictions, lr = session.run(
            [optimizer, loss, train_prediction, learning_rate],
            feed_dict=feed_dict)

# LSTM...tbc...

# initialize all other variables
    is_lstm = lambda x: x.name.startswith("lstm_scope")
    tf.initialize_variables(filter(is_lstm, tf.all_variables())).run()

    #

    # train lstm

    print('Initialized')
    mean_loss = 0
    for step in range(num_steps):
        batches = train_batches.next()
        feed_dict = dict()
        for i in range(num_unrollings + 1):
            feed_dict[train_data[i]] = batches[i]
        '''TEST - CHECK TENSOR SHAPES AND TYPES'''
        if step == 0:
            print('step==0')
Example #32
    rnn_classification_loss = weighted_sum_cross_entropy(z_logits, z)
    tf.summary.scalar('rnn_classification_loss', rnn_classification_loss)
    tf.summary.scalar('l2_z', l2)

    total_loss =  FLAGS.w_c*rnn_classification_loss + FLAGS.w_i*inverse_loss + FLAGS.w_l2*l2
    tf.summary.scalar('loss', total_loss)
    rnn_acc        = match_all(z_, z)
    tf.summary.scalar('rnn_acc', rnn_acc)


    learning_rate   = 0.0001

    with tf.variable_scope('optimizer'):
        optimizer= minimize(total_loss, { 'learning rate' : learning_rate}, algo='adam')

    sess.run(tf.initialize_variables(list(set(tf.all_variables()) - set(prev_vars)) ))

    train_writer = tf.summary.FileWriter('cifar10_rnn/train', graph=sess.graph)
    test_writer = tf.summary.FileWriter('cifar10_rnn/test')
    summary_op = tf.summary.merge_all()


    n_epoch         = 1000
    n_batch         = 200
    
    n_display       = 10000

    
    saver2 = tf.train.Saver()

    
Example #33
    def run(self,
            logdir=None,
            variables=None,
            use_coordinator=True,
            *args,
            **kwargs):
        """A simple wrapper to run inference.

    1. Initialize algorithm via ``initialize``.
    2. (Optional) Build a ``tf.train.SummaryWriter`` for TensorBoard.
    3. (Optional) Initialize TensorFlow variables.
    4. (Optional) Start queue runners.
    5. Run ``update`` for ``self.n_iter`` iterations.
    6. While running, ``print_progress``.
    7. Finalize algorithm via ``finalize``.
    8. (Optional) Stop queue runners.

    To customize the way inference is run, run these steps
    individually.

    Parameters
    ----------
    logdir : str, optional
      Directory where event file will be written. For details,
      see `tf.train.SummaryWriter`. Default is to write nothing.
    variables : list, optional
      A list of TensorFlow variables to initialize during inference.
      Default is to initialize all variables (this includes
      reinitializing variables that were already initialized). To
      avoid initializing any variables, pass in an empty list.
    use_coordinator : bool, optional
      Whether to start and stop queue runners during inference using a
      TensorFlow coordinator. For example, queue runners are necessary
      for batch training with the ``n_minibatch`` argument or with
      file readers.
    *args
      Passed into ``initialize``.
    **kwargs
      Passed into ``initialize``.
    """
        self.initialize(*args, **kwargs)

        if logdir is not None:
            self.train_writer = tf.train.SummaryWriter(logdir,
                                                       tf.get_default_graph())

        if variables is None:
            init = tf.initialize_all_variables()
        else:
            init = tf.initialize_variables(variables)

        # Feed placeholders in case initialization depends on them.
        feed_dict = {}
        for key, value in six.iteritems(self.data):
            if isinstance(key, tf.Tensor):
                feed_dict[key] = value

        init.run(feed_dict)

        if use_coordinator:
            # Start input enqueue threads.
            self.coord = tf.train.Coordinator()
            self.threads = tf.train.start_queue_runners(coord=self.coord)

        for _ in range(self.n_iter):
            info_dict = self.update()
            self.print_progress(info_dict)

        self.finalize()

        if use_coordinator:
            # Ask threads to stop.
            self.coord.request_stop()
            self.coord.join(self.threads)
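
A hedged usage sketch of run (names hypothetical; an Edward-style inference object is assumed to have been constructed beforehand):

inference.run(logdir='/tmp/log', variables=[qw, qb], n_iter=500)

Passing an explicit variables list avoids re-initializing variables that were already trained or restored; an empty list skips initialization entirely.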
Example #34
def run(args, server, renderOnly=False):
    env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes, renderOnly=renderOnly)
    trainer = A3C(env, args.task, args.visualise, renderOnly=renderOnly)

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [v for v in tf.global_variables() if not v.name.startswith("local")]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [v for v in tf.all_variables() if not v.name.startswith("local")]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()
    saver = FastSaver(variables_to_save)


    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    config = tf.ConfigProto(device_filters=["/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(is_chief=(args.task == 0),
                             logdir=logdir,
                             saver=saver,
                             summary_op=None,
                             init_op=init_op,
                             init_fn=init_fn,
                             summary_writer=summary_writer,
                             ready_op=tf.report_uninitialized_variables(variables_to_save),
                             global_step=trainer.global_step,
                             save_model_secs=30,
                             save_summaries_secs=30)

    num_global_steps = 100000000

    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. " +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified.")
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps):
            #logger.info("About to process")
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
Example #35
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
    
    dataset_val = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, 'validation', FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    
    if FLAGS.weights_initializer is None:
      weights_initializer = None
      # default value will be defined in argscope, it is xavier_initializer
    elif FLAGS.weights_initializer=='zeros':
      weights_initializer = tf.zeros_initializer()
    elif FLAGS.weights_initializer=='ones':
      weights_initializer = tf.ones_initializer()
    elif FLAGS.weights_initializer=='trunc_normal':
      weights_initializer = tf.truncated_normal_initializer()
    elif FLAGS.weights_initializer=='xavier':
      weights_initializer = initializers.xavier_initializer()
    elif FLAGS.weights_initializer=='var_scaling':
      weights_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('weight initializer not found')
      
    if FLAGS.biases_initializer is None:
      biases_initializer = None
      # default value will be defined in argscope, it is zeros_initializer
    elif FLAGS.biases_initializer=='zeros':
      biases_initializer = tf.zeros_initializer()
    elif FLAGS.biases_initializer=='ones':
       biases_initializer = tf.ones_initializer()
    elif FLAGS.biases_initializer=='trunc_normal':
      biases_initializer = tf.truncated_normal_initializer()
    elif FLAGS.biases_initializer=='xavier':
      biases_initializer = initializers.xavier_initializer()
    elif FLAGS.biases_initializer=='var_scaling':
      biases_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('biases initializer not found')
    
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=True)

    network_fn_val = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=False)
    
    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=True, flipLR=FLAGS.flipLR, random_scale=FLAGS.random_scale,
        is_windowed=FLAGS.is_windowed)

    image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=False, flipLR=FLAGS.flipLR, random_scale=FLAGS.random_scale,
        is_windowed=FLAGS.is_windowed)
    
    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)
      
      ############################################
      # Create a provider for the validation set #
      ############################################
      provider_val = slim.dataset_data_provider.DatasetDataProvider(
          dataset_val,
          shuffle=True,
          common_queue_capacity=2 * FLAGS.batch_size_val,
          common_queue_min=FLAGS.batch_size_val)
      [image_val, label_val] = provider_val.get(['image', 'label'])
      label_val -= FLAGS.labels_offset
      
      eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size
  
      image_val = image_preprocessing_fn_val(image_val, eval_image_size, eval_image_size)
  
      images_val, labels_val = tf.train.batch(
          [image_val, label_val],
          batch_size=FLAGS.batch_size_val,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size_val)
      labels_val_onehot = slim.one_hot_encoding(
          labels_val, dataset.num_classes - FLAGS.labels_offset)
      
    ###############################
    # Define the model (training) #
    ###############################
    
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      
      with tf.variable_scope('my_scope'):
          logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
        
      tf.losses.softmax_cross_entropy(
          labels, logits, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      # Add an image of the activations at each layer; this is a good way to
      # double-check that the rotated images look rotated to our eyes.
      if 'conv' in end_point:
        dims = x.get_shape()
        for ii in range(5):
          summaries.add(tf.summary.image('image_out/' + end_point + '/image_' + str(ii), tf.slice(x,[ii,0,0,0],[1,dims[1],dims[2],1])))
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    if FLAGS.quantize_delay >= 0:
      tf.contrib.quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay)
      
    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # optimize_clones computes the total loss and the gradients for all clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))
 
    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    #################################
    # Define the model (validation) #
    #################################
    
    # get the validation set logits (predictions)
    with tf.variable_scope('my_scope',reuse=True):
      logits_val, _ = network_fn_val(images_val)
        
    predictions_val = tf.argmax(logits_val, 1)
    
    # Define loss on validation set, add a summary
    tf.losses.softmax_cross_entropy(
      labels_val_onehot, logits_val, label_smoothing=FLAGS.label_smoothing, 
      weights=1.0, loss_collection = 'eval_losses')
    
    for loss in tf.get_collection('eval_losses'):
      summaries.add(tf.summary.scalar('eval_losses/%s' % loss.op.name, loss))
      
    # Define the validation set metrics: 
    # Will define each metric twice as separate operation. 
    # One set will be made resettable, the other set will be streaming.
    with tf.name_scope('eval_metrics'):
      eval_acc_value, eval_acc_op = tf.metrics.accuracy(predictions=predictions_val,labels=labels_val)    
      eval_recall_5_value, eval_recall_5_op = slim.metrics.streaming_recall_at_k(predictions=logits_val, labels=labels_val,k=5) 
      # add these variables as summaries for tensorboard
      summaries.add(tf.summary.scalar('eval_recall_5', eval_recall_5_value))
      summaries.add(tf.summary.scalar('eval_acc', eval_acc_value))
      
    with tf.name_scope('eval_metrics_streaming'):
      eval_acc_streaming_value, eval_acc_streaming_op = tf.metrics.accuracy(predictions=predictions_val,labels=labels_val) 
      eval_recall_5_streaming_value, eval_recall_5_streaming_op = slim.metrics.streaming_recall_at_k(predictions=logits_val, labels=labels_val,k=5) 
      # add these variables as summaries for tensorboard
      summaries.add(tf.summary.scalar('eval_recall_5_streaming', eval_recall_5_streaming_value))
      summaries.add(tf.summary.scalar('eval_acc_streaming', eval_acc_streaming_value))
    
    # Also add summaries of all the local variables used to compute the eval metrics.
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES, 'eval_metrics'):
      summaries.add(tf.summary.scalar('%s' % metric.op.name, metric))
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES, 'eval_metrics_streaming'):
      summaries.add(tf.summary.scalar('%s' % metric.op.name, metric))

    # gather up all the variables that are used to compute eval metrics
    stream_vars = [i for i in tf.local_variables() if i.name.split('/')[0]=='eval_metrics']
    # make an operation that'll let us re-initialize just these vars.
    reset_op = tf.initialize_variables(stream_vars)
   
    # make an operation that'll let us run evaluation (all metrics)
    eval_op = list([eval_acc_op, eval_recall_5_op, eval_acc_streaming_op, eval_recall_5_streaming_op])
    
    # Gather validation summaries
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    
    # Merge all summaries together (this includes training summaries too).
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # Create a non-default saver so we don't delete all the old checkpoints.
    my_saver = tf_saver.Saver(max_to_keep=FLAGS.max_checkpoints_to_keep,
               keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,)
    
    # Create a non-default dictionary of options for train_step_fn
    # This is a hack that lets us pass everything we need to run evaluation, into the training loop function
    with ops.name_scope('train_step'):
        train_step_kwargs = {}

        if FLAGS.max_number_of_steps:
          should_stop_op = math_ops.greater_equal(global_step, FLAGS.max_number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        if FLAGS.log_every_n_steps > 0:
          train_step_kwargs['should_log'] = math_ops.equal(
              math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
        train_step_kwargs['should_val'] = math_ops.equal(
                math_ops.mod(global_step, FLAGS.val_every_n_steps),0)
        train_step_kwargs['should_reset_eval_metrics'] = math_ops.equal(
                math_ops.mod(global_step, tf.to_int64(math_ops.multiply(FLAGS.reset_eval_metrics_every_n_vals, FLAGS.val_every_n_steps))),0)
        train_step_kwargs['eval_op'] = eval_op
        train_step_kwargs['reset_op'] = reset_op

  
    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        saver=my_saver, 
        train_step_fn=learning_biasCNN.train_step_fn,
        train_step_kwargs = train_step_kwargs)
Example #36
        # Convert the output signal into probabilities
        prediction = tf.nn.softmax(wx_plus_b)

# Originally a quadratic cost function was used
with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.square(y - prediction))
    tf.summary.scalar('loss', loss)

# Use gradient descent
with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)

# Initialize variables
if int((tf.__version__).split('.')[1]) < 12 and int(
    (tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()

# 结果存放到一个布尔类型的列表中,生成1*100的布尔矩阵
# argmax返回一维张量中最大的值所在的位置
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(y, 1),
                                      tf.argmax(prediction, 1))
    with tf.name_scope('accuracy'):
        # Compute the accuracy: first cast the boolean matrix to a float matrix
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

# Merge all summaries
Example #37
 def test_all_initialized(self):
     with self.test_session() as sess:
         x = tf.Variable(tf.zeros([]))
         sess.run(tf.initialize_variables([x]))
         self.assertEqual([], tdc._init_uninitialized(sess))
Example #38
def build_graph(cluster, image_url, return_list):
    prob_list = return_list
    num_workers = cluster.num_tasks('worker')
    
    # default picture for testing
    if image_url == None:
        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/7/7e/Bow_bow.jpg/800px-Bow_bow.jpg"
    image_string = urllib.urlopen(image_url).read()
    #image_string = tf.read_file("/home/philiptkd/Downloads/Dependency_Tree.png") # I lost internet
    image_size = inception.inception_v1_dist.default_image_size
    
    # shared done list, ready list, and image
    with tf.device("/job:ps/task:0"):
        done_list = tf.get_variable("done_list", [num_workers+1], tf.int32, tf.zeros_initializer)
        ready_list = tf.get_variable("ready_list", [num_workers], tf.int32, tf.zeros_initializer)
    with tf.device("/job:worker/task:0"):
        # image
        image = tf.image.decode_jpeg(image_string, channels=3)
        processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
        processed_images  = tf.expand_dims(processed_image, 0)
        shared_image = tf.Variable(processed_images, name="shared_image") 

    #download the inception v1 checkpoint if we need to 
    url = "http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz"
    checkpoints_dir = '/tmp/checkpoints'
    if not tf.gfile.Exists(checkpoints_dir):
        tf.gfile.MakeDirs(checkpoints_dir)
    if not tf.gfile.Exists(checkpoints_dir+'/inception_v1_2016_08_28.tar.gz'):
        dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir)
    # end download

    server = tf.train.Server(cluster, job_name="ps", task_index=0)
    sess = tf.Session(target=server.target)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_v1_dist_arg_scope()):
        logits, _ = inception.inception_v1_dist(shared_image, num_workers, num_classes=1001, is_training=False, reuse=tf.AUTO_REUSE)
        probabilities = tf.nn.softmax(logits)

    # initialization function that uses saved parameters
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
        slim.get_model_variables('InceptionV1'))
    sess.run(tf.initialize_variables([done_list, ready_list, shared_image])) # initialize variables that aren't model parameters
    init_fn(sess)
    
    # wait for workers to acknowledge variables have been initialized
    while sess.run(tf.reduce_sum(ready_list)) < num_workers:
        pass

    # run inference and trace which device executed each op
    print("before getting probs")
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    np_image, probabilities = sess.run([shared_image, probabilities], options=run_options, run_metadata=run_metadata)
    print("after getting probs")

    # see who did what
    for device in run_metadata.step_stats.dev_stats:
        print(device.device)
        for node in device.node_stats:
            print("  ", node.node_name)

    # indicate that the ps task is done
    sess.run(tf.scatter_update(done_list, [0], 1))
   
    # wait until all tasks are done
    num_done = 1
    while num_done < num_workers+1:
        num_done = sess.run(tf.reduce_sum(done_list)) 

    sess.close()

    probabilities = probabilities[0, 0:]
    sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]

    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
        index = sorted_inds[i]
        probability = 'Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index])
        prob_list.append(probability)
        print(probability)
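One caveat about the polling loops in this example: calling tf.reduce_sum inside `while` adds a new node to the graph on every iteration. A leak-free sketch under the same names (ready_list, done_list, num_workers are from the function above):

ready_sum = tf.reduce_sum(ready_list)  # build the op once, outside the loop
while sess.run(ready_sum) < num_workers:
    pass

done_sum = tf.reduce_sum(done_list)
while sess.run(done_sum) < num_workers + 1:
    pass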
Example #39
def run(args, server):
    env = create_env(args.env_id,
                     client_id=str(args.task),
                     remotes=args.remotes)
    if args.teacher:
        teacher = model.LSTMPolicy(env.observation_space.shape,
                                   env.action_space.n,
                                   name="global")
        teacher_init_op = teacher.load_model_from_checkpoint(
            args.checkpoint_path)

        trainer = A3C(env,
                      args.task,
                      args.visualise,
                      teacher=teacher,
                      name="student")

    else:
        teacher = None
        trainer = A3C(env, args.task, args.visualise, teacher=teacher)

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = trainer.global_var_list
        all_trainable_variables = [
            v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            if trainer.scope in v.name
        ]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.variables_initializer(all_trainable_variables)

    else:

        variables_to_save = trainer.global_var_list
        init_op = tf.initialize_variables(variables_to_save)
        all_trainable_variables = [
            v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            if trainer.scope in v.name
        ]
        init_all_op = tf.initialize_variables(all_trainable_variables)

    saver = FastSaver(variables_to_save)

    logger.info('Trainable vars:')

    for v in all_trainable_variables:
        logger.info('{} {}'.format(v.name, v.get_shape()))

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run([init_all_op])

    def get_init_fn():
        if args.teacher:
            return tf.contrib.framework.assign_from_checkpoint_fn(
                args.checkpoint_path,
                teacher.var_list,
                ignore_missing_vars=True)
        else:
            return lambda sess: init_fn(sess)

    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)
    ])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir +
                                               "_{}".format(args.task))
    else:
        summary_writer = tf.train.SummaryWriter(logdir +
                                                "_{}".format(args.task))

    logger.info("Events directory: {}_{}".format(logdir, args.task))

    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=logdir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=get_init_fn(),
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=trainer.global_step,
        save_model_secs=30,
        save_summaries_secs=30)

    num_global_steps = 100000000

    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. "
        +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified."
    )
    with sv.managed_session(server.target,
                            config=config) as sess, sess.as_default():
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at step={}".format(global_step))
        while not sv.should_stop() and (not num_global_steps
                                        or global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached {} steps. worker stopped.'.format(global_step))
Example #40
def train(sess,
          repModel,
          evalModel,
          permModel,
          save_dir,
          dataset,
          oracle,
          freeze,
          min_feats="1",
          max_feats="10",
          inner_batch_size=5,
          inner_iters=20,
          learning_rate=0.0001,
          meta_step_size=0.1,
          meta_batch_size=1,
          meta_iters=15001,
          reptile_fn=Reptile,
          perm_epochs=501,
          perm_lr=0.00001,
          feature_split=0,
          name="Model",
          name_affix="",
          save_path="exp1",
          log_fn=id_print,
          job_id=0):

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    if saving_raw:
        raw_path = "./results/" + save_path + "/" + name

        if not os.path.exists(raw_path):
            os.system(f"mkdir -p {raw_path}")

        if not os.path.exists(os.path.join(raw_path, name_affix)):
            os.mkdir(os.path.join(raw_path, name_affix))

    # Save scratch model vars, so they can be reset during training
    evalVars = []
    for v in tf.trainable_variables():
        if "EvalMod" in v.name:
            evalVars.append(v)

    # init reptile process
    reptile = reptile_fn(sess)
    saver = tf.train.Saver()

    # Loading Data
    start = time.time()
    train_gen = dataset.generate(dataset.totalLabels,
                                 ast.literal_eval(min_feats),
                                 ast.literal_eval(max_feats),
                                 inner_batch_size,
                                 inner_batch_size,
                                 meta_batch_size,
                                 test=False,
                                 oracle=oracle)

    if oracle:
        test_data = dataset.test_data_oracle
    else:
        test_data = dataset.test_data

    log_fn(
        job_id,
        f"------------------Finished loading data in {time.time()-start}--------------------------"
    )
    log_fn(
        job_id,
        f"Training data shape: X{dataset.train_x.shape} y{dataset.train_y.shape}"
    )
    log_fn(job_id,
           f"Test data shape: X{dataset.val_x.shape} y{dataset.val_y.shape}")
    log_fn(job_id, "")

    # Declare the tensorflow graph
    # Each component has an optimizer and update ops
    if permModel is not None:
        # Optimizer here used for pretraining
        perm_opt = tf.train.AdamOptimizer(learning_rate=perm_lr)
        perm_gradients, perm_variables = zip(
            *perm_opt.compute_gradients(permModel.loss))
        perm_train_op = perm_opt.apply_gradients(
            zip(perm_gradients, perm_variables))

    if not freeze:
        rep_opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        rep_gradients, rep_variables = zip(
            *rep_opt.compute_gradients(repModel.totalLoss))
        rep_train_op = rep_opt.apply_gradients(
            zip(rep_gradients, rep_variables))
        repModel.setTrainOP(rep_train_op)
    else:
        rep_opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        rep_train_op = rep_opt.minimize(
            repModel.totalLoss,
            var_list=[
                var for var in tf.trainable_variables()
                if "Perm" not in var.name and "RepMod" in var.name
            ])
        repModel.setTrainOP(rep_train_op)

    if evalModel is not None:
        eval_opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        eval_gradients, eval_variables = zip(
            *eval_opt.compute_gradients(evalModel.totalLoss))
        eval_train_op = eval_opt.apply_gradients(
            zip(eval_gradients, eval_variables))
        evalModel.setTrainOP(eval_train_op)

    # Initialize variables
    sess.run(tf.initializers.global_variables())
    if evalModel is not None:
        init_eval_vars = tf.initialize_variables(evalVars)

    # Pre-train the permutation model
    if permModel is not None:
        log_fn(job_id, "")
        log_fn(
            job_id,
            "-------------------------------Training Chameleon network:-------------------------------"
        )
        start = time.time()

        # Sample a random task for pre training from meta data set "data"
        def sampleTask(data, minF, maxF, maxK, meta_batch=1):

            meta_x = []
            meta_y = []

            num_feat = np.random.randint(minF, maxF + 1)
            for b in range(meta_batch):
                features = np.random.choice(range(len(data[0])),
                                            num_feat,
                                            replace=False)
                out = np.transpose(np.array([data[:, i] for i in features]))
                order = np.eye(maxK)[features.astype(int)]

                meta_x.append(out)
                meta_y.append(order)

            return meta_x, meta_y

        trainLoss_buffer = []
        train_losses = []
        train_indexes = []

        for epoch in range(1, perm_epochs + 1):

            X_pre = dataset.train_x[:, dataset.train_f]
            np.random.shuffle(X_pre)

            loss_per_epoch = []
            val_loss_per_epoch = []
            val_acc_per_epoch = []

            for minibatch in range(
                    int(len(X_pre) /
                        (inner_batch_size * dataset.totalLabels))):
                # Currently only mb = 1
                X = X_pre[(inner_batch_size * dataset.totalLabels) *
                          minibatch:(inner_batch_size * dataset.totalLabels) *
                          minibatch + (inner_batch_size * dataset.totalLabels)]
                X_perm, order = sampleTask(X, len(X[0]), len(X[0]),
                                           len(dataset.train_f), 32)
                loss, _, out = sess.run(
                    [permModel.loss, perm_train_op, permModel.out],
                    feed_dict={
                        permModel.task: X_perm,
                        permModel.label: order
                    })  #, permModel.train_mode:True})
                loss_per_epoch.append(loss)

            trainLoss_buffer.append(np.mean(loss_per_epoch))
            train_losses.append(np.mean(loss_per_epoch))
            train_indexes.append(epoch)

            if epoch % 50 == 0:
                log_fn(
                    job_id,
                    f"Epoch {epoch}: Permutation loss: {np.mean(trainLoss_buffer):.3f}"
                )
                trainLoss_buffer = []

        if perm_epochs != 0 and not freeze:
            if saving_plots:
                savePlot(train_losses,
                         "Plots",
                         dataset.path.split("/")[-1],
                         "Permutation",
                         "Chameleon",
                         xticks=train_indexes,
                         xaxis="Meta Epochs",
                         yaxis="Loss",
                         run=name)
            if saving_raw:
                np.save(
                    os.path.join(raw_path, name_affix) + "/perm_loss.npy",
                    np.array([train_indexes, train_losses]))

    log_fn(job_id, f"Finished pre-training in {time.time()-start:.2f}s")
    log_fn(job_id, "")

    if evalModel is not None:
        log_fn(
            job_id,
            "-------------------------------Evaluating Test Data with Scratch Training:---------------"
        )
        reptile.testScratch(evalModel,
                            inner_iters,
                            init_eval_vars,
                            data=test_data,
                            train_f=len(dataset.train_f))
        log_fn(
            job_id,
            f"Scratch Evaluation: -- Test Loss {reptile.scratch_loss} -- Test Acc {reptile.scratch_acc}"
        )
        log_fn(job_id, "")
        if saving_plots:
            savePlot([reptile.scratch_loss, reptile.scratch_loss],
                     "Plots",
                     dataset.path.split("/")[-1],
                     "Final_Metatest_ValLoss",
                     "Scratch",
                     xticks=[0, meta_iters],
                     xaxis="Meta Epochs",
                     yaxis="Loss",
                     run=name)
            savePlot([reptile.scratch_acc, reptile.scratch_acc],
                     "Plots",
                     dataset.path.split("/")[-1],
                     "Final_Metatest_ValAcc",
                     "Scratch",
                     xticks=[0, meta_iters],
                     xaxis="Meta Epochs",
                     yaxis="Accuracy",
                     run=name)
        if saving_raw:
            np.save(
                os.path.join(raw_path, name_affix) +
                "/Scratch_Metatest_ValLoss.npy",
                np.array([reptile.scratch_loss, reptile.scratch_loss]))
            np.save(
                os.path.join(raw_path, name_affix) +
                "/Scratch_Metatest_ValAcc.npy",
                np.array([reptile.scratch_acc, reptile.scratch_acc]))

    if permModel is not None:
        log_fn(
            job_id,
            "-------------------------------Training Chameleon and Base Model with reptile:-----------"
        )
    else:
        log_fn(
            job_id,
            "--------------------------------------Training Base Model with reptile:------------------"
        )

    # Evaluate the initialized model
    val_final = []
    val_index = []
    train_final = []
    train_index = []

    train_buffer = []
    full_start = time.time()
    start = time.time()
    if oracle:
        # make sure oracle is padded to testfeats
        t_f = dataset.totalFeatures
    else:
        t_f = len(dataset.train_f)

    reptile.evaluate(repModel=repModel,
                     permModel=permModel,
                     inner_iters=inner_iters,
                     data=test_data,
                     train_f=t_f,
                     exp_name=dataset.path.split("/")[-1],
                     meta_epoch=0,
                     name=name)
    log_fn(
        job_id,
        f"Val Epoch {0}: Initial Train Loss: {reptile.eval_train_losses[0]:.2f} -- Final Train Loss: {reptile.eval_train_losses[-1]:.2f} -- Val Loss: {reptile.eval_test_loss:.2f} -- Val Acc: {reptile.eval_test_acc:.2f} in {time.time()-start:.2f}s"
    )
    log_fn(job_id, "")
    val_final.append([
        reptile.eval_train_losses[0], reptile.eval_train_losses[-1],
        reptile.eval_test_loss, reptile.eval_test_acc
    ])

    # Perform reptile joint training on the model
    for meta_epoch in range(1, meta_iters + 1):
        start = time.time()
        # Perform one train step
        reptile.train_step(repModel=repModel,
                           permModel=permModel,
                           inner_iters=inner_iters,
                           meta_step_size=meta_step_size,
                           data=train_gen,
                           train_f=t_f,
                           exp_name=dataset.path.split("/")[-1],
                           meta_epoch=meta_epoch,
                           name=name)
        train_final.append([
            reptile.run_train_losses[0], reptile.run_train_losses[-1],
            reptile.run_test_loss_before, reptile.run_test_loss_after
        ])
        train_index.append(meta_epoch)
        train_buffer.append([
            reptile.run_train_losses[0], reptile.run_train_losses[-1],
            reptile.run_test_loss_before, reptile.run_test_loss_after
        ])

        # log_fn Train Step
        if meta_epoch % 100 == 0:
            train_buffer = np.mean(train_buffer, axis=0)
            log_fn(
                job_id,
                f"Train Epoch {meta_epoch}: Initial Train Loss: {train_buffer[0]:.2f} -- Final Train Loss: {train_buffer[1]:.2f} -- Initial Val Loss: {train_buffer[2]:.2f} -- Final Val Loss: {train_buffer[3]:.2f}"
            )
            train_buffer = []

        # Validates performance on test data
        if meta_epoch % 100 == 0:
            reptile.evaluate(repModel=repModel,
                             permModel=permModel,
                             inner_iters=inner_iters,
                             data=test_data,
                             train_f=t_f,
                             exp_name=dataset.path.split("/")[-1],
                             meta_epoch=meta_epoch,
                             name=name)
            val_final.append([
                reptile.eval_train_losses[0], reptile.eval_train_losses[-1],
                reptile.eval_test_loss, reptile.eval_test_acc
            ])
            val_index.append(meta_epoch)
            log_fn(
                job_id,
                f"Val Epoch {0}: Initial Train Loss: {reptile.eval_train_losses[0]:.2f} -- Final Train Loss: {reptile.eval_train_losses[-1]:.2f} -- Val Loss: {reptile.eval_test_loss:.2f} -- Val Acc: {reptile.eval_test_acc:.2f} in {time.time()-start:.2f}s"
            )

    log_fn(job_id, f"Finished joint training in {time.time()-full_start}s")
    log_fn(job_id, "")

    if permModel is not None:
        permName = "Chameleon+Reptile"
    else:
        permName = "Reptile"

    if freeze:
        permName += "_Frozen"
    if perm_epochs == 0:
        permName += "_Untrained"

    log_fn(job_id, "Final Shape", np.array(train_final).shape)
    log_fn(job_id, "Final Shape", np.array(val_final).shape)
    log_fn(job_id, val_index)

    if saving_plots:
        savePlot(np.array(train_final)[:, 0],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Initial_Metatrain_Loss",
                 permName,
                 xticks=train_index,
                 xaxis="Meta Epochs",
                 yaxis="Loss",
                 run=name)
        savePlot(np.array(train_final)[:, 1],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Final_Metatrain_Loss",
                 permName,
                 xticks=train_index,
                 xaxis="Meta Epochs",
                 yaxis="Loss",
                 run=name)
        savePlot(np.array(train_final)[:, 2],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Initial_Metatrain_ValLoss",
                 permName,
                 xticks=train_index,
                 xaxis="Meta Epochs",
                 yaxis="Loss",
                 run=name)
        savePlot(np.array(train_final)[:, 3],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Final_Metatrain_ValLoss",
                 permName,
                 xticks=train_index,
                 xaxis="Meta Epochs",
                 yaxis="Loss",
                 run=name)

        savePlot(np.array(val_final)[:, 0],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Initial_Metatest_Loss",
                 permName,
                 xticks=val_index,
                 xaxis="Meta Epochs",
                 yaxis="Loss",
                 run=name)
        savePlot(np.array(val_final)[:, 1],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Final_Metatest_Loss",
                 permName,
                 xticks=val_index,
                 xaxis="Meta Epochs",
                 yaxis="Loss",
                 run=name)
        savePlot(np.array(val_final)[:, 2],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Final_Metatest_ValLoss",
                 permName,
                 xticks=val_index,
                 xaxis="Meta Epochs",
                 yaxis="Loss",
                 run=name)
        savePlot(np.array(val_final)[:, 3],
                 "Plots",
                 dataset.path.split("/")[-1],
                 "Final_Metatest_ValAcc",
                 permName,
                 xticks=val_index,
                 xaxis="Meta Epochs",
                 yaxis="Accuracy",
                 run=name)

    if saving_raw:
        np.save(
            os.path.join(raw_path, name_affix) + "/TrainIndexes.npy",
            train_index)
        np.save(
            os.path.join(raw_path, name_affix) + "/Initial_Metatrain_Loss.npy",
            np.array(train_final)[:, 0])
        np.save(
            os.path.join(raw_path, name_affix) + "/Final_Metatrain_Loss.npy",
            np.array(train_final)[:, 1])
        np.save(
            os.path.join(raw_path, name_affix) +
            "/Initial_Metatrain_ValLoss.npy",
            np.array(train_final)[:, 2],
        )
        np.save(
            os.path.join(raw_path, name_affix) +
            "/Final_Metatrain_ValLoss.npy",
            np.array(train_final)[:, 3])

        np.save(
            os.path.join(raw_path, name_affix) + "/ValIndexes.npy", val_index)
        np.save(
            os.path.join(raw_path, name_affix) + "/Initial_Metatest_Loss.npy",
            np.array(val_final)[:, 0])
        np.save(
            os.path.join(raw_path, name_affix) + "/Final_Metatest_Loss.npy",
            np.array(val_final)[:, 1])
        np.save(
            os.path.join(raw_path, name_affix) + "/Final_Metatest_ValLoss.npy",
            np.array(val_final)[:, 2])
        np.save(
            os.path.join(raw_path, name_affix) + "/Final_Metatest_ValAcc.npy",
            np.array(val_final)[:, 3])
Example #41
# Helper ops
nonzero_indicator1 = tf.to_float(tf.not_equal(W_fc1, tf.zeros_like(W_fc1)))
nonzero_indicator2 = tf.to_float(tf.not_equal(W_fc2, tf.zeros_like(W_fc2)))
nonzero_indicator3 = tf.to_float(tf.not_equal(W_fc3, tf.zeros_like(W_fc3)))
count_parameters1 = tf.reduce_sum(nonzero_indicator1)
count_parameters2 = tf.reduce_sum(nonzero_indicator2)
count_parameters3 = tf.reduce_sum(nonzero_indicator3)

# Create a saver for writing training checkpoints.
saver = tf.train.Saver()

# Run training in a session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
sess.run(tf.initialize_variables(tf.get_collection(
    tf.GraphKeys.PRUNING_MASKS)))


def print_mask_parameter_counts():
    print("# Mask Parameter Counts")
    print("  - Mask1: {0}".format(
        sess.run(
            tf.reduce_sum(
                tf.to_float(
                    tf.not_equal(indicator_matrix1,
                                 tf.zeros_like(indicator_matrix1)))))))
    print("  - Mask2: {0}".format(
        sess.run(
            tf.reduce_sum(
                tf.to_float(
                    tf.not_equal(indicator_matrix2,
Example #42
def reinit(self):
    init = tf.initialize_variables(tf.trainable_variables())
    self.sess.run(init)
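On TF >= 0.12 the same reset can be written with the non-deprecated API; a minimal sketch assuming the same class context:

def reinit(self):
    # tf.variables_initializer is the replacement for tf.initialize_variables
    self.sess.run(tf.variables_initializer(tf.trainable_variables()))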
Example #43
    def train(self, config):
        global_step1 = tf.Variable(0,
                                   name='global_step_train1',
                                   trainable=False)
        global_step2 = tf.Variable(0,
                                   name='global_step_train2',
                                   trainable=False)
        global_step3 = tf.Variable(0,
                                   name='global_step_train3',
                                   trainable=False)

        train_optim_ver = tf.train.AdamOptimizer(
            config.learning_rate).minimize(self.loss_fine_ver,
                                           global_step=global_step1)
        train_optim_hor = tf.train.AdamOptimizer(
            config.learning_rate).minimize(self.loss_fine_hor,
                                           global_step=global_step2)
        train_optim_views = tf.train.AdamOptimizer(
            config.learning_rate).minimize(self.loss_fine_views,
                                           global_step=global_step3)

        t_vars = tf.trainable_variables()
        self.var_list1 = [var for var in t_vars if 'first_two_' in var.name]
        self.var_list2 = [var for var in t_vars if 'last' in var.name]
        self.var_list3 = [var for var in t_vars if 'ver' in var.name]
        self.var_list4 = [var for var in t_vars if 'hor' in var.name]
        self.var_list5 = [var for var in t_vars if 'views' in var.name]
        self.var_list6 = [var for var in t_vars if 'shared_' in var.name]
        self.var_list7 = [var for var in t_vars if 'shread_' in var.name]

        tf.initialize_all_variables().run()

        if config.is_finetune:
            # Initialize Spanet and load pretrained network
            tmp = self.var_list1 + self.var_list2
            self.saver = tf.train.Saver(var_list=tmp, max_to_keep=1)
            tf.initialize_variables(tmp).run()  # initialize before loading the pretrained network
            if self.loadnet(self.checkpoint_dir,
                            'spaSR'):  #Load Spatial SR network
                print('Load pretrained spatial network')
            else:
                print(' Load Fail!!')

            tmp = self.var_list3 + self.var_list4 + self.var_list5 + self.var_list6 + self.var_list7
            self.saver = tf.train.Saver(var_list=tmp, max_to_keep=1)
            tf.initialize_variables(tmp).run()

            if self.loadnet(self.checkpoint_dir,
                            'allviews'):  #Load Spatial SR network
                print('Load pretrained angular network')
            else:
                print(' Load Fail!!')
            self.saver = tf.train.Saver(max_to_keep=1)

        else:
            self.saver = tf.train.Saver(max_to_keep=1)
            if self.loadnet(self.checkpoint_dir,
                            'finetune'):  #Load Spatial SR network
                print('Load pretrained angular network')
            else:
                print(' Load Fail!!')

        train_ver_input, train_hor_input, train_views_input, train_ver_sr_gt, train_hor_sr_gt, train_views_sr_gt, train_ver_ang_gt, train_hor_ang_gt, train_views_ang_gt = load_traindata(
        )
        [
            val_ver_input, val_hor_input, val_views_input, val_ver_sr_gt,
            val_hor_sr_gt, val_views_sr_gt, val_ver_ang_gt, val_hor_ang_gt,
            val_views_ang_gt
        ] = load_valdata()

        batch_idxs_views = train_views_input.shape[-1] / self.batch_size
        val_batch_idxs_views = val_views_input.shape[-1] / self.batch_size
        for epoch in xrange(config.epochs):
            rand_idx_ver = np.random.permutation(
                range(train_ver_input.shape[-1]))
            rand_idx_hor = np.random.permutation(
                range(train_hor_input.shape[-1]))
            rand_idx_views = np.random.permutation(
                range(train_views_input.shape[-1]))
            val_rand_idx_ver = np.random.permutation(
                range(val_ver_input.shape[-1]))
            val_rand_idx_hor = np.random.permutation(
                range(val_hor_input.shape[-1]))
            val_rand_idx_views = np.random.permutation(
                range(val_views_input.shape[-1]))

            train_spa_MSE = 0.0
            train_ang_MSE = 0.0
            train_total_MSE = 0.0
            val_spa_MSE = 0.0
            val_ang_MSE = 0.0
            val_total_MSE = 0.0

            for idx in xrange(0, batch_idxs_views):
                if epoch == 0:
                    f_train_epoch = open(
                        os.path.join("logs", self.date, 'train_epoch.log'),
                        'w')
                    f_val = open(os.path.join("logs", self.date, 'val.log'),
                                 'w')
                else:
                    f_train_epoch = open(
                        os.path.join("logs", self.date, 'train_epoch.log'),
                        'a')
                    f_val = open(os.path.join("logs", self.date, 'val.log'),
                                 'a')
                randview = np.random.permutation(range(3))
                for view in randview:
                    if view == 0:
                        batch_files = rand_idx_ver[idx *
                                                   config.batch_size:(idx +
                                                                      1) *
                                                   config.batch_size]
                        batches = [
                            get_image(train_ver_input[0, batch],
                                      train_ver_sr_gt[0, batch],
                                      train_ver_ang_gt[0,
                                                       batch], self.image_wid)
                            for batch in batch_files
                        ]
                        batches = np.array(batches).astype(np.float32)
                        input1 = batches[:, :, :, 0]
                        input1 = np.expand_dims(input1, axis=-1)
                        input2 = batches[:, :, :, 1]
                        input2 = np.expand_dims(input2, axis=-1)
                        spa_gt1 = batches[:, :, :, 2]
                        spa_gt1 = np.expand_dims(spa_gt1, axis=-1)
                        spa_gt2 = batches[:, :, :, 3]
                        spa_gt2 = np.expand_dims(spa_gt2, axis=-1)
                        ang_gt = batches[:, :, :, 4]
                        ang_gt = np.expand_dims(ang_gt, axis=-1)

                        _, total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run(
                            [
                                train_optim_ver, self.loss_fine_ver,
                                self.loss_spa1, self.loss_spa2, self.loss_ver
                            ],
                            feed_dict={
                                self.train_input1: input1,
                                self.train_input2: input2,
                                self.train_spa_gt1: spa_gt1,
                                self.train_spa_gt2: spa_gt2,
                                self.train_ang_gt: ang_gt
                            })
                        self.count += 1
                        train_ang_MSE += ang_MSE
                        train_total_MSE += total_MSE
                        train_spa_MSE = (spa1_MSE +
                                         spa2_MSE) / 2. + train_spa_MSE
                    elif view == 1:
                        batch_files = rand_idx_hor[idx *
                                                   config.batch_size:(idx +
                                                                      1) *
                                                   config.batch_size]
                        batches = [
                            get_image(train_hor_input[0, batch],
                                      train_hor_sr_gt[0, batch],
                                      train_hor_ang_gt[0,
                                                       batch], self.image_wid)
                            for batch in batch_files
                        ]
                        batches = np.array(batches).astype(np.float32)

                        input1 = batches[:, :, :, 0]
                        input1 = np.expand_dims(input1, axis=-1)
                        input2 = batches[:, :, :, 1]
                        input2 = np.expand_dims(input2, axis=-1)
                        spa_gt1 = batches[:, :, :, 2]
                        spa_gt1 = np.expand_dims(spa_gt1, axis=-1)
                        spa_gt2 = batches[:, :, :, 3]
                        spa_gt2 = np.expand_dims(spa_gt2, axis=-1)
                        ang_gt = batches[:, :, :, -1]
                        ang_gt = np.expand_dims(ang_gt, axis=-1)
                        _, total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run(
                            [
                                train_optim_hor, self.loss_fine_hor,
                                self.loss_spa1, self.loss_spa2, self.loss_hor
                            ],
                            feed_dict={
                                self.train_input1: input1,
                                self.train_input2: input2,
                                self.train_spa_gt1: spa_gt1,
                                self.train_spa_gt2: spa_gt2,
                                self.train_ang_gt: ang_gt
                            })

                        self.count += 1
                        train_ang_MSE += ang_MSE
                        train_total_MSE += total_MSE
                        train_spa_MSE = (spa1_MSE +
                                         spa2_MSE) / 2. + train_spa_MSE

                    else:
                        batch_files = rand_idx_views[idx *
                                                     config.batch_size:(idx +
                                                                        1) *
                                                     config.batch_size]
                        batches = [
                            get_image(train_views_input[0, batch],
                                      train_views_sr_gt[0, batch],
                                      train_views_ang_gt[0, batch],
                                      self.image_wid) for batch in batch_files
                        ]
                        batches = np.array(batches).astype(np.float32)
                        input1 = batches[:, :, :, 0]
                        input1 = np.expand_dims(input1, axis=-1)
                        input2 = batches[:, :, :, 1]
                        input2 = np.expand_dims(input2, axis=-1)
                        input3 = batches[:, :, :, 2]
                        input3 = np.expand_dims(input3, axis=-1)
                        input4 = batches[:, :, :, 3]
                        input4 = np.expand_dims(input4, axis=-1)
                        spa_gt1 = batches[:, :, :, 4]
                        spa_gt1 = np.expand_dims(spa_gt1, axis=-1)
                        spa_gt2 = batches[:, :, :, 5]
                        spa_gt2 = np.expand_dims(spa_gt2, axis=-1)
                        spa_gt3 = batches[:, :, :, 6]
                        spa_gt3 = np.expand_dims(spa_gt3, axis=-1)
                        spa_gt4 = batches[:, :, :, 7]
                        spa_gt4 = np.expand_dims(spa_gt4, axis=-1)

                        ang_gt = batches[:, :, :, -1]
                        ang_gt = np.expand_dims(ang_gt, axis=-1)
                        _, total_MSE, spa1_MSE, spa2_MSE, spa3_MSE, spa4_MSE, ang_MSE = self.sess.run(
                            [
                                train_optim_views, self.loss_fine_views,
                                self.loss_spa1, self.loss_spa2, self.loss_spa3,
                                self.loss_spa4, self.loss_views
                            ],
                            feed_dict={
                                self.train_input1: input1,
                                self.train_input2: input2,
                                self.train_input3: input3,
                                self.train_input4: input4,
                                self.train_spa_gt1: spa_gt1,
                                self.train_spa_gt2: spa_gt2,
                                self.train_spa_gt3: spa_gt3,
                                self.train_spa_gt4: spa_gt4,
                                self.train_ang_gt: ang_gt
                            })

                        self.count += 1
                        train_ang_MSE += ang_MSE
                        train_spa_MSE = (spa1_MSE + spa2_MSE + spa3_MSE +
                                         spa4_MSE) / 4. + train_spa_MSE
                        train_total_MSE += total_MSE

            print(
                'Epoch train[%2d] total MSE: %.4f spa MSE: %.4f ang MSE: %.4f \n'
                % (epoch, train_total_MSE /
                   (3 * batch_idxs_views), train_spa_MSE /
                   (3 * batch_idxs_views), train_ang_MSE /
                   (3 * batch_idxs_views)))

            #Validation
            for val_idx in xrange(0, val_batch_idxs_views):

                randview = np.random.permutation(range(3))
                for view in randview:
                    if view == 0:
                        batch_files = val_rand_idx_ver[val_idx *
                                                       config.batch_size:
                                                       (val_idx + 1) *
                                                       config.batch_size]
                        batches = [
                            get_image(val_ver_input[0, batch],
                                      val_ver_sr_gt[0, batch],
                                      val_ver_ang_gt[0, batch], self.image_wid)
                            for batch in batch_files
                        ]
                        batches = np.array(batches).astype(np.float32)
                        input1 = batches[:, :, :, 0]
                        input1 = np.expand_dims(input1, axis=-1)
                        input2 = batches[:, :, :, 1]
                        input2 = np.expand_dims(input2, axis=-1)
                        spa_gt1 = batches[:, :, :, 2]
                        spa_gt1 = np.expand_dims(spa_gt1, axis=-1)
                        spa_gt2 = batches[:, :, :, 3]
                        spa_gt2 = np.expand_dims(spa_gt2, axis=-1)
                        ang_gt = batches[:, :, :, 4]
                        ang_gt = np.expand_dims(ang_gt, axis=-1)
                        total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run(
                            [
                                self.loss_fine_ver, self.loss_spa1,
                                self.loss_spa2, self.loss_ver
                            ],
                            feed_dict={
                                self.train_input1: input1,
                                self.train_input2: input2,
                                self.train_spa_gt1: spa_gt1,
                                self.train_spa_gt2: spa_gt2,
                                self.train_ang_gt: ang_gt
                            })

                        val_ang_MSE += ang_MSE
                        val_total_MSE += total_MSE
                        val_spa_MSE = (spa1_MSE + spa2_MSE) / 2. + val_spa_MSE

                    elif view == 1:
                        batch_files = val_rand_idx_hor[val_idx *
                                                       config.batch_size:
                                                       (val_idx + 1) *
                                                       config.batch_size]
                        batches = [
                            get_image(val_hor_input[0, batch],
                                      val_hor_sr_gt[0, batch],
                                      val_hor_ang_gt[0, batch], self.image_wid)
                            for batch in batch_files
                        ]
                        batches = np.array(batches).astype(np.float32)
                        input1 = batches[:, :, :, 0]
                        input1 = np.expand_dims(input1, axis=-1)
                        input2 = batches[:, :, :, 1]
                        input2 = np.expand_dims(input2, axis=-1)
                        spa_gt1 = batches[:, :, :, 2]
                        spa_gt1 = np.expand_dims(spa_gt1, axis=-1)
                        spa_gt2 = batches[:, :, :, 3]
                        spa_gt2 = np.expand_dims(spa_gt2, axis=-1)
                        ang_gt = batches[:, :, :, -1]
                        ang_gt = np.expand_dims(ang_gt, axis=-1)
                        total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run(
                            [
                                self.loss_fine_hor, self.loss_spa1,
                                self.loss_spa2, self.loss_hor
                            ],
                            feed_dict={
                                self.train_input1: input1,
                                self.train_input2: input2,
                                self.train_spa_gt1: spa_gt1,
                                self.train_spa_gt2: spa_gt2,
                                self.train_ang_gt: ang_gt
                            })
                        val_ang_MSE += ang_MSE
                        val_total_MSE += total_MSE
                        val_spa_MSE = (spa1_MSE + spa2_MSE) / 2. + val_spa_MSE

                    else:
                        batch_files = val_rand_idx_views[val_idx *
                                                         config.batch_size:
                                                         (val_idx + 1) *
                                                         config.batch_size]
                        batches = [
                            get_image(val_views_input[0, batch],
                                      val_views_sr_gt[0, batch],
                                      val_views_ang_gt[0,
                                                       batch], self.image_wid)
                            for batch in batch_files
                        ]
                        batches = np.array(batches).astype(np.float32)
                        input1 = batches[:, :, :, 0]
                        input1 = np.expand_dims(input1, axis=-1)
                        input2 = batches[:, :, :, 1]
                        input2 = np.expand_dims(input2, axis=-1)
                        input3 = batches[:, :, :, 2]
                        input3 = np.expand_dims(input3, axis=-1)
                        input4 = batches[:, :, :, 3]
                        input4 = np.expand_dims(input4, axis=-1)
                        spa_gt1 = batches[:, :, :, 4]
                        spa_gt1 = np.expand_dims(spa_gt1, axis=-1)
                        spa_gt2 = batches[:, :, :, 5]
                        spa_gt2 = np.expand_dims(spa_gt2, axis=-1)
                        spa_gt3 = batches[:, :, :, 6]
                        spa_gt3 = np.expand_dims(spa_gt3, axis=-1)
                        spa_gt4 = batches[:, :, :, 7]
                        spa_gt4 = np.expand_dims(spa_gt4, axis=-1)

                        ang_gt = batches[:, :, :, -1]
                        ang_gt = np.expand_dims(ang_gt, axis=-1)
                        total_MSE, spa1_MSE, spa2_MSE, spa3_MSE, spa4_MSE, ang_MSE = self.sess.run(
                            [
                                self.loss_fine_views, self.loss_spa1,
                                self.loss_spa2, self.loss_spa3, self.loss_spa4,
                                self.loss_views
                            ],
                            feed_dict={
                                self.train_input1: input1,
                                self.train_input2: input2,
                                self.train_input3: input3,
                                self.train_input4: input4,
                                self.train_spa_gt1: spa_gt1,
                                self.train_spa_gt2: spa_gt2,
                                self.train_spa_gt3: spa_gt3,
                                self.train_spa_gt4: spa_gt4,
                                self.train_ang_gt: ang_gt
                            })

                        val_ang_MSE += ang_MSE
                        val_spa_MSE = (spa1_MSE + spa2_MSE + spa3_MSE +
                                       spa4_MSE) / 4. + val_spa_MSE
                        val_total_MSE += total_MSE

            print(
                'Epoch val[%2d] total MSE: %.4f spa MSE: %.4f ang MSE: %.4f \n'
                % (epoch, val_total_MSE /
                   (3 * val_batch_idxs_views), val_spa_MSE /
                   (3 * val_batch_idxs_views), val_ang_MSE /
                   (3 * val_batch_idxs_views)))
            if np.mod(epoch, 100) == 0:
                f_train_epoch.write(
                    'epoch %06d mean_total_MSE %.6f  mean_spa_MSE %.6f mean_ang_MSE %.6f\n'
                    % (epoch, train_total_MSE /
                       (3 * batch_idxs_views), train_spa_MSE /
                       (3 * batch_idxs_views), train_ang_MSE /
                       (3 * batch_idxs_views)))
                f_train_epoch.close()
                f_val.write(
                    'epoch %06d mean_total_MSE %.6f  mean_spa_MSE %.6f mean_ang_MSE %.6f\n'
                    % (epoch, val_total_MSE /
                       (3 * val_batch_idxs_views), val_spa_MSE /
                       (3 * val_batch_idxs_views), val_ang_MSE /
                       (3 * val_batch_idxs_views)))
                f_val.close()
                self.save(config.checkpoint_dir, 0)
Example #44
    def initialize(self, assign_dict):
        # This is where the `self._hidden` map is created.
        # The `tensorflow.Variable`s of the map are initialized
        # to the values given by the user in `assign_dict`.

        if Model._current_model == self:
            raise ModelError(
                "Can't call `model.initialize()` inside the model block")

        if self._observed is None:
            raise ModelError(
                "Can't initialize latent variables before `model.observed()` has been called."
            )

        if self._hidden is not None:
            raise ModelError(
                "Can't call `model.initialize()` twice. Use `model.assign()` to change the state."
            )

        if not isinstance(assign_dict, dict) or not assign_dict:
            raise ValueError(
                "Argument to `model.initialize()` must be a non-empty dictionary"
            )

        for key in assign_dict.keys():
            if not isinstance(key, tf.Tensor):
                raise ValueError(
                    "Key in the initialization dict is not a tf.Tensor: {}".
                    format(repr(key)))

        hidden = set(self._description.keys()).difference(set(self._observed))
        if hidden != set(assign_dict.keys()):
            raise ModelError(
                "Not all latent variables have been passed in a call to `model.initialize().\n\
                    Missing variables: {}".format(
                    hidden.difference(assign_dict.keys())))

        # Add variables to the execution graph
        with self.session.graph.as_default():
            self._hidden = dict()
            for var in hidden:
                self._hidden[var] = tf.Variable(var.dtype.as_numpy_dtype(
                    assign_dict[var]),
                                                name=var.name.split(':')[0])
        self.session.run(tf.initialize_variables(list(self._hidden.values())))
        # Sort the hidden variables so we can access them in a consistent order
        self._hidden_sorted = sorted(self._hidden.keys(), key=lambda v: v.name)
        for h in self._hidden.values():
            with self.session.graph.as_default():
                var = tf.Variable(h.dtype.as_numpy_dtype(),
                                  name=h.name.split(':')[0] + '_placeholder')
                setter = h.assign(var)
            self._setters[h] = (setter, var)

        all_vars = self._hidden.copy()
        all_vars.update(self._observed)

        self._rewrite_graph(all_vars)

        with self.session.graph.as_default():
            # observed_logps contains one element per data point
            observed_logps = [
                self._get_rewritten(self._description[v].logp)
                for v in self._observed
            ]
            # hidden_logps contains a single value
            hidden_logps = [
                self._get_rewritten(self._description[v].logp)
                for v in self._hidden
            ]

            # Handle the case where we don't have observed variables.
            # We define the probability to not observe anything as 1.
            if not observed_logps:
                observed_logps = [tf.constant(0, dtype=config.dtype)]

            self._pdf = tf.exp(tf.add_n(observed_logps))
            self._nll = -tf.add_n(
                [tf.reduce_sum(logp)
                 for logp in observed_logps] + hidden_logps)

            variables = [self._hidden[k] for k in self._hidden_sorted]
            self._nll_grad = tf.gradients(self._nll, variables)
            for i, (v, g) in enumerate(zip(variables, self._nll_grad)):
                if g is None:
                    self._nll_grad[i] = tf.constant(0, dtype=config.dtype)
                    logger.warn('Model is independent of variable {}'.format(
                        v.name.split(':')[0]))

        self.initialized = True
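For illustration, a sketch of how a hypothetical `assign()` method could consume the `(setter, placeholder)` pairs cached above (this method is an assumption for illustration, not part of the original class):

    def assign(self, assign_dict):
        # Feed each new value through the variable's cached assign op so the
        # graph is not rewritten on every state change.
        for key, value in assign_dict.items():
            setter, placeholder = self._setters[self._hidden[key]]
            self.session.run(setter, feed_dict={placeholder: value})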
Example #45
def main(_):
  game = pyspiel.load_game(FLAGS.game)

  # Information state length
  info_state_shape = game.information_state_tensor_shape()
  flat_info_state_length = np.prod(info_state_shape)

  # Output
  num_actions = game.num_distinct_actions()

  with tf.Session() as sess:
    net_input = tf.placeholder(
        tf.float32, [None, flat_info_state_length], name="input")

    # pylint: disable=unused-variable
    output = tf.placeholder(tf.float32, [None, num_actions], name="output")
    legals_mask = tf.placeholder(
        tf.float32, [None, num_actions], name="legals_mask")

    policy_net = tf.layers.dense(net_input, 128, activation=tf.nn.relu)
    policy_net = tf.layers.dense(policy_net, 128, activation=tf.nn.relu)
    policy_net = tf.layers.dense(policy_net, num_actions)

    # Note: subtracting the max here is to help with numerical stability.
    # However, there can still be numerical problems. If you are doing a softmax
    # here, it can return NaN when the max for the policy net is high on one of
    # the illegal actions, because policy_net - max will be small for legal
    # actions, giving all exp(small) == 0 in the denominator, returning NaN at
    # the end. One fix is to set the logits to -inf and define a custom cross
    # entropy op that ignores the illegal actions.
    policy_net = policy_net - tf.reduce_max(policy_net, axis=-1, keepdims=True)

    masked_exp_logit = tf.multiply(tf.exp(policy_net), legals_mask)
    renormalizing_factor = tf.reduce_sum(
        masked_exp_logit, axis=-1, keepdims=True)
    # pylint: disable=unused-variable
    policy_softmax = tf.where(
        tf.equal(legals_mask, 0.),
        tf.zeros_like(masked_exp_logit),
        tf.divide(masked_exp_logit, renormalizing_factor),
        name="policy_softmax")

    policy_targets = tf.placeholder(shape=[None, num_actions], dtype=tf.float32)

    policy_cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=policy_net, labels=policy_targets),
        axis=0)

    # We make one sample.
    sampled_actions = tf.random.categorical(
        tf.log(policy_softmax), 1, name="sampled_actions")

    # pylint: disable=unused-variable
    optimizer = tf.train.AdamOptimizer(0.0001).minimize(
        policy_cost, name="train")

    # pylint: disable=unused-variable
    init = tf.initialize_variables(tf.all_variables(), name="init_all_vars_op")

    print("Writing file: {}/{}".format(FLAGS.dir, FLAGS.filename))
    tf.train.write_graph(
        sess.graph_def, FLAGS.dir, FLAGS.filename, as_text=False)
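The exported GraphDef can later be reloaded and its ops addressed by name; a minimal sketch (the path below is hypothetical):

graph_def = tf.GraphDef()
with tf.gfile.GFile("/tmp/graph.pb", "rb") as f:
    graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name="")
# e.g. the init op defined above is now reachable as "init_all_vars_op"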
Example #46
def _precompute_image_features(img, layers, shape, save_dir):
    # type: (np.ndarray, Union[Tuple[str], List[str]], Union[Tuple[int], List[int]], Optional[str]) -> Dict[str, np.ndarray]
    """
    Precompute the features of the image by passing it through the vgg network and storing the computed layers.
    :param img: the image of which the features would be precomputed. It must have shape (height, width, 3)
    :param layers: A list of string specifying which layers would we be returning. Check vgg.py for layer names.
    :param shape: shape of the image placeholder.
    :param save_dir: Directory holding a checkpoint for the classifier variables. If provided, those variables are
    restored from the checkpoint; otherwise all variables are freshly initialized.
    :return: A dictionary containing the precomputed feature for each layer.
    """
    features_dict = {}
    g = tf.Graph()
    # Choose to use cpu here because we only need to compute this once and using cpu would provide us more memory
    # than the gpu and therefore allow us to process larger style images using the extra memory. This will not have
    # an effect on the training speed later since the gram matrix size is not related to the size of the image.
    with g.as_default(), g.device('/cpu:0'), tf.Session(config=tf.ConfigProto(
            device_count={'GPU': 0})) as sess:

        with tf.name_scope("classifier"):
            with tf.variable_scope("classifier", reuse=False):
                image = tf.placeholder(tf.uint8, shape=shape)
                image_float = tf.image.convert_image_dtype(image,
                                                           dtype=tf.float32)
                vgg = vgg19_mat.Vgg19(
                    vgg19_npy_path='imagenet-vgg-verydeep-19.mat')
                vgg.build(image_float, None)
                net = vgg.net()
                style_pre = np.array([img])
                style_pre = style_pre.astype(np.uint8)

                if '0.12.0' in tf.__version__:
                    all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                else:
                    all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)

                if save_dir is not None:
                    discrim_tvars = [
                        var for var in all_vars
                        if var.name.startswith("classifier")
                    ]
                    saver = tf.train.Saver(discrim_tvars)

                    ckpt = tf.train.get_checkpoint_state(save_dir)
                    if ckpt and ckpt.model_checkpoint_path:
                        saver.restore(sess, ckpt.model_checkpoint_path)
                    else:
                        raise AssertionError(
                            "Cannot load from save directory.")
                    #
                    var_not_saved = [
                        item for item in all_vars if item not in discrim_tvars
                    ]
                    print('Var not saved', var_not_saved)
                    sess.run(tf.initialize_variables(var_not_saved))
                else:
                    sess.run(tf.initialize_all_variables())

                for layer in layers:
                    # Calculate and store gramian.
                    features = net[layer].eval(feed_dict={image: style_pre})
                    features = np.reshape(features, (-1, features.shape[3]))
                    gram = np.matmul(features.T, features) / features.size
                    features_dict[layer] = gram
    return features_dict
Example #47
def stylize(network,
            content,
            styles,
            shape,
            iterations,
            save_dir=None,
            content_weight=5.0,
            style_weight=100.0,
            tv_weight=100.0,
            style_blend_weights=None,
            learning_rate=10.0,
            initial=None,
            use_mrf=False,
            use_semantic_masks=False,
            mask_resize_as_feature=True,
            output_semantic_mask=None,
            style_semantic_masks=None,
            semantic_masks_weight=1.0,
            print_iterations=None,
            checkpoint_iterations=None,
            semantic_masks_num_layers=4,
            content_img_style_weight_mask=None):
    """
    Stylize images.
    :param network: Path to pretrained vgg19 network. It can be downloaded at
    http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat
    :param content: The content image. If left blank, it will enter texture generation mode (style synthesis without
    context loss).
    :param styles: A list of style images as numpy arrays.
    :param shape: The shape of the output image. It should be with format (1, height, width, 3)
    :param iterations: The number of iterations to run.
    :param content_weight: The weight for content loss. The larger the weight, the more the output will look like
    the content image.
    :param style_weight: The weight for style loss. The larger the weight, the more the output will have a style that
    looks like the style images.
    :param tv_weight: The weight for total-variation loss. The larger the weight, the smoother the output will be.
    :param style_blend_weights: If inputting multiple style images, this controls the balance between their styles.
    If left as None, it will treat all style images as equal.
    :param learning_rate: As the name suggests.
    :param initial: The initial starting point for the output. If left blank, the initial image will just be noise.
    :param use_mrf: Whether to use Markov random field (MRF) loss instead of gramian loss. mrf_util.py contains more info.
    :param use_semantic_masks: Whether we use semantic masks as additional semantic information. Please check the paper
    "Semantic Style Transfer and Turning Two-Bit Doodles into Fine Artworks" for more information.
    :param mask_resize_as_feature: If true, resize the mask and use the resized mask as additional feature besides the
    vgg network layers. If false, pass the masks (must have exactly 3 masks) into the vgg network and use the outputted
    layers as additional features.
    :param output_semantic_mask: The semantic masks you would like to apply to the outputted image. The mask should have
    shape (batch_size, height, width, semantic_masks_num_layers). Unlike the neural doodle paper, here I use one
    black-and-white image for each semantic mask (the paper had semantic masks represented as rgb images, limiting the
    semantic channels to 3).
    :param style_semantic_masks: A list of semantic masks you would like to apply to each style image. The mask should
    have shape (batch_size, height, width, semantic_masks_num_layers)
    :param semantic_masks_weight: How heavily you'd like to weight the semantic masks as compared to other sources of
    semantic information obtained through passing the image through vgg network. Default is 1.0.
    :param print_iterations: Print loss information every n iterations.
    :param checkpoint_iterations: Save a checkpoint as well as the best image so far every n iterations.
    :param semantic_masks_num_layers: The number of semantic masks each image has.
    :param content_img_style_weight_mask: One black-and-white mask specifying how much we should "stylize" each pixel
    in the outputted image. The areas where the mask has higher value would be stylized more than other areas. A
    completely white mask would mean that we stylize the output image just as before, while a completely dark mask
    would mean that we do not stylize the output image at all, so it should look pretty much the same as the content image.
    If you do not wish to use this feature, just leave it as None.
    :return: a tuple where the first item is either the current iteration or None, indicating it has finished training.
    The second item is the image that has the lowest loss so far. The tuples are yielded every 'checkpoint_iterations'
    iterations as well as the last iteration.
    :rtype: iterator[tuple[int|None,image]]
    """
    global STYLE_LAYERS
    if content is not None:
        STYLE_LAYERS = STYLE_LAYERS_WITH_CONTENT
    if use_mrf:
        raise NotImplementedError
        STYLE_LAYERS = STYLE_LAYERS_MRF  # Easiest way to be compatible with no-mrf versions.
    if use_semantic_masks:
        raise NotImplementedError
        assert semantic_masks_weight is not None
        assert output_semantic_mask is not None
        assert style_semantic_masks is not None
    if content_img_style_weight_mask is not None:
        if shape[1] != content_img_style_weight_mask.shape[1] or shape[
                2] != content_img_style_weight_mask.shape[2]:
            raise AssertionError(
                "The shape of style_weight_mask is incorrect. It must have the same height and width "
                "as the output image. The output image has shape: %s and the style weight mask has "
                "shape: %s" %
                (str(shape), str(content_img_style_weight_mask.shape)))
        if content_img_style_weight_mask.dtype != np.float32:
            raise AssertionError(
                'The dtype of style_weight_mask must be float32. It is now %s'
                % str(content_img_style_weight_mask.dtype))

    # Append a (1,) in front of the shapes of the style images. So the style_shapes contains (1, height, width, 3).
    # 3 corresponds to rgb.
    style_shapes = [(1, ) + style.shape for style in styles]
    if style_blend_weights is None:
        style_blend_weights = [1.0 / len(styles) for _ in styles]
    content_features = {}
    style_features = [{} for _ in styles]
    output_semantic_mask_features = {}

    for i in range(len(styles)):
        # Using _precompute_image_features, which calculates on the CPU and thus allows larger images.
        style_features[i] = _precompute_image_features(styles[i], STYLE_LAYERS,
                                                       style_shapes[i],
                                                       save_dir)

    # The default behavior of tensorflow was to allocate all gpu memory. Here it is set to only use as much gpu memory
    # as it needs.
    # TODO: CHANGE IT BACK. USING CPU NOW
    # tf_config = tf.ConfigProto(gpu_options=tf.GPUOptions(device_count = {'GPU': 1})) #
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.45
    with tf.Graph().as_default(), tf.Session(config=tf_config) as sess:

        # with tf.name_scope("classifier"):
        # Compute content features in feed-forward mode
        content_image = tf.placeholder(tf.uint8,
                                       shape=shape,
                                       name='content_image')
        content_image_float = tf.image.convert_image_dtype(content_image,
                                                           dtype=tf.float32)

        with tf.variable_scope("classifier", reuse=False):
            vgg_c = vgg19_mat.Vgg19(
                vgg19_npy_path='imagenet-vgg-verydeep-19.mat')
            vgg_c.build(content_image_float, None)
            net_c = vgg_c.net()
        content_features[CONTENT_LAYER] = net_c[CONTENT_LAYER]

        if content is not None:
            # content_pre = np.array([vgg.preprocess(content, mean_pixel)])
            content_pre = np.array([content])
            content_pre = content_pre.astype(dtype=np.uint8)

        # Compute style features in feed-forward mode.
        if content_img_style_weight_mask is not None:
            style_weight_mask_layer_dict = neural_doodle_util.masks_average_pool(
                content_img_style_weight_mask)

        if initial is None:
            initial = tf.random_normal(shape) * 0.001
        else:
            # initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = np.array([initial])
            initial = initial.astype('float32')
        # image = tf.Variable(initial)
        # image_uint8 = tf.cast(image, tf.uint8)
        # image_float = tf.image.convert_image_dtype(image_uint8,dtype=tf.float32) * 2 - 1

        image_float = tf.Variable(initial)
        image = tf.image.convert_image_dtype(image_float,
                                             dtype=tf.uint8,
                                             saturate=True)

        with tf.variable_scope("classifier", reuse=True):
            vgg_o = vgg19_mat.Vgg19(
                vgg19_npy_path='imagenet-vgg-verydeep-19.mat')
            vgg_o.build(image_float, None)
            net_o = vgg_o.net()

        # content loss
        _, height, width, number = map(
            lambda i: i.value, content_features[CONTENT_LAYER].get_shape())
        content_features_size = height * width * number
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net_o[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                                         content_features_size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net_o[style_layer]
                if content_img_style_weight_mask is not None:
                    # Apply_style_weight_mask_to_feature_layer, then normalize with average of that style weight mask.
                    layer = neural_doodle_util.vgg_layer_dot_mask(style_weight_mask_layer_dict[style_layer], layer) \
                            / (tf.reduce_mean(style_weight_mask_layer_dict[style_layer]) + 0.000001)

                if use_mrf:
                    if use_semantic_masks:
                        # TODO: Compare the effect of concatenate masks to vgg layers versus dotting them with vgg
                        # layers. If you change this to dot, don't forget to also change that in neural_doodle_util.
                        layer = neural_doodle_util.concatenate_mask_layer_tf(
                            output_semantic_mask_features[style_layer], layer)
                        # layer = neural_doodle_util.vgg_layer_dot_mask(output_semantic_mask_features[style_layer], layer)
                    style_losses.append(
                        mrf_loss(style_features[i][style_layer],
                                 layer,
                                 name='%d%s' % (i, style_layer)))
                else:
                    if use_semantic_masks:
                        gram = neural_doodle_util.gramian_with_mask(
                            layer, output_semantic_mask_features[style_layer])
                    else:
                        gram = neural_util.gramian(layer)
                    style_gram = style_features[i][style_layer]
                    style_gram_size = get_np_array_num_elements(style_gram)
                    style_losses.append(
                        tf.nn.l2_loss(gram - style_gram) / style_gram_size
                    )  # TODO: Check normalization constants. the style loss is way too big compared to the other two.
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)
        # total variation denoising
        tv_loss = tf.mul(neural_util.total_variation(image_float), tv_weight)

        # overall loss
        if content is None:  # If we are doing style/texture regeneration only.
            loss = style_loss + tv_loss
        else:
            loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            loss, var_list=[image_float])

        def print_progress(i, feed_dict, last=False):
            stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
            if last or (print_iterations is not None and print_iterations != 0
                        and i % print_iterations == 0):
                if content is not None:
                    stderr.write('  content loss: %g\n' %
                                 content_loss.eval(feed_dict=feed_dict))
                stderr.write('    style loss: %g\n' %
                             style_loss.eval(feed_dict=feed_dict))
                stderr.write('       tv loss: %g\n' %
                             tv_loss.eval(feed_dict=feed_dict))
                stderr.write('    total loss: %g\n' %
                             loss.eval(feed_dict=feed_dict))

        # Load classifier weight.

        if '0.12.0' in tf.__version__:
            all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        else:
            all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)

        if save_dir is not None:
            # discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("classifier")]
            discrim_tvars = [
                var for var in all_vars if var.name.startswith("classifier")
            ]
            saver = tf.train.Saver(discrim_tvars)
            ckpt = tf.train.get_checkpoint_state(save_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise AssertionError("Cannot load from save directory.")
            var_not_saved = [
                item for item in all_vars if item not in discrim_tvars
            ]
            print('Var not saved', var_not_saved)
            sess.run(tf.initialize_variables(var_not_saved))
        else:
            sess.run(tf.initialize_all_variables())

        # optimization
        best_loss = float('inf')
        best = np.zeros(shape=shape)
        feed_dict = {}
        if content is not None:
            feed_dict[content_image] = content_pre
        # sess.run(tf.initialize_all_variables(), feed_dict=feed_dict)
        for i in range(iterations):
            last_step = (i == iterations - 1)
            print_progress(i, feed_dict, last=last_step)
            train_step.run(feed_dict=feed_dict)

            if (checkpoint_iterations
                    and i % checkpoint_iterations == 0) or last_step:
                this_loss = loss.eval(feed_dict=feed_dict)
                if this_loss < best_loss:
                    best_loss = this_loss
                    best = image.eval()
                # yield (
                #     (None if last_step else i),
                #     vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                # )
                # print(best)
                best_float32 = image_float.eval()
                # print(best_float32)
                best_str, = sess.run(
                    [tf.image.encode_png(best[0], name="input_pngs")])

                # yield (
                #     (None if last_step else i),
                #     best.reshape(shape[1:])
                # )
                yield ((None if last_step else i), best_str)
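Since `stylize` is a generator yielding (iteration, png_bytes) tuples, a caller can write checkpoints as they arrive. A hedged consumption sketch, assuming `content_img` and `style_img` are numpy arrays loaded elsewhere and the VGG weights sit in the working directory:

for iteration, png_bytes in stylize(network='imagenet-vgg-verydeep-19.mat',
                                    content=content_img,
                                    styles=[style_img],
                                    shape=(1, 256, 256, 3),
                                    iterations=1000,
                                    checkpoint_iterations=100):
    suffix = 'final' if iteration is None else str(iteration)
    with open('output_%s.png' % suffix, 'wb') as f:
        f.write(png_bytes)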
Example #48
0
                                  beta1=0.9,
                                  beta2=0.999,
                                  epsilon=1e-08,
                                  use_locking=False).minimize(cost,
                                                              var_list=train_params[parameters:]
                                                              )

uninitialized_vars = []
for var in tf.all_variables():
    try:
        sess.run(var)
    except tf.errors.FailedPreconditionError:
        uninitialized_vars.append(var)


init_new_vars_op = tf.initialize_variables(uninitialized_vars)
sess.run(init_new_vars_op)


log('TensorFlow Session starting...')

# TensorBoard summary (graph)
tf.summary.scalar('cost', cost)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter('./tensorboard_test')
writer.add_graph(sess.graph)
log('TensorBoard infos in ./tensorboard_test')

# Save path depending on the training behaviour
if not args.transfer_model and args.transfer_cnn:
    save_path = args.save_dir+'/cnn_s2p_' + appliance_name + '_transf_' + args.cnn + '_pointnet_model'
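The try/except loop above costs one session round-trip per variable. A sketch of the same selective initialization in a single call, using tf.report_uninitialized_variables as Example #54 below does (the byte decoding is needed on Python 3):

uninit_names = set(sess.run(tf.report_uninitialized_variables()))
uninitialized_vars = [v for v in tf.all_variables()
                      if v.name.split(':')[0].encode() in uninit_names]
sess.run(tf.initialize_variables(uninitialized_vars))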
Example #49
0
def run(args, server):
    env = create_env(args.env_id,
                     client_id=str(args.task),
                     remotes=args.remotes,
                     envWrap=args.envWrap,
                     designHead=args.designHead,
                     noLifeReward=args.noLifeReward)

    # set one task to one cpu
    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)
    ])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)

    trainer = A3C(env, args.task, args.visualise, args.unsup, summary_writer,
                  args.envWrap, args.designHead, args.noReward)

    # logging
    if args.task == 0:
        with open(args.log_dir + '/log.txt', 'w') as fid:
            for key, val in constants.items():
                fid.write('%s: %s\n' % (str(key), str(val)))
            fid.write('designHead: %s\n' % args.designHead)
            fid.write('input observation: %s\n' %
                      str(env.observation_space.shape))
            fid.write('env name: %s\n' % str(env.spec.id))
            fid.write('unsup method type: %s\n' % str(args.unsup))

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [
            v for v in tf.global_variables() if not v.name.startswith("local")
        ]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [
            v for v in tf.all_variables() if not v.name.startswith("local")
        ]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()
    saver = FastSaver(variables_to_save)

    if args.pretrain is not None:
        variables_to_restore = [
            v for v in tf.trainable_variables()
            if not v.name.startswith("local")
        ]
        pretrain_saver = FastSaver(variables_to_restore)
        pretrain = tf.train.latest_checkpoint(args.pretrain)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)
        if args.pretrain is not None:
            variables_to_restore = [
                v for v in tf.trainable_variables()
                if not v.name.startswith("local")
            ]
            pretrain = tf.train.latest_checkpoint(args.pretrain)
            logger.info("----------------------------------------")
            logger.info("==> Restoring from given pretrained checkpoint.")
            logger.info("    Pretraining address: %s", pretrain)
            pretrain_saver.restore(ses, pretrain)
            logger.info("==> Done restoring model! Restored %d variables.",
                        len(variables_to_restore))
            logger.info("----------------------------------------")
            logger.info("----------------------------------------")
            logger.info("----------------------------------------")
            logger.info("----------------------------------------")
            logger.info("----------------------------------------")


    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=logdir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=trainer.global_step,
        save_model_secs=30,
        save_summaries_secs=30)

    num_global_steps = constants['MAX_GLOBAL_STEPS']

    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. "
        +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified."
    )
    with sv.managed_session(server.target,
                            config=config) as sess, sess.as_default():
        # Workaround for FailedPreconditionError
        # see: https://github.com/openai/universe-starter-agent/issues/44 and 31
        sess.run(trainer.sync)

        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("-------Starting training at gobal_step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps
                                        or global_step < num_global_steps):
            #print("-------Start at:" + str(trainer.global_step) + ", to:" + str(num_global_steps))
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
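The use_tf12_api guard above appears twice because the initializer APIs were renamed in TF 0.12. A small helper capturing that choice, as a sketch (use_tf12_api is assumed to be computed from tf.__version__ elsewhere in this module):

def make_init_op(variables):
    # Prefer the TF >= 0.12 names; fall back to the deprecated ones.
    if use_tf12_api:
        return tf.variables_initializer(variables)
    return tf.initialize_variables(variables)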
Example #50
0
def render_vis(model,
               objective_f,
               param_f=None,
               optimizer=None,
               transforms=None,
               thresholds=(512, ),
               print_objectives=None,
               verbose=False,
               model_name_scope='encode'):
    """Flexible optimization-based feature vis.

  There's a lot of ways one might wish to customize optimization-based
  feature visualization. It's hard to create an abstraction that stands up
  to all the things one might wish to try.

  This function probably can't do *everything* you want, but it's much more
  flexible than a naive attempt. The basic abstraction is to split the problem
  into several parts. Consider the arguments:

  Args:
    model: The model to be visualized, from Alex's modelzoo.
    objective_f: The objective our visualization maximizes.
      See the objectives module for more details.
    param_f: Parameterization of the image we're optimizing.
      See the parameterization module for more details.
      Defaults to a naively parameterized [1, 128, 128, 3] image.
    optimizer: Optimizer to optimize with. Either tf.train.Optimizer instance,
      or a function from (graph, sess) to such an instance.
      Defaults to Adam with lr .05.
    transforms: A list of stochastic transformations that get composed,
      which our visualization should robustly activate the network against.
      See the transform module for more details.
      Defaults to [transform.jitter(8)].
    thresholds: A list of numbers of optimization steps, at which we should
      save (and display if verbose=True) the visualization.
    print_objectives: A list of objectives separate from those being optimized,
      whose values get logged during the optimization.
    verbose: Should we display the visualization when we hit a threshold?
      This should only be used in IPython.

  Returns:
    2D array of optimization results containing evaluations of the supplied
    param_f snapshotted at the specified thresholds. Usually that will mean one
    or multiple channel visualizations stacked on top of each other.
  """

    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=gpu_options,
    )
    with tf.Graph().as_default() as graph, tf.Session(config=config) as sess:

        T = make_vis_T(model, objective_f, param_f, optimizer, transforms)
        loss, vis_op, t_image = T("loss"), T("vis_op"), T("input")
        added_vars = [x for x in tf.global_variables() \
                      if not x.op.name.startswith(model_name_scope)]
        init_new_vars_op = tf.initialize_variables(added_vars)
        init_new_vars_op.run()

        images = []
        all_losses = []
        for i in tqdm(range(max(thresholds) + 1)):
            loss_, _ = sess.run([loss, vis_op])
            all_losses.append(loss_)
            if i in thresholds:
                vis = t_image.eval()
                images.append(vis)
                if verbose:
                    print(i, loss_)
        return t_image.eval(), all_losses
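A hedged call sketch for render_vis; `model` and `objective_f` are assumed to come from the modelzoo and objectives module the docstring references:

images, losses = render_vis(model,
                            objective_f,
                            thresholds=(128, 512),
                            verbose=False)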
Example #51
0
    sess.run(s_assign)
    style_features = sess.run(
        [vgg.conv1_1, vgg.conv2_1, vgg.conv3_1, vgg.conv4_1, vgg.conv5_1],
        feed_dict={vgg.imgs: [style_img]})

    c_assign = vgg.imgs_update.assign(np.asarray([content_img]).astype(float))
    sess.run(c_assign)
    content_features = sess.run(vgg.conv5_2,
                                feed_dict={vgg.imgs: [content_img]})

    result_img = np.zeros((1, 224, 224, 3)).tolist()
    # r_assign = vgg.imgs_update.assign(np.asarray(result_img).astype(float))
    # sess.run(r_assign)
    vgg.transfer_style(content_features, style_features)

    sess.run(tf.initialize_variables(set(tf.all_variables()) - vgg.temp))

    for i in range(1000):
        loss = sess.run(vgg.loss, feed_dict={vgg.imgs: result_img})
        print("iteration", i, "loss", loss)
        update = sess.run(vgg.train_step, feed_dict={vgg.imgs: result_img})

    result_img = sess.run(vgg.imgs_update, feed_dict={vgg.imgs: result_img})

    # import skimage.io as io
    x = np.asarray(result_img[0]).astype(np.uint8)
    # io.imshow(x)
    # io.show()

    imsave('output.jpg', x)
Example #52
0
    def restart_units(self):

        self.restart_op = tf.initialize_variables([self.v, self.u])
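A short usage sketch for the op built above (the owning `layer` object and `sess` are assumptions): building the op once and then running it resets self.v and self.u to their initial values without touching any other variables.

layer.restart_units()       # build the re-initialization op once
sess.run(layer.restart_op)  # reset v and u to their initializers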
Example #53
0
    for grad, var in grads:
        if (var.name == "sp_w_fc1:0"):
            idx_in1 = tf.cast(tf.constant(idx_fc1), tf.float32)
            grads[count] = (tf.multiply(idx_in1, grad), var)
        if (var.name == "sp_w_fc2:0"):
            idx_in2 = tf.cast(tf.constant(idx_fc2), tf.float32)
            grads[count] = (tf.multiply(idx_in2, grad), var)
        count += 1
    train_step = trainer.apply_gradients(grads)

    correct_prediction = tf.equal(tf.argmax(logit, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    for var in tf.all_variables():
        if not sess.run(tf.is_variable_initialized(var)):
            sess.run(tf.initialize_variables([var]))

    for i in range(20000):
        batch = mnist.train.next_batch(50)
        idx_in1_value = sess.run(idx_in1)
        grads_fc1_value = sess.run(grads,
                                   feed_dict={
                                       x: batch[0],
                                       y_: batch[1],
                                       keep_prob: 0.5
                                   })
        if i % 100 == 0:
            train_acc = sess.run(accuracy,
                                 feed_dict={
                                     x: batch[0],
                                     y_: batch[1],
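The loop over grads earlier in this example freezes pruned weights by zeroing their gradient entries before apply_gradients. A minimal sketch of that masking pattern, with illustrative names (loss, w, and mask_np are assumptions):

mask = tf.cast(tf.constant(mask_np), tf.float32)   # 1 = trainable, 0 = frozen
grads_and_vars = trainer.compute_gradients(loss, var_list=[w])
masked = [(tf.multiply(mask, grad), var) for grad, var in grads_and_vars]
train_step = trainer.apply_gradients(masked)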
Example #54
0
    def _initialize_variables(self):
        uninitialized_var_names = [
            bytes.decode(var)
            for var in self._sess.run(tf.report_uninitialized_variables())
        ]
        uninitialized_vars = [
            var for var in tf.global_variables()
            if var.name.split(':')[0] in uninitialized_var_names
        ]
        self._sess.run(tf.initialize_variables(uninitialized_vars))
Example #55
0
def color_sketches_net(height,
                       width,
                       iterations,
                       batch_size,
                       content_weight,
                       tv_weight,
                       learning_rate,
                       generator_network='unet',
                       use_adversarial_net=False,
                       use_hint=False,
                       adv_net_weight=1.0,
                       weight_decay_lambda=1e-5,
                       sketch_reconstruct_weight=10.0 / 255.0,
                       print_iterations=None,
                       checkpoint_iterations=None,
                       save_dir="model/",
                       do_restore_and_generate=False,
                       do_restore_and_train=False,
                       restore_from_noadv_to_adv=False,
                       preprocessed_folder=None,
                       preprocessed_file_path_list=None,
                       content_preprocessed_folder=None,
                       color_rebalancing_folder=None,
                       from_screenshot=False,
                       from_webcam=False,
                       test_img_dir=None,
                       test_img_hint=None,
                       input_mode='sketch',
                       output_mode='rgb',
                       use_cpu=False):
    """
    Stylize images.
    TODO: modify the description.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :param: lr_decay_steps: learning rate decays by lr_decay_rate after lr_decay steps.
    Default per https://arxiv.org/abs/1603.03417
    :param: min_lr: The minimum learning rate. Default per https://arxiv.org/abs/1603.03417
    :param: lr_decay_rate: learning rate decays by lr_decay_rate after lr_decay steps.
    Default per https://arxiv.org/abs/1603.03417
    :param: use_semantic_masks: If it is true, the input to the generator network will be the semantic masks instead
    of the content image. The content image will serve as ground truth for loss (I haven't decided whether to use content
    or style loss).
    :rtype: iterator[tuple[int|None,image]]
    """

    # Before training, make sure everything is set correctly.
    if use_hint:
        assert test_img_hint is not None
    height, width = get_compatible_shape(height, width)
    input_shape = (1, height, width, 3)
    print(
        'The input shape is: %s. Input mode is: %s. Output mode is: %s. Using %s generator network'
        % (str(input_shape), input_mode, output_mode, generator_network))

    content_img_preprocessed = None
    sketches_preprocessed = None
    prev_content_preprocessed_file_i = 0

    # Define tensorflow placeholders and variables.
    with tf.Graph().as_default():
        input_images = tf.placeholder(
            tf.float32,
            shape=[
                batch_size, input_shape[1], input_shape[2],
                1 if generator_network != 'lnet' else 3
            ],
            name='input_sketches' if input_mode == 'sketch' else 'input_bw')

        if use_hint:
            input_hint = tf.placeholder(
                tf.float32,
                shape=[batch_size, input_shape[1], input_shape[2], 3],
                name='input_hint')
            input_concatenated = tf.concat(3, (input_images, input_hint))
            if generator_network == 'unet_color':
                assert input_mode == 'sketch' or (input_mode == 'raw_sketch'
                                                  and do_restore_and_generate)
                color_output = unet_color_util.net(input_concatenated)
                sketch_output = lnet_util.net(
                    (color_output - 128) / 128
                ) * 255  # This is the reconstructed sketch from the color output.
            elif generator_network == 'lnet':
                assert input_mode == 'color' and not use_adversarial_net and not use_hint
                # This step is not necessary but kept to be in sync with chainer repo.
                input_concatenated = (input_concatenated - 128) / 128
                color_output = lnet_util.net(input_concatenated,
                                             trainable=True) * 255
            elif generator_network == 'backprop':
                assert input_mode == 'sketch'
                color_output = tf.get_variable(
                    'backprop_input_var',
                    shape=[batch_size, input_shape[1], input_shape[2], 3],
                    initializer=tf.random_normal_initializer(
                        mean=128, stddev=10.0)) + 0 * input_images
                sketch_output = lnet_util.net(
                    (color_output - 128) / 128
                ) * 255  # This is the reconstructed sketch from the color output.
            else:
                # TODO: change the error message.
                raise AssertionError(
                    "Please input a valid generator network name. Possible options are: TODO. Got: %s"
                    % (generator_network))

        else:
            if generator_network == 'unet_color':
                assert input_mode == 'sketch' or (input_mode == 'raw_sketch'
                                                  and do_restore_and_generate)
                color_output = unet_color_util.net(input_images)
                sketch_output = lnet_util.net(
                    (color_output - 128) / 128
                ) * 255  # This is the reconstructed sketch from the color output.
            elif generator_network == 'lnet':
                assert input_mode == 'color' and not use_adversarial_net and not use_hint
                # This step is not necessary but kept to be in sync with chainer repo.
                input_images = (input_images - 128) / 128
                color_output = lnet_util.net(input_images,
                                             trainable=True) * 255
            elif generator_network == 'backprop':
                assert input_mode == 'sketch'
                color_output = tf.get_variable(
                    'backprop_input_var',
                    shape=[batch_size, input_shape[1], input_shape[2], 3],
                    initializer=tf.random_normal_initializer(
                    )) + 0 * input_images
                sketch_output = lnet_util.net(
                    (color_output - 128) / 128
                ) * 255  # This is the reconstructed sketch from the color output.
            else:
                raise AssertionError(
                    "Please input a valid generator network name. Possible options are: TODO. Got: %s"
                    % (generator_network))

        generator_all_var = unet_util.get_net_all_variables()
        sketch_reconstruct_all_var = lnet_util.get_net_all_variables()

        if not do_restore_and_generate:
            assert preprocessed_folder is not None and preprocessed_file_path_list is not None and \
                   preprocessed_folder[-1] == '/'
            learning_rate_init = tf.constant(learning_rate)
            learning_rate_var = tf.get_variable(name='learning_rate_var',
                                                trainable=False,
                                                initializer=learning_rate_init)
            color_expected_output = tf.placeholder(
                tf.float32,
                shape=[
                    batch_size, input_shape[1], input_shape[2],
                    3 if generator_network != 'lnet' else 1
                ],
                name='color_expected_output')
            # Use the mean difference loss. Used to use tf.nn.l2_loss. Don't know how big of a difference that makes.
            # color_loss_non_adv =tf.nn.l2_loss(color_output - color_expected_output) / batch_size
            color_loss_non_adv = tf.reduce_mean(
                tf.abs(color_output - color_expected_output))
            weight_decay_loss_non_adv = conv_util.weight_decay_loss(
                scope='unet') * weight_decay_lambda
            # This is only for unet_color, not for training the lnet.
            sketch_expected_output = lnet_util.net(
                (color_expected_output - 128) / 128, reuse=True) * 255
            sketch_reconstruct_loss_non_adv = tf.reduce_mean(
                tf.abs(sketch_output -
                       sketch_expected_output)) * sketch_reconstruct_weight

            generator_loss_non_adv = color_loss_non_adv + weight_decay_loss_non_adv + sketch_reconstruct_loss_non_adv
            # tv_loss = tv_weight * total_variation(image)

            if use_adversarial_net:
                adv_net_input = tf.placeholder(
                    tf.float32,
                    shape=[batch_size, input_shape[1], input_shape[2], 3],
                    name='adv_net_input')
                adv_net_prediction_image_input = adv_net_util.net(
                    adv_net_input)
                adv_net_prediction_generator_input = adv_net_util.net(
                    color_output, reuse=True)
                adv_net_all_var = adv_net_util.get_net_all_variables()

                weight_decay_loss_adv = conv_util.weight_decay_loss(
                    scope='adv_net') * weight_decay_lambda

                logits_from_i = adv_net_prediction_image_input
                logits_from_g = adv_net_prediction_generator_input

                # One represents labeling the image as coming from a real image. Zero represents labeling it as generated.
                adv_loss_from_i = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits_from_i, tf.ones(
                            [batch_size], dtype=tf.int64))) * adv_net_weight
                adv_loss_from_g = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits_from_g, tf.zeros(
                            [batch_size], dtype=tf.int64))) * adv_net_weight

                adv_loss = adv_loss_from_i + adv_loss_from_g + weight_decay_loss_adv
                generator_loss_through_adv = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits_from_g, tf.ones(
                            [batch_size], dtype=tf.int64))) * adv_net_weight
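                # Taken together: adv_loss trains the discriminator to label
                # real images as 1 and generated ones as 0, while
                # generator_loss_through_adv pushes generated outputs toward
                # label 1, the standard non-saturating GAN pairing.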
                # Beta1 = 0.5 according to dcgan paper
                adv_train_step = tf.train.AdamOptimizer(
                    learning_rate_var, beta1=0.5,
                    beta2=0.999).minimize(adv_loss, var_list=adv_net_all_var)
                # adv_train_step_i = tf.train.AdamOptimizer(learning_rate_var, beta1=0.5,
                #                        beta2=0.999).minimize(adv_loss_from_i, var_list=adv_net_all_var)
                # adv_train_step_g = tf.train.AdamOptimizer(learning_rate_var, beta1=0.5,
                #                        beta2=0.999).minimize(adv_loss_from_g, var_list=adv_net_all_var)
                generator_train_step_through_adv = tf.train.AdamOptimizer(
                    learning_rate_var, beta1=0.5,
                    beta2=0.999).minimize(generator_loss_through_adv,
                                          var_list=generator_all_var)
                generator_train_step = tf.train.AdamOptimizer(
                    learning_rate_var, beta1=0.9,
                    beta2=0.999).minimize(generator_loss_non_adv)

                with tf.control_dependencies(
                    [generator_train_step_through_adv, generator_train_step]):
                    generator_both_train = tf.no_op(
                        name='generator_both_train')

                adv_loss_real_sum = scalar_summary("adv_loss_real",
                                                   adv_loss_from_i)
                adv_loss_fake_sum = scalar_summary("adv_loss_fake",
                                                   adv_loss_from_g)
                adv_loss_weight_decay_sum = scalar_summary(
                    "adv_loss_weight_decay", weight_decay_loss_adv)

                generator_loss_through_adv_sum = scalar_summary(
                    "g_loss_through_adv", generator_loss_through_adv)
                adv_loss_sum = scalar_summary("adv_loss", adv_loss)
                generator_loss_l2_sum = scalar_summary(
                    "generator_loss_non_adv", generator_loss_non_adv)
                generator_loss_weight_decay_sum = scalar_summary(
                    "generator_loss_weight_decay", weight_decay_loss_non_adv)
                sketch_reconstruct_loss_non_adv_sum = scalar_summary(
                    "sketch_reconstruct_loss_non_adv",
                    sketch_reconstruct_loss_non_adv)

                g_sum = merge_summary([
                    generator_loss_through_adv_sum, generator_loss_l2_sum,
                    generator_loss_weight_decay_sum,
                    sketch_reconstruct_loss_non_adv_sum
                ])
                adv_sum = merge_summary([
                    adv_loss_fake_sum, adv_loss_real_sum,
                    adv_loss_weight_decay_sum, adv_loss_sum
                ])
            else:
                # optimizer setup
                # Training using adam optimizer. Setting comes from https://arxiv.org/abs/1610.07629.
                generator_train_step = tf.train.AdamOptimizer(
                    learning_rate_var, beta1=0.9,
                    beta2=0.999).minimize(generator_loss_non_adv)
                generator_loss_l2_sum = scalar_summary("color_loss_non_adv",
                                                       generator_loss_non_adv)
                generator_loss_weight_decay_sum = scalar_summary(
                    "generator_loss_weight_decay", weight_decay_loss_non_adv)
                sketch_reconstruct_loss_non_adv_sum = scalar_summary(
                    "sketch_reconstruct_loss_non_adv",
                    sketch_reconstruct_loss_non_adv)
                g_sum = merge_summary([
                    generator_loss_l2_sum, generator_loss_weight_decay_sum,
                    sketch_reconstruct_loss_non_adv_sum
                ])

            def print_progress(i,
                               feed_dict,
                               adv_feed_dict,
                               start_time,
                               total_iterations,
                               last=False):
                stderr.write('Iteration %d/%d\n' % (i + 1, total_iterations))
                if last or (print_iterations and i % print_iterations == 0):
                    current_time = time.time()
                    if i > 0:
                        seconds_passed = current_time - start_time
                        seconds_remaining = float(total_iterations -
                                                  i) / i * seconds_passed
                        m, s = divmod(seconds_remaining, 60)
                        h, m = divmod(m, 60)
                        stderr.write(
                            'Estimated time remaining: "%d:%02d:%02d"\n' %
                            (h, m, s))
                    stderr.write('Learning rate %f\n' %
                                 (learning_rate_var.eval()))
                    # TODO: change this
                    stderr.write(
                        ' generator l2 loss: %g\n' %
                        generator_loss_non_adv.eval(feed_dict=feed_dict))
                    stderr.write('       sketch loss: %g\n' %
                                 sketch_reconstruct_loss_non_adv.eval(
                                     feed_dict=feed_dict))
                    if not generator_network == 'backprop':
                        stderr.write('  w decay gen loss: %g\n' %
                                     weight_decay_loss_non_adv.eval(
                                         feed_dict=feed_dict))
                    # if generator_network == 'unet_both' or generator_network == 'colorful_img_both':
                    #     stderr.write('           bw loss: %g\n' % color_loss_non_adv.eval(feed_dict=feed_dict))
                    # stderr.write('           ab loss: %g\n' % ab_loss_non_adv.eval(feed_dict=feed_dict))
                    if use_adversarial_net:
                        stderr.write(
                            '   adv_from_i loss: %g\n' %
                            adv_loss_from_i.eval(feed_dict=adv_feed_dict))
                        stderr.write(
                            '   adv_from_g loss: %g\n' %
                            adv_loss_from_g.eval(feed_dict=adv_feed_dict))
                        stderr.write('generator adv loss: %g\n' %
                                     generator_loss_through_adv.eval(
                                         feed_dict=adv_feed_dict))
                        stderr.write('  w decay adv loss: %g\n' %
                                     weight_decay_loss_adv.eval(
                                         feed_dict=adv_feed_dict))

        # Optimization
        # It used to track and record only the best one with lowest loss. This is no longer necessary and I think
        # just recording the one generated at each round will make it easier to debug.
        best_image = None
        start_time = time.time()
        if restore_from_noadv_to_adv and use_adversarial_net:
            saver = tf.train.Saver(generator_all_var + [learning_rate_var])
        else:
            saver = tf.train.Saver()

        if use_cpu:
            config = tf.ConfigProto(device_count={'GPU': 0})
        else:
            config = None
        with tf.Session(config=config) as sess:
            if do_restore_and_generate:
                assert batch_size == 1
                ckpt = tf.train.get_checkpoint_state(save_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    stderr("No checkpoint found. Exiting program")
                    return

                if from_screenshot:
                    # This is the x and y offset, the coordinate where we start capturing screen shot.
                    kScreenX = 300
                    kScreenY = 300
                elif from_webcam:
                    cap = cv2.VideoCapture(0)
                    # Set width and height.
                    ret = cap.set(3, 1280)
                    ret = cap.set(4, 960)
                    ret, frame = cap.read()
                    print('The dimension of this camera is : %d x %d' %
                          (frame.shape[1], frame.shape[0]))
                else:
                    assert test_img_dir is not None
                iterator = 0

                while from_screenshot or from_webcam or (iterator == 0):
                    if from_screenshot:
                        pass
                        # w = gtk.gdk.get_default_root_window()
                        # sz = w.get_size()
                        # print "The size of the window is %d x %d" % sz
                        # pb = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, False, 8, input_shape[1], input_shape[2])
                        # pb = pb.get_from_drawable(w, w.get_colormap(), kScreenX, kScreenY, 0, 0, input_shape[1],
                        #                           input_shape[2])
                        # content_image = pb.pixel_array
                    elif from_webcam:
                        ret, frame = cap.read()
                        content_image = scipy.misc.imresize(
                            frame, (input_shape[1], input_shape[2]))
                    else:
                        content_image = imread(
                            test_img_dir, (input_shape[1], input_shape[2]))
                    content_image = np.array([content_image])
                    if input_mode == 'sketch':
                        color_expected_output = tf.placeholder(
                            tf.float32,
                            shape=[
                                batch_size, input_shape[1], input_shape[2],
                                3 if generator_network != 'lnet' else 1
                            ],
                            name='color_expected_output')
                        sketch_expected_output = lnet_util.net(
                            (color_expected_output - 128) / 128,
                            reuse=True) * 255
                        content_image_yuv = cv2.cvtColor(
                            np.asarray(content_image[0, ...], dtype=np.uint8),
                            cv2.COLOR_RGB2YUV)
                        image_sketches = sketch_expected_output.eval(
                            feed_dict={
                                color_expected_output:
                                np.array([content_image_yuv])
                            })

                        # image_sketches = sketches_util.image_to_sketch(content_image)
                        # image_sketches = np.expand_dims(image_sketches, axis=3)
                    elif input_mode == 'bw':
                        content_image_lab = colorful_img_network_util.rgb_to_lab(
                            content_image)
                        image_sketches = content_image_lab[..., 0:1]
                        # image_sketches = np.expand_dims(rgb2gray(content_image), axis=3)
                    elif input_mode == 'color':
                        image_sketches = np.zeros(content_image.shape)
                        # image_sketches = np.expand_dims(rgb2gray(content_image), axis=3)
                    elif input_mode == 'raw_sketch':
                        image_sketches = rgb2gray(content_image, keep_dim=True)
                    else:
                        raise AssertionError('Input mode error.')

                    # Do some processing...
                    image_sketches, content_image = sketches_util.generate_training_batch(
                        image_sketches, content_image, train=False)

                    # Now generate an image using the style_blend_weights given.
                    if input_mode == 'color':
                        feed_dict = {input_images: content_image}
                    else:
                        feed_dict = {input_images: image_sketches[..., :1]}

                    if use_hint:
                        image_hint = hint_imread(
                            test_img_hint, (input_shape[1], input_shape[2]))
                        feed_dict[input_hint] = np.array([image_hint])

                    generated_bw = color_output.eval(feed_dict=feed_dict)
                    iterator += 1

                    if generator_network != 'lnet':
                        # Whenever using cv2.cvtColor, be careful not to use float values... It gives out weird answers.
                        print(generated_bw[0, 0, 0:5, :])
                        print(content_image[0, 0, 0:5, :])
                        generated_image = np.array([
                            cv2.cvtColor(
                                np.asarray(generated_bw[0, ...],
                                           dtype=np.uint8), cv2.COLOR_YUV2RGB)
                        ])
                        # generated_image = image_sketches[...,:1]
                    else:
                        generated_image = generated_bw
                    yield (iterator, generated_image)

            else:
                # Initialize log writer
                summary_writer = SummaryWriter("./logs", sess.graph)

                # initialize pre-processed numpy array
                if content_preprocessed_folder is not None:
                    if not os.path.isfile(content_preprocessed_folder +
                                          'record.txt'):
                        raise AssertionError(
                            'No preprocessed content images found in %s. To use this feature, first use some '
                            'other file to call read_resize_and_save_all_imgs_in_dir.'
                            % (content_preprocessed_folder))
                    content_preprocessed_record = sketches_util.read_preprocessed_sketches_npy_record(
                        content_preprocessed_folder)
                    if content_preprocessed_record[0][
                            3] != height or content_preprocessed_record[0][
                                4] != width:
                        raise AssertionError(
                            'The height and/or width of the preprocessed numpy files does not '
                            'match those of the current setting.')
                    # Read the first file
                    print('Reading preprocessed content images.')
                    content_img_preprocessed = np.load(
                        content_preprocessed_record[
                            prev_content_preprocessed_file_i][0])
                    sketches_preprocessed = np.load(
                        content_preprocessed_record[
                            prev_content_preprocessed_file_i][1])

                # Do Training.
                iter_start = 0
                if do_restore_and_train:
                    ckpt = tf.train.get_checkpoint_state(save_dir)
                    if ckpt and ckpt.model_checkpoint_path:
                        saver.restore(sess, ckpt.model_checkpoint_path)
                        iter_start = get_global_step_from_save_dir(
                            ckpt.model_checkpoint_path)
                    else:
                        raise AssertionError(
                            "No checkpoint found. Exiting program")
                    if restore_from_noadv_to_adv and use_adversarial_net:
                        # Simply running this doesn't seem to work.
                        # sess.run(tf.initialize_variables(adv_net_all_var))

                        # Get all variables except the generator net and the learning rate
                        if '0.12.0' in tf.__version__:
                            all_vars = tf.get_collection(
                                tf.GraphKeys.GLOBAL_VARIABLES)
                        else:
                            all_vars = tf.get_collection(
                                tf.GraphKeys.VARIABLES)
                        var_not_saved = [
                            item for item in all_vars
                            if item not in (generator_all_var +
                                            [learning_rate_var])
                        ]
                        sess.run(tf.initialize_variables(var_not_saved))
                        # Now change the saver back to normal
                        saver = tf.train.Saver()
                        raise NotImplementedError
                else:
                    # # In the past I ran this. Now I have lnet which is a pretrained network.
                    # sess.run(tf.initialize_all_variables())

                    saver = tf.train.Saver(sketch_reconstruct_all_var)
                    ckpt = tf.train.get_checkpoint_state(
                        'model/chainer_converted/')
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Get variables not in lnet and initialize them
                    # Get all variables except the generator net and the learning rate
                    if '0.12.0' in tf.__version__:
                        all_vars = tf.get_collection(
                            tf.GraphKeys.GLOBAL_VARIABLES)
                    else:
                        all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
                    var_not_saved = [
                        item for item in all_vars
                        if item not in sketch_reconstruct_all_var
                    ]
                    sess.run(tf.initialize_variables(var_not_saved))
                    # Now change the saver back to normal
                    saver = tf.train.Saver()

                # Get path to all content images.

                image_subpaths = read_preprocessed_file_path_list(
                    preprocessed_file_path_list)

                # Ignore the ones at the end.
                if batch_size != 1 and len(image_subpaths) % batch_size != 0:
                    image_subpaths = image_subpaths[:-(len(image_subpaths) %
                                                       batch_size)]
                print('The size of training dataset is %d images.' %
                      len(image_subpaths))
                preprocessed_colored_folder = preprocessed_folder + 'color/'
                preprocessed_sketch_folder = preprocessed_folder + 'line/'

                content_dirs = map(lambda p: preprocessed_colored_folder + p,
                                   image_subpaths)
                sketch_dirs = map(lambda p: preprocessed_sketch_folder + p,
                                  image_subpaths)

                # # Test training GAN differently***
                # generators_turn = True
                # # END TEST***

                current_lr = learning_rate_var.eval()
                if abs(current_lr - learning_rate) > 0.00000001:
                    print(
                        "Given learning rate is different from the learning rate stored. Changing lr %f -> %f"
                        % (current_lr, learning_rate))
                    sess.run(learning_rate_var.assign(learning_rate))

                for i in range(iter_start, iterations):
                    if content_preprocessed_folder is not None:
                        current_content_preprocessed_file_i, index_within_preprocessed = \
                            sketches_util.find_corresponding_sketches_npy_from_record(
                                content_preprocessed_record, i * batch_size, batch_size)
                        if prev_content_preprocessed_file_i != current_content_preprocessed_file_i:
                            prev_content_preprocessed_file_i = current_content_preprocessed_file_i
                            content_img_preprocessed = np.load(
                                content_preprocessed_record[
                                    current_content_preprocessed_file_i][0])
                            sketches_preprocessed = np.load(
                                content_preprocessed_record[
                                    current_content_preprocessed_file_i][1])
                        content_pre_list = content_img_preprocessed[
                            index_within_preprocessed:
                            index_within_preprocessed + batch_size,
                            ...].astype(np.float32)

                        if input_mode == 'sketch':
                            image_sketches = sketches_preprocessed[
                                index_within_preprocessed:
                                index_within_preprocessed + batch_size,
                                ...].astype(np.float32)
                            image_sketches = np.expand_dims(image_sketches,
                                                            axis=3)
                        elif input_mode == 'bw':
                            content_image_lab = colorful_img_network_util.rgb_to_lab(
                                content_pre_list)
                            image_sketches = content_image_lab[..., 0:1]
                            # image_sketches = np.expand_dims(rgb2gray(content_pre_list), axis=3)
                        elif input_mode == 'color':
                            image_sketches = content_pre_list
                        elif input_mode == 'raw_sketch':
                            raise AssertionError(
                                'Input mode raw_sketch should not be trained.')
                        else:
                            raise AssertionError('Input mode error.')
                    else:

                        current_content_dirs = get_batch_paths(
                            content_dirs, i * batch_size, batch_size)
                        current_sketch_dirs = get_batch_paths(
                            sketch_dirs, i * batch_size, batch_size)
                        content_pre_list = read_and_resize_batch_images(
                            current_content_dirs, None, None)
                        image_sketches = read_and_resize_bw_mask_images(
                            current_sketch_dirs, None, None,
                            len(current_sketch_dirs), 1)

                        # if input_mode == 'sketch':
                        #     image_sketches = sketches_util.image_to_sketch(content_pre_list)
                        #     image_sketches = np.expand_dims(image_sketches, axis=3)
                        # elif input_mode == 'bw':
                        #     content_image_lab = colorful_img_network_util.rgb_to_lab(content_pre_list)
                        #     image_sketches = content_image_lab[...,0:1]
                        #     # image_sketches = np.expand_dims(rgb2gray(content_pre_list), axis=3)
                        # else:
                        #     raise AssertionError('Input mode error.')

                    # Do some processing...
                    image_sketches, content_pre_list = sketches_util.generate_training_batch(
                        image_sketches, content_pre_list, train=True)
                    if generator_network == 'lnet':
                        feed_dict = {
                            color_expected_output: image_sketches[..., :1]
                        }
                    else:
                        feed_dict = {color_expected_output: content_pre_list}

                    if use_hint:
                        # image_hint = sketches_util.generate_hint_from_image(content_pre_list)
                        # feed_dict[input_hint] = image_hint
                        image_hint = image_sketches[..., 1:]
                        feed_dict[input_hint] = image_hint

                    image_sketches = image_sketches[..., :1]

                    if input_mode == 'color':
                        feed_dict[input_images] = content_pre_list
                    else:
                        feed_dict[input_images] = image_sketches

                    last_step = (i == iterations - 1)

                    if use_adversarial_net:
                        # adv_feed_dict = {input_images:image_sketches, adv_net_input: content_pre_list}
                        # if use_hint:
                        #     adv_feed_dict[input_hint] = image_hint
                        adv_feed_dict = copy.copy(feed_dict)
                        adv_feed_dict[adv_net_input] = content_pre_list

                        # TEST printing before training
                        print_progress(i,
                                       feed_dict=feed_dict,
                                       adv_feed_dict=adv_feed_dict,
                                       start_time=start_time,
                                       total_iterations=iterations,
                                       last=last_step)

                        # Update D network
                        _, summary_str = sess.run([adv_train_step, adv_sum],
                                                  feed_dict=adv_feed_dict)
                        summary_writer.add_summary(summary_str, i)

                        # Update G network
                        _, summary_str = sess.run(
                            [generator_both_train, g_sum],
                            feed_dict=adv_feed_dict)
                        summary_writer.add_summary(summary_str, i)

                    else:
                        adv_feed_dict = None
                        print_progress(i,
                                       feed_dict=feed_dict,
                                       adv_feed_dict=adv_feed_dict,
                                       start_time=start_time,
                                       total_iterations=iterations,
                                       last=last_step)

                        _, summary_str = sess.run(
                            [generator_train_step, g_sum], feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, i)

                    # TEST printing after training
                    print_progress(i,
                                   feed_dict=feed_dict,
                                   adv_feed_dict=adv_feed_dict,
                                   start_time=start_time,
                                   total_iterations=iterations,
                                   last=last_step)

                    if (checkpoint_iterations
                            and i % checkpoint_iterations == 0) or last_step:
                        saver.save(sess,
                                   save_dir + 'model.ckpt',
                                   global_step=i)
                        print('Checkpoint saved.')

                        if test_img_dir is not None:
                            test_image = imread(test_img_dir)
                            test_image_shape = test_image.shape

                            # The for loop will run once and terminate. We can't use return and
                            # yield in the same function, so this is a hacky way to pull out the
                            # single generated image. Set use_cpu=True to save graphics memory.
                            for _, generated_image in color_sketches_net(
                                    test_image_shape[0],
                                    test_image_shape[1],
                                    iterations,
                                    1,
                                    content_weight,
                                    tv_weight,
                                    learning_rate,
                                    generator_network=generator_network,
                                    use_adversarial_net=False,  # use_adversarial_net=use_adversarial_net,
                                    use_hint=use_hint,
                                    save_dir=save_dir,
                                    do_restore_and_generate=True,
                                    do_restore_and_train=False,
                                    from_screenshot=False,
                                    from_webcam=False,
                                    test_img_dir=test_img_dir,
                                    test_img_hint=test_img_hint,
                                    input_mode=input_mode,
                                    output_mode=output_mode,
                                    use_cpu=use_cpu):
                                pass
                            best_image = generated_image

                        # Because we now have a batch, choose the first image in the batch as our sample.
                        yield ((None if last_step else i),
                               None if test_img_dir is None else best_image)
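The restore-then-initialize pattern above (restore a subset of variables from a
checkpoint, then initialize everything the Saver did not touch) recurs in both
branches, so it is easy to factor out. A minimal sketch against the same
TF 0.x-era API; the helper name is my own, not part of the source:

def initialize_unrestored_variables(sess, restored_vars):
    # The collection key was renamed around TF 0.12, which the code above
    # also special-cases.
    if '0.12.0' in tf.__version__:
        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    else:
        all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
    # Initialize only the variables the checkpoint did not provide.
    not_restored = [v for v in all_vars if v not in restored_vars]
    sess.run(tf.initialize_variables(not_restored))

# Usage mirroring the code above:
#   saver = tf.train.Saver(sketch_reconstruct_all_var)
#   saver.restore(sess, ckpt.model_checkpoint_path)
#   initialize_unrestored_variables(sess, sketch_reconstruct_all_var)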
예제 #56
0
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 39], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')
    input_labels = tf.placeholder(tf.float32, shape=[None, None, 4, 2], name='input_labels')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))
    input_labels_split = tf.split(input_labels, len(gpus))
    
    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                il = input_labels_split[i]
                total_loss, model_loss, f_score, f_geometry, _ = tower_loss(iis, isms, igms, itms, il, reuse_variables)
                #f_score, f_geometry = i_am_testing(iis)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                #print "below..."
                #batch_norm_updates_op = tf.group(*[op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) if 'resnet_v1_50/block4' in op.name or 'resnet_v1_50/block3' in op.name or 'feature_fusion' in op.name])
                #print "above..."
                reuse_variables = True
                #print "below.."
                train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block1' in var.name]
                #train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block4' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_7' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_8' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_9' in var.name]
                #print train_var
                #print "above..."
                train_var += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='feature_fusion')
                grads = opt.compute_gradients(total_loss, var_list=train_var)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    #train_var = [var for var in tf.trainable_variables() if ('resnet_v1_50/block3' in var.name or 'resnet_v1_50/block4' in var.name or 'feature_fusion' in var.name)]
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')
    
    #####################################################################################################################
    # BLOCK MODIFIED BY ME
    #variables = slim.get_variables_to_restore()
    #var_list = []
    #for v in variables:
    #    if len(v.name.split('/')) == 1:
    #            var_list.append(v)
    #    elif v.name.split('/')[1] != "myconv1" or not v.name.find('custom_filter'):
    #            var_list.append(v)
    #    else:
    #            pass
    #saver = tf.train.Saver(var_list)
    saver = tf.train.Saver(tf.global_variables())
    saver_restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    # removing the first conv layer
    #del saver_restore_vars[1]
    #saver_to_restore = tf.train.Saver(saver_restore_vars)
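    # A name-based alternative to the index-based deletion performed in the
    # restore branch below (a sketch; the substring is an assumption about
    # which layer's shape changed):
    #saver_restore_vars = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    #                      if 'resnet_v1_50/conv1' not in v.name]
    #saver_to_restore = tf.train.Saver(saver_restore_vars)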
    #####################################################################################################################
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())
    
    init = tf.global_variables_initializer()
    #print '>> trainable variables: ',slim.get_trainable_variables()
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
    #my_char_l = "5"
    #my_char_U = ""
    data_size = 0
    train_data_indices = []
    list_of_img_pos = []
    with open('./cropped_annotations_5.txt', 'r') as f:
        annotation_file = f.readlines()
    #with open('Data/cropped_annotations_new/cropped_annotations' + my_char_U + '.txt', 'r') as f:
    #    annotation_file += f.readlines()
    idx = 0
    for line in annotation_file:
        if len(line) > 1 and line[:13] == './cropped_img':  # and str(line[14:27]) in training_list:
            data_size += 1
            train_data_indices.append(idx)
            list_of_img_pos.append(line[14:].split(".")[0]+".tiff")
        idx += 1
    list_of_img_all = os.listdir('./cropped_img')
    list_of_img_neg = np.array(list(set(list_of_img_all) - set(list_of_img_pos)))
    #print "Char model: " + my_char_U + my_char_l
    #print "Data size: " + str(data_size)
    epoch_size = data_size / (16 * 2)
    #print epoch_size
    print "This many steps per epoch: " + str(epoch_size)

    #list_of_img_neg_char = os.listdir('Data/j')

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            print '>> Checkpoint path: ', FLAGS.checkpoint_path
            print '>> second stuff: ', os.path.basename(ckpt_state.model_checkpoint_path)
            #all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            # These two variables are excluded from the restore (their shapes
            # presumably changed) and freshly initialized below.
            var1 = saver_restore_vars[1]
            del saver_restore_vars[1]
            var2 = saver_restore_vars[422]
            del saver_restore_vars[422]
            #names = [var.name for var in saver_restore_vars]
            saver_to_restore = tf.train.Saver(saver_restore_vars)
            #print '>> global vars: ', names.index('resnet_v1_50/conv1/weights/ExponentialMovingAverage:0')#[var.name for var in tf.global_variables()]
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            # originally saver.restore(sess, model_path)
            saver_to_restore.restore(sess, model_path)
            init_new_vars_op = tf.initialize_variables([var1, var2])
            sess.run(init_new_vars_op)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
        #print "below:"
        #tvars = tf.trainable_variables()
        #g_vars = [var for var in tvars if 'resnet_v1_50/block4' in var.name]
        #print g_vars
        #print tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_v1_50')
        #return
        print FLAGS.learning_rate
        print reg_constant
        for step in range(24*epoch_size):
            ### Generate Data ###
            data = [], [], [], [], []
            np.random.shuffle(train_data_indices)
            num_im = 0
            actual_num_im = 0
            list_of_chars = list(string.ascii_lowercase) + [str(x) for x in range(10)]
            while len(data[0]) < 32:
                prob = 1  # np.random.random(1)[0]; pinned to 1, so the negative branch below never runs
                if prob > 0.49:
                    i = train_data_indices[num_im]
                    im_fn = "./cropped_img/"+annotation_file[i][14:].split(".tiff",1)[0]+".tiff"
                    #print im_fn
                    im = cv2.imread(im_fn)
                    ################################################################################
                    # adding rest of the channels
                    for ids_c in range(len(list_of_chars)):
                        crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_cropped/'+list_of_chars[ids_c]+'/'
                        filename = crop_dir+annotation_file[i][14:].split(".tiff",1)[0]+".tiff"
                        pad = cv2.imread(filename)
                        pad = pad[:,:,0]
                        pad = np.expand_dims(pad, axis=2)
                        im = np.append(im, pad, axis = 2)
                    ################################################################################
                    ################################################################################
                    if im is not None:
                        r, c, _ = im.shape
                        text_polys = []
                        text_tags = []
                        if int(annotation_file[i+1]) > 0:
                            for idx in range(i+2,i+2+int(annotation_file[i+1])):
                                annotation_data = annotation_file[idx]
                                annotation_data = annotation_data.split(" ")
                                x, y = float(annotation_data[0]), float(annotation_data[1])
                                w, h = float(annotation_data[2]), float(annotation_data[3])
                                text_polys.append([[int(x), int(y - h)], [int(x + w), int(y - h)],
                                                   [int(x + w), int(y)], [int(x), int(y)]])
                                text_tags.append(False)
                        score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)), np.array(text_polys), np.array(text_tags))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
                        actual_num_im += 1  
                    num_im += 1
           
                else:
                    im_fn = np.random.choice(list_of_img_neg)
                    # Assumed load path: the snippet relied on `im` left over
                    # from the previous iteration here.
                    im = cv2.imread("./cropped_img/" + im_fn)
                    ################################################################################
                    # adding rest of the channels
                    #for i in range(len(list_of_chars)):
                    crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_single_cropped/'
                    # NOTE: `i` below is carried over from the positive branch.
                    filename = crop_dir + annotation_file[i][14:].split(".tiff", 1)[0] + ".tiff"
                    pad = cv2.imread(filename)
                    pad = pad[:, :, 0]
                    pad = np.expand_dims(pad, axis=2)
                    im = np.append(im, pad, axis=2)
                    ################################################################################
                    ################################################################################
                    #    im_fn = np.random.choice(list_of_img_neg_char)
                    #    im_mini = cv2.imread("Data/j/" + im_fn)
		    # 	r0, c0, _ = im_mini.shape
                    #     im = np.zeros((512, 512, 3), dtype=np.uint8)
 		    #	ra, rb, ca, cb = 256-r0/2, 256+(r0+1)/2, 256-c0/2, 256+(c0+1)/2
                    #    im[ra:rb, ca:cb, :] = im_mini.copy()
                    if im is not None:
                        r, c, _ = im.shape
                        score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)), np.array([]), np.array([]))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
       
            ### Run model ###
            ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0],
                                                                                input_score_maps: data[2],
                                                                                input_geo_maps: data[3],
                                                                                input_training_masks: data[4]})
            epoch = step / epoch_size
            batch_num = step % epoch_size   
            if step % (epoch_size/3) == 0:   
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " loss: " + str(ml)
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " loss: " + str(tl)
            if step % (epoch_size / 2) == 0:
                #print "Epoch: " + str(step / (epoch_size/2))
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0],
                                                                                             input_score_maps: data[2],
                                                                                             input_geo_maps: data[3],
                                                                                             input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
            if False:  # evaluation block, disabled
                count_right = 0
                count_wrong = 0
                count_posNotDetected = 0
                im0 = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
                w, h, _ = im0.shape
                slide_window = 300
                crop_size = 512
                crop_center = (256, 256)
                num_rows, num_cols = int(np.ceil(w / float(slide_window))), int(np.ceil(h / float(slide_window)))  # float() so ceil sees a fraction under Python 2
                print num_cols
                labels = []  # boxes gathered across all rotations, merged by lanms below
                for rot in [-90.0, -60.0, -30.0, 0.0, 30.0, 60.0, 90.0]:
                    im = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
                    boxes_one_rot = []  
                    count = 0
                    while count < num_rows * num_cols:
                        images, data2, data3, data4 = [], [], [], []
                        for k in range(16):
                            i = (count + k) / num_cols  # tile row (row-major index // num_cols)
                            j = (count + k) % num_cols  # tile column
                    
                            temp = im[slide_window*i:slide_window*i+crop_size, \
                                      slide_window*j:slide_window*j+crop_size, ::-1]
                            w2, h2, _ = temp.shape
                            if w2 < crop_size or h2 < crop_size:
                                result = np.zeros((crop_size,crop_size,3))
                                result[:w2,:h2] = temp
                                temp = result
                            M = cv2.getRotationMatrix2D(crop_center,rot,1.0)
                            temp = cv2.warpAffine(temp, M, (crop_size, crop_size))
                            images.append(temp)
                            score_map, geo_map, training_mask = icdar.generate_rbox((int(crop_size), int(crop_size)), np.array([]), np.array([]))
                            data2.append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                            data3.append(geo_map[::4, ::4, :].astype(np.float32))
                            data4.append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
                        score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: images, input_score_maps:data2,
                                                                                                 input_geo_maps: data3,
                                                                                                 input_training_masks: data4})
                        for k in range(16):
                            i = (count + k) / num_cols  # tile row (row-major index // num_cols)
                            j = (count + k) % num_cols  # tile column
                            boxes = detect(score_map=score[k], geo_map=geometry[k], score_map_thresh=0.01, box_thresh=0.01, nms_thres=0.01)  # k indexes the 16-image batch
                            if boxes is not None:
                                boxes = boxes[:, :8].reshape((-1, 4, 2))
                                for box in boxes:
                                    # Undo the rotation, then shift back to full-map coordinates.
                                    M_inv = cv2.getRotationMatrix2D(crop_center, -1 * rot, 1)
                                    box[0] = M_inv.dot(np.array((box[0, 0], box[0, 1]) + (1,)))
                                    box[1] = M_inv.dot(np.array((box[1, 0], box[1, 1]) + (1,)))
                                    box[2] = M_inv.dot(np.array((box[2, 0], box[2, 1]) + (1,)))
                                    box[3] = M_inv.dot(np.array((box[3, 0], box[3, 1]) + (1,)))
                                    box = sort_poly(box.astype(np.int32))
                                    box[:, 0] += j * slide_window
                                    box[:, 1] += i * slide_window
                                    boxes_one_rot.append(box)  # collect every box, not just the last
                        count += 16  # advance past the tiles just processed; the while loop never terminates without this
                    boxes_single_rot = np.zeros((len(boxes_one_rot), 9))
                    boxes_single_rot[:, :8] = np.array(boxes_one_rot).reshape((-1, 8))
                    boxes_single_rot[:, 8] = 1
                    labels += boxes_single_rot.tolist()
                boxes = lanms.merge_quadrangle_n9(np.array(labels), nms_thres)  # nms_thres assumed defined at module level
                annotation = np.load("/mnt/nfs/work1/elm/ray/new_char_anots_ncs/" + "j" + "/" + "D0117-5755036" + ".npy").item()
                ### Compute the TP, FP, FN info for each image
                count_right_cache = 0
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                num_true_pos = len(annotation)
                for box in boxes:
                    box = sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
                        continue
                    k = 0
                    idx = 0
                    count_wrong += 1
                    while (idx < num_true_pos):
                        if k in annotation: 
                            proposed_label = annotation[k]['vertices']
                            if len(proposed_label) == 4:
                                x3, y3, x2, y2, x1, y1, x0, y0 = proposed_label[0][0], proposed_label[0][1], proposed_label[1][0], proposed_label[1][1], \
                                                     proposed_label[2][0], proposed_label[2][1], proposed_label[3][0], proposed_label[3][1]
                                if (checkIOU(box, [[x0,y0],[x1,y1],[x2,y2],[x3,y3]]) == True):
                                    count_right_cache += 1
                                    count_wrong -= 1
                                    break 
                            idx += 1
                        k += 1
                count_posNotDetected += num_true_pos - count_right_cache
                count_right += count_right_cache
                precision = float(count_right) / float(count_right + count_wrong)  # TP / (TP + FP)
                recall = float(count_right) / float(count_right + count_posNotDetected)  # TP / (TP + FN)
                fscore = 2 * (precision * recall) / (precision + recall)
                print "Precision, recall, fscore: " + str(precision) + ", " + str(recall) + ", " + str(fscore)
예제 #57
0
def main(_):
    nH, nW, nD = 28, 28, 1
    nC = 10
    nB = FLAGS.B
    nT = 10

    with tf.device("/gpu:" + str(FLAGS.gpu)):

            n_hidden = 10
            n_sequence = 4

            def weight_variable(shape):
                initial = tf.truncated_normal(shape, stddev = 0.1)
                return tf.Variable(initial)
            
            def bias_variable(shape):
                initial = tf.constant(0.1, shape=shape)
                return tf.Variable(initial)

            def RNN(x):
                x_list = []
                for i in range(n_sequence):
                    x_list.append(x)

                with tf.variable_scope('rnn'):
                    # Define a lstm cell with tensorflow
                    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden)

                    # Get lstm cell output
                    outputs, states = rnn.rnn(lstm_cell, x_list, dtype=tf.float32)

                    logits = []
                    for i in range(len(outputs)):
                        with tf.variable_scope('linear'+str(i)):
                            logits.append(linear(outputs[i], n_hidden, 2))
                    return logits

            def Hash(y):
                with tf.variable_scope('hash'):
                    y1 = relu(linear(y, 10, 100))
                    z_logits = []
                    for i in range(n_sequence):
                        with tf.variable_scope('linear'+str(i)):
                            z_logits.append(linear(y1, 100, 2))
                    return z_logits

            
            def InvHash(z_logits):
                with tf.variable_scope('invhash'):
                    z_concat = tf.concat(1, z_logits)
                    with tf.variable_scope('linear1'):
                        y1 = relu(linear(z_concat, 2*n_sequence, 100))
                    with tf.variable_scope('linear2'):
                        y__logit = linear(y1, 100, 10)
                    return y__logit

            def match_all(y, y_):
                # Evaluate model
                num_correct_pred = 0
                for i in range(n_sequence):
                    num_correct_pred += tf.cast(tf.equal(tf.argmax(y_[i],1), tf.argmax(y[i],1)), tf.int32)
                correct_pred = tf.equal(num_correct_pred, tf.constant(n_sequence,dtype=tf.int32))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
                return accuracy



            def TestHash():
                v = np.zeros((10,10))
                for i in range(10):
                    v[i][i] = 1.0
                output = sess.run(z, feed_dict = {'y:0': v})
                s_list = []
                for i in range(10):
                    s = ''
                    for j in range(n_sequence):
                        if output[j][i][0] > output[j][i][1]:
                            s += '0'
                        else:
                            s += '1'
                    s_list.append(s)
                ok = True
                for i in range(10):
                    for j in range(i+1,10):
                        if s_list[i] == s_list[j]:
                            ok = False
                if ok:
                    print("Hash One to One")
                else:
                    print("Hash not one one", s_list)



            sess = create_session()
            saver = tf.train.import_meta_graph('saved/model-45000.meta')
            saver.restore(sess, tf.train.latest_checkpoint('saved/'))
            g = tf.get_default_graph()
            prev_vars = tf.all_variables()

            x = g.get_tensor_by_name('x:0')
            y = g.get_tensor_by_name('y:0')
            print x,y
            is_training = g.get_tensor_by_name('is_training:0')

            y_pred = g.get_tensor_by_name('cnn_train/y_pred_logits:0')
            y_pred = tf.stop_gradient(y_pred)

            features = g.get_tensor_by_name('cnn_train/features:0')
            features = tf.stop_gradient(features)
            accuracy = g.get_tensor_by_name('cnn_train/accuracy:0')

#---------------------------------------------------------------------------------------------#

            z_logits    = RNN(features)
            zlogits     = Hash(y)


            z_ = []
            for i in range(n_sequence):
                z_.append(tf.nn.softmax(z_logits[i]))

            z = []
            for i in range(n_sequence):
                z.append(softmax(zlogits[i]))

            y__logits = InvHash(z)

            

            eq_check_3 = cross_entropy(y__logits, y)
            tf.summary.scalar('inverse_loss', eq_check_3)

            l2 = 0
            for i in range(n_sequence):
                l2 += tf.nn.l2_loss(z[i])

            eq_check_2 = sum_cross_entropy(z_logits, z)
            tf.summary.scalar('rnn_classification_loss', eq_check_2)
            tf.summary.scalar('l2_z', l2)
            total_loss =  eq_check_2 + 100*eq_check_3 + 0.01*l2
            tf.summary.scalar('loss', total_loss)
            rnn_acc        = match_all(z_, z)
            tf.summary.scalar('rnn_acc', rnn_acc)


            learning_rate   = 0.0001

            with tf.variable_scope('optimizer'):
                optimizer = minimize(total_loss, {'learning rate': learning_rate}, algo='adam')

            sess.run(tf.initialize_variables(list(set(tf.all_variables()) - set(prev_vars)) ))

            writer = tf.summary.FileWriter('logs', graph = sess.graph)
            summary_op = tf.summary.merge_all()


            n_epoch         = 100
            n_batch         = 200
            
            n_display       = 10000
            
            for e in range(n_epoch):

                for i in range(0, train.shape[0], nB):

                    batch = next_batch(nB)

                    feed_dict = {
                        'x:0': batch['data'],
                        'y:0': batch['labels'],
                        'is_training:0': True
                    }
                    a = sess.run([optimizer,summary_op], feed_dict = feed_dict)
                    writer.add_summary(a[-1], e*50000 + i)

                    if i % 4000 == 0:
                        TestHash()
                        writer.flush()
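The load-extend-initialize pattern at the heart of example #57 (snapshot
tf.all_variables() before adding ops, then initialize only the difference)
reduces to a few lines. A sketch; the checkpoint paths and tensor name are
taken from the example, while the dense head and feature_dim are stand-ins:

feature_dim = 10  # assumed width of the restored features tensor

sess = tf.Session()
saver = tf.train.import_meta_graph('saved/model-45000.meta')
saver.restore(sess, tf.train.latest_checkpoint('saved/'))
prev_vars = set(tf.all_variables())  # snapshot before building new ops

g = tf.get_default_graph()
features = tf.stop_gradient(g.get_tensor_by_name('cnn_train/features:0'))
w = tf.Variable(tf.truncated_normal([feature_dim, 2], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[2]))
head = tf.matmul(features, w) + b  # new, trainable head on frozen features

# Initialize only the variables created after the snapshot, exactly as the
# example does for its RNN, Hash, and optimizer variables.
sess.run(tf.initialize_variables(list(set(tf.all_variables()) - prev_vars)))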
예제 #58
0
    def meta_loss(self,
                  make_loss,
                  len_unroll,
                  net_assignments=None,
                  second_derivatives=False):
        """Returns an operator computing the meta-loss.

    Args:
      make_loss: Callable which returns the optimizee loss; note that this
          should create its ops in the default graph.
      len_unroll: Number of steps to unroll.
      net_assignments: variable to optimizer mapping. If not None, it should be
          a list of (k, names) tuples, where k is a valid key in the kwargs
          passed at construction time and names is a list of variable names.
      second_derivatives: Use second derivatives (default is false).

    Returns:
      namedtuple containing (loss, update, reset, fx, x)
    """

        # Construct an instance of the problem only to grab the variables. This
        # loss will never be evaluated.
        x, constants = _get_variables(make_loss)

        print("Optimizee variables")
        print([op.name for op in x])
        print("Problem variables")
        print([op.name for op in constants])

        # Create the optimizer networks and find the subsets of variables to assign
        # to each optimizer.
        nets, net_keys, subsets = _make_nets(x, self._config, net_assignments)

        # Store the networks so we can save them later.
        self._nets = nets

        # Create hidden state for each subset of variables.
        state = []
        with tf.name_scope("states"):
            for i, (subset, key) in enumerate(zip(subsets, net_keys)):
                net = nets[key]
                with tf.name_scope("state_{}".format(i)):
                    state.append(
                        _nested_variable([
                            net.initial_state_for_inputs(x[j],
                                                         dtype=tf.float32)
                            for j in subset
                        ],
                                         name="state",
                                         trainable=False))

        def update(net, fx, x, state):
            """Parameter and RNN state update."""
            with tf.name_scope("gradients"):
                gradients = tf.gradients(fx, x)

                # Stopping the gradient here corresponds to what was done in the
                # original L2L NIPS submission. However it looks like things like
                # BatchNorm, etc. don't support second-derivatives so we still need
                # this term.
                if not second_derivatives:
                    gradients = [tf.stop_gradient(g) for g in gradients]

            with tf.name_scope("deltas"):
                deltas, state_next = zip(
                    *[net(g, s) for g, s in zip(gradients, state)])
                state_next = list(state_next)

            return deltas, state_next

        def time_step(t, fx_array, x, state):
            """While loop body."""
            x_next = list(x)
            state_next = []

            with tf.name_scope("fx"):
                fx = _make_with_custom_variables(make_loss, x)
                fx_array = fx_array.write(t, fx)

            with tf.name_scope("dx"):
                for subset, key, s_i in zip(subsets, net_keys, state):
                    x_i = [x[j] for j in subset]
                    deltas, s_i_next = update(nets[key], fx, x_i, s_i)

                    for idx, j in enumerate(subset):
                        x_next[j] += deltas[idx]
                    state_next.append(s_i_next)

            with tf.name_scope("t_next"):
                t_next = t + 1

            return t_next, fx_array, x_next, state_next

        # Define the while loop.
        fx_array = tf.TensorArray(tf.float32,
                                  size=len_unroll + 1,
                                  clear_after_read=False)
        _, fx_array, x_final, s_final = tf.while_loop(
            cond=lambda t, *_: t < len_unroll,
            body=time_step,
            loop_vars=(0, fx_array, x, state),
            parallel_iterations=1,
            swap_memory=True,
            name="unroll")

        with tf.name_scope("fx"):
            fx_final = _make_with_custom_variables(make_loss, x_final)
            fx_array = fx_array.write(len_unroll, fx_final)

        loss = tf.reduce_sum(fx_array.pack(), name="loss")

        # Reset the state; should be called at the beginning of an epoch.
        with tf.name_scope("reset"):
            variables = (nest.flatten(state) + x + constants)
            # Empty array as part of the reset process.
            reset = [tf.initialize_variables(variables), fx_array.close()]

        # Operator to update the parameters and the RNN state after our loop, but
        # during an epoch.
        with tf.name_scope("update"):
            update = (nest.flatten(_nested_assign(x, x_final)) +
                      nest.flatten(_nested_assign(state, s_final)))

        # Log internal variables.
        for k, net in nets.iteritems():
            print("Optimizer '{}' variables".format(k))
            print([op.name for op in nn.get_variables_in_module(net)])

        return MetaLoss(loss, update, reset, fx_final, x_final)
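A usage sketch for the MetaLoss tuple returned above, per the in-code comments
(reset at the start of each epoch, update during it). meta_opt, the
meta-training step, and the loop bounds are assumptions, not part of the source:

loss, update, reset, fx_final, x_final = meta_opt.meta_loss(make_loss, len_unroll=20)
meta_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for epoch in range(num_epochs):            # num_epochs: assumed
        sess.run(reset)                        # clear unrolled state and fx_array
        for _ in range(steps_per_epoch):       # steps_per_epoch: assumed
            sess.run(meta_step)                # train the optimizer networks
            sess.run(update)                   # write x_final / s_final back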
예제 #59
0
def train_rnn(data_folder, model_file):
    y, rnn_state = RNN(x)

    print "Loading training pickles.."

    # We want to keep the sentences in order to train per sentence
    # Sentences are padded to num_steps
    train_set = import_data.load_dataset(data_folder + '/train_data.pickle',
                                         data_folder + '/train_labels.pickle',
                                         keep_sentences=True,
                                         context_frames=1,
                                         seq_length=num_steps,
                                         batch_size=train_batch_size)

    print "Loading done"

    global sess
    global summary_op
    global train_writer
    global saver
    saver = tf.train.Saver()

    # Create the dir for the model
    if not os.path.isdir('%s/models/%s' % (save_loc, start_date)):
        try:
            os.makedirs('%s/models/%s' % (save_loc, start_date))
        except OSError:
            if not os.path.isdir('%s/models/%s' % (save_loc, start_date)):
                raise

    sess = tf.InteractiveSession()
    summary_op = tf.merge_all_summaries()
    train_writer = tf.train.SummaryWriter(
        '%s/summaries/%s' % (save_loc, start_date), sess.graph)

    # Cost function
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(y, true_labels))
    # Optimizer
    # For gradient descent, learning rate = 0.002 (see Hinton et al.)
    # For AdamOptimizer, learning rate = 0.001 (default)
    if (optimizer_name == 'Adam'):
        # Hacky solution for always making sure that the beta2_power var
        # is always initialized
        temp = set(tf.all_variables())
        optimizer = tf.train.AdamOptimizer().minimize(cost)
        sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))
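        # A more general form of the same trick (a sketch, not from the source;
        # requires a TF version that provides tf.report_uninitialized_variables):
        # ask the graph which variables are still uninitialized instead of
        # diffing variable sets by hand.
        #   uninit_names = set(sess.run(tf.report_uninitialized_variables()))
        #   uninit = [v for v in tf.all_variables()
        #             if v.name.split(':')[0] in uninit_names]
        #   sess.run(tf.initialize_variables(uninit))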
    else:
        optimizer = tf.train.GradientDescentOptimizer(0.002).minimize(cost)  # rate per the comment above

    if model_file:
        saver.restore(sess, model_file)
        print "Model restored"
    else:
        # Initialization
        init_op = tf.initialize_all_variables()
        sess.run(init_op)

    print("Training network. Date: %s" % start_date)
    train(train_set, y, rnn_state, cost, optimizer)

    save_path = saver.save(sess,
                           "%s/models/%s/model.ckpt" % (save_loc, start_date))
    print("Model saved in file: %s" % save_path)
    print("Summaries written to %s/summaries/%s" % (save_loc, start_date))

    evaluate_rnn(data_folder, y, rnn_state)
# DiscreteDeepQ object
current_controller = DiscreteDeepQ(input_size,
                                   num_actions,
                                   brain,
                                   optimizer,
                                   session,
                                   discount_rate=0.95,
                                   target_network_update_rate=0.005,
                                   exploration_period=5000,
                                   max_experience=10000,
                                   store_every_nth=4,
                                   train_every_nth=4,
                                   summary_writer=journalist)

init_all_vars_op = tf.initialize_variables(tf.all_variables(),
                                           name='init_all_vars_op')

session.run(tf.initialize_all_variables())

#for saving graph state, trainable variable values
for variable in tf.trainable_variables():
    tf.identity(variable, name="readVariable")
    tf.assign(variable,
              tf.placeholder(tf.float32,
                             variable.get_shape(),
                             name="variableValue"),
              name="resoreVariable")

tf.train.write_graph(session.graph_def,
                     'models/',
                     'graph-separated-1d.pb',