def __init__(self, dataset_path_or_instance, layers, epochs, starter_learning_rate, noise_std,
                 train_ratio=0.8, test_ratio=0.1, validation_ratio=0.1):
        super(MultilayerPerceptron, self).__init__(dataset_path_or_instance, epochs, starter_learning_rate,
                                                   train_ratio, test_ratio, validation_ratio)
        self._noise_std = noise_std

        self._layers = layers
        self._layers.insert(0, self._input_size)
        self._layers.append(self._output_size)
        self._L = len(self._layers) - 1  # number of layers, excluding the input layer

        # build network and return cost function
        self._cost = self.__build_network__()

        # define the y function as the classification function
        self._y = self.__build_classifier__()

        # loss
        self._loss = -tf.reduce_mean(tf.reduce_sum(self._outputs*tf.log(self._cost), 1))

        # y_true and y_pred used to get the metrics
        self._y_true = tf.argmax(self._outputs, 1)
        self._y_pred = tf.argmax(self._y, 1)

        # train_step for the weight parameters, optimized with Adam
        self._learning_rate = tf.Variable(self._starter_learning_rate, trainable=False)
        self._train_step = tf.train.AdamOptimizer(self._learning_rate).minimize(self._loss)

        # add the updates of batch normalization statistics to train_step
        bn_updates = tf.group(*self._bn_assigns)
        with tf.control_dependencies([self._train_step]):
            self._train_step = tf.group(bn_updates)
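
The same pattern in isolation — a minimal sketch (the layer, loss, and names below are placeholders, not the class above) of deferring batch-norm statistic updates until after the optimizer step:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
h = tf.layers.batch_normalization(tf.layers.dense(x, 8), training=True)
loss = tf.reduce_mean(tf.square(h))

train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

# tf.layers.batch_normalization registers its moving-average updates in UPDATE_OPS
bn_updates = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))
with tf.control_dependencies([train_step]):
    train_step = tf.group(bn_updates)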
	def build_rmsprop_optimizer(self, learning_rate, rmsprop_decay, rmsprop_constant, gradient_clip, version):

		with tf.name_scope('rmsprop'):
			# standard RMSProp uses the built-in optimizer; Graves' variant is
			# implemented by hand below on top of plain gradient descent
			if version == 'rmsprop':
				optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=rmsprop_decay, epsilon=rmsprop_constant)
			else:
				optimizer = tf.train.GradientDescentOptimizer(learning_rate)

			grads_and_vars = optimizer.compute_gradients(self.loss)
			grads = [gv[0] for gv in grads_and_vars]
			params = [gv[1] for gv in grads_and_vars]

			if gradient_clip > 0:
				# clip_by_global_norm returns (clipped_grads, global_norm)
				grads, _ = tf.clip_by_global_norm(grads, gradient_clip)

			if version == 'rmsprop':
				return optimizer.apply_gradients(zip(grads, params))
			elif version == 'graves_rmsprop':

				avg_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]
				avg_square_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]

				update_avg_grads = [grad_pair[0].assign((rmsprop_decay * grad_pair[0]) + ((1 - rmsprop_decay) * grad_pair[1])) 
					for grad_pair in zip(avg_grads, grads)]
				update_avg_square_grads = [grad_pair[0].assign((rmsprop_decay * grad_pair[0]) + ((1 - rmsprop_decay) * tf.square(grad_pair[1]))) 
					for grad_pair in zip(avg_square_grads, grads)]
				avg_grad_updates = update_avg_grads + update_avg_square_grads

				rms = [tf.sqrt(avg_grad_pair[1] - tf.square(avg_grad_pair[0]) + rmsprop_constant)
					for avg_grad_pair in zip(avg_grads, avg_square_grads)]


				rms_updates = [grad_rms_pair[0] / grad_rms_pair[1] for grad_rms_pair in zip(grads, rms)]
				train = optimizer.apply_gradients(zip(rms_updates, params))

				return tf.group(train, tf.group(*avg_grad_updates))
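
One detail worth flagging in the snippet above: tf.clip_by_global_norm returns a (clipped_list, global_norm) pair rather than just the clipped gradients. A tiny standalone check:

import tensorflow as tf

grads = [tf.constant([3.0, 4.0]), tf.constant([0.0, 12.0])]
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)

with tf.Session() as sess:
    print(sess.run(global_norm))  # sqrt(9 + 16 + 144) = 13.0
    print(sess.run(clipped))      # every gradient scaled by 5 / 13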
 def nabla(self, cost, n, c=1, q=0.001, a=0.001, A=100, alpha=0.602, gamma=0.101):
     # SPSA-style two-sided finite-difference gradient estimate
     cn = (c + 0.0) / (n + A) ** gamma   # perturbation size, decays with n
     an = a / (n + 1 + A) ** alpha       # gain sequence (kept for the caller, unused here)
     sess = self.sess
     dv = []
     # draw a random +/- perturbation for every variable ('dist' is the
     # caller's Bernoulli sampler)
     for m in self.var:
         nm = np.ones(shape=m.shape)
         for x in np.nditer(nm, op_flags=['readwrite']):
             x[...] = dist.bernoulli() * 2 * cn
         dv.append(nm)
     # evaluate the cost at theta + delta
     l = []
     for m, d, t in zip(self.var, dv, self.var_t):
         l.append(t.assign(m + d))
     sess.run(tf.group(*l))
     f1 = sess.run(cost, feed_dict=self.feed)
     # evaluate the cost at theta - delta
     l = []
     for m, d, t in zip(self.var, dv, self.var_t):
         l.append(t.assign(m - d))
     sess.run(tf.group(*l))
     f0 = sess.run(cost, feed_dict=self.feed)
     df = f1 - f0
     # gradient estimate: -(f1 - f0) / (2 * delta_i), element-wise
     for m in dv:
         for x in np.nditer(m, op_flags=['readwrite']):
             x[...] = -(df + 0.0) / x / 2
     return dv
Example #4
def solve(global_step):
    """add solver to losses"""
    # learning rate
    lr = _configure_learning_rate(82783, global_step)
    optimizer = _configure_optimizer(lr)
    tf.summary.scalar('learning_rate', lr)

    # compute and apply gradient
    losses = tf.get_collection(tf.GraphKeys.LOSSES)
    regular_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    regular_loss = tf.add_n(regular_losses)
    out_loss = tf.add_n(losses)
    total_loss = tf.add_n(losses + regular_losses)

    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('out_loss', out_loss)
    tf.summary.scalar('regular_loss', regular_loss)

    update_ops = []
    variables_to_train = _get_variables_to_train()
    # update_op = optimizer.minimize(total_loss)
    gradients = optimizer.compute_gradients(total_loss, var_list=variables_to_train)
    grad_updates = optimizer.apply_gradients(gradients, 
            global_step=global_step)
    update_ops.append(grad_updates)
    
    # update moving mean and variance
    if FLAGS.update_bn:
        update_bns = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        update_bn = tf.group(*update_bns)
        update_ops.append(update_bn)

    return tf.group(*update_ops)
 def central_step():
     # restore v1, slots
     op5 = tf.group(*[ tf.assign(w,v) for w,v in zip(restored_vars, tmp_vars)])
     with tf.get_default_graph().control_dependencies([op5]):
         back =  tf.group(*[tf.assign_sub(v, -self._lr_t*grad) for grad,v in grads_and_vars])
         with tf.get_default_graph().control_dependencies([back]):
             return tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
    def optimize(self, learning_rate, train_layers, global_step, source_centroid, target_centroid):
        print '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
        print train_layers
        var_list = [v for v in tf.trainable_variables() if v.name.split('/')[1] in train_layers + ['fc9']]
        finetune_list = [v for v in var_list if v.name.split('/')[1] in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6', 'fc7']]
        new_list = [v for v in var_list if v.name.split('/')[1] in ['fc8', 'fc9']]
        # L2 regularization over all weight variables
        self.Gregloss = 0.0005 * tf.reduce_mean([tf.nn.l2_loss(x) for x in var_list if 'weights' in x.name])

        finetune_weights = [v for v in finetune_list if 'weights' in v.name]
        finetune_biases = [v for v in finetune_list if 'biases' in v.name]
        new_weights = [v for v in new_list if 'weights' in v.name]
        new_biases = [v for v in new_list if 'biases' in v.name]

        print '==============finetune_weights======================='
        print finetune_weights
        print '==============finetune_biases======================='
        print finetune_biases
        print '==============new_weights======================='
        print new_weights
        print '==============new_biases======================='
        print new_biases

        self.F_loss = self.loss + self.Gregloss + global_step * self.G_loss + global_step * self.Semanticloss
        # pretrained layers get smaller learning rates than the newly added ones
        train_op1 = tf.train.MomentumOptimizer(learning_rate * 0.1, 0.9).minimize(self.F_loss, var_list=finetune_weights)
        train_op2 = tf.train.MomentumOptimizer(learning_rate * 0.2, 0.9).minimize(self.F_loss, var_list=finetune_biases)
        train_op3 = tf.train.MomentumOptimizer(learning_rate * 1.0, 0.9).minimize(self.F_loss, var_list=new_weights)
        train_op4 = tf.train.MomentumOptimizer(learning_rate * 2.0, 0.9).minimize(self.F_loss, var_list=new_biases)
        # update the moving centroids only after all four optimizer steps have run
        with tf.control_dependencies([train_op1, train_op2, train_op3, train_op4]):
            update_sc = self.source_moving_centroid.assign(source_centroid)
            update_tc = self.target_moving_centroid.assign(target_centroid)
        return tf.group(update_sc, update_tc)
    def optimize(self, learning_rate, train_layers, global_step, source_centroid, target_centroid):
        print '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
        print train_layers
        var_list = [v for v in tf.trainable_variables() if v.name.split('/')[1] in ['conv1', 'conv2', 'fc1', 'fc2']]
        # L2 regularization over all weight variables
        self.Gregloss = 5e-4 * tf.reduce_mean([tf.nn.l2_loss(x) for x in var_list if 'weights' in x.name])

        new_weights = [v for v in var_list if 'weights' in v.name or 'gamma' in v.name]
        new_biases = [v for v in var_list if 'biases' in v.name or 'beta' in v.name]

        print '==============new_weights======================='
        print new_weights
        print '==============new_biases======================='
        print new_biases

        self.F_loss = self.loss + self.Gregloss + global_step * self.Semanticloss + global_step * self.G_loss
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print '+++++++++++++++ batch norm update ops +++++++++++++++++'
        print update_ops
        # run the batch-norm statistic updates before the optimizer steps
        with tf.control_dependencies(update_ops):
            train_op3 = tf.train.MomentumOptimizer(learning_rate * 1.0, 0.9).minimize(self.F_loss, var_list=new_weights)
            train_op4 = tf.train.MomentumOptimizer(learning_rate * 2.0, 0.9).minimize(self.F_loss, var_list=new_biases)

        # update the moving centroids only after both optimizer steps have run
        with tf.control_dependencies([train_op3, train_op4]):
            update_sc = self.source_moving_centroid.assign(source_centroid)
            update_tc = self.target_moving_centroid.assign(target_centroid)
        return tf.group(update_sc, update_tc)
Example #8
 def init_gradients(self, loss, var_train):
     if self.play_mode:
         return
     
     with tf.device(self.args.device):
         var_refs = [v.ref() for v in var_train]
         train_gradients = tf.gradients(
             loss, var_refs,
             gate_gradients=False,
             aggregation_method=None,
             colocate_gradients_with_ops=False)
 
         acc_gradient_list = []
         train_step_list = []
         new_grad_vars = []
         self.grad_list = []
         var_list = []
         for grad, var in zip(train_gradients, self.global_vars):
             acc_gradient = tf.Variable(tf.zeros(grad.get_shape()), trainable=False)
             acc_gradient_list.append(acc_gradient)
             train_step_list.append(acc_gradient.assign_add(grad))
             new_grad_vars.append((tf.convert_to_tensor(acc_gradient, dtype=tf.float32), var))
             self.grad_list.append(acc_gradient)
             var_list.append(var)
         
         self.train_step = tf.group(*train_step_list)                
         
         self.reset_acc_gradients = tf.initialize_variables(acc_gradient_list)        
         self.apply_grads = self.global_optimizer.apply_gradients(new_grad_vars)
 
         sync_list = []
         for i in range(0, len(self.global_vars)):
             sync_list.append(var_train[i].assign(self.global_vars[i]))
         self.sync = tf.group(*sync_list)
def running_mean(cost, tag_name, batch_size=1):
    with tf.name_scope("running_mean_" + tag_name):
        with tf.variable_scope(tag_name):
            cost_sum = tf.get_variable(
              "cost_sum",
              initializer=tf.zeros_initializer,
              dtype=tf.float64,
              shape=(),
              collections=[tf.GraphKeys.LOCAL_VARIABLES],
              trainable=False)
            batches = tf.get_variable(
              "cost_num_batches",
              initializer=tf.zeros_initializer,
              dtype=tf.int32,
              shape=(),
              collections=[tf.GraphKeys.LOCAL_VARIABLES],
              trainable=False)

        cost_add = tf.assign_add(cost_sum, tf.cast(cost, dtype=tf.float64))
        batches_add = tf.assign_add(batches, batch_size)
        update_cost_mean = tf.group(cost_add, batches_add)

        reset_batches = tf.assign(batches, 0)
        reset_cost_sum = tf.assign(cost_sum, 0.0)
        reset_cost_mean = tf.group(reset_batches, reset_cost_sum)

        mean_cost = tf.divide(
          cost_sum,
          tf.cast(batches, dtype=tf.float64))
        train_loss_summary = tf.summary.scalar(tag_name, mean_cost)

    return reset_cost_mean, update_cost_mean, train_loss_summary
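
A hedged usage sketch for running_mean (the tag and cost values here are made up): the three returned ops are run inside a session loop, and the accumulators live in LOCAL_VARIABLES, so they need the local initializer.

import tensorflow as tf

cost = tf.placeholder(tf.float32, shape=())
reset_op, update_op, loss_summary = running_mean(cost, "train_loss")

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # cost_sum / cost_num_batches
    for batch_cost in [1.0, 2.0, 3.0]:
        sess.run(update_op, feed_dict={cost: batch_cost})
    sess.run(reset_op)  # start a fresh accumulation window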
  def testSummariesAreFlushedToDiskWithoutGlobalStep(self):
    output_dir = os.path.join(self.get_temp_dir(), 'flush_test_no_global_step')
    if tf.gfile.Exists(output_dir):  # For running on jenkins.
      tf.gfile.DeleteRecursively(output_dir)

    names_to_metrics, names_to_updates = self._create_names_to_metrics(
        self._predictions, self._labels)

    for k in names_to_metrics:
      v = names_to_metrics[k]
      tf.summary.scalar(k, v)

    summary_writer = tf.train.SummaryWriter(output_dir)

    initial_op = tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer())
    eval_op = tf.group(*names_to_updates.values())

    with self.test_session() as sess:
      slim.evaluation.evaluation(
          sess,
          initial_op=initial_op,
          eval_op=eval_op,
          summary_op=tf.summary.merge_all(),
          summary_writer=summary_writer)

      names_to_values = {name: names_to_metrics[name].eval()
                         for name in names_to_metrics}
    self._verify_summaries(output_dir, names_to_values)
  def AddTraining(self,
                  task_context,
                  batch_size,
                  learning_rate=0.1,
                  decay_steps=4000,
                  momentum=0.9,
                  corpus_name='documents'):
    """Builds a trainer to minimize the cross entropy cost function.

    Args:
      task_context: file path from which to read the task context
      batch_size: batch size to request from reader op
      learning_rate: initial value of the learning rate
      decay_steps: decay learning rate by 0.96 every this many steps
      momentum: momentum parameter used when training with momentum
      corpus_name: name of the task input to read parses from

    Returns:
      Dictionary of named training nodes.
    """
    with tf.name_scope('training'):
      nodes = self.training
      nodes.update(self._AddGoldReader(task_context, batch_size, corpus_name))
      nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                      return_average=False))
      nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                         nodes['logits']))
      # Add the optimizer
      if self._only_train:
        trainable_params = [v
                            for k, v in self.params.iteritems()
                            if k in self._only_train]
      else:
        trainable_params = self.params.values()
      lr = self._AddLearningRate(learning_rate, decay_steps)
      optimizer = tf.train.MomentumOptimizer(lr,
                                             momentum,
                                             use_locking=self._use_locking)
      train_op = optimizer.minimize(nodes['cost'], var_list=trainable_params)
      for param in trainable_params:
        slot = optimizer.get_slot(param, 'momentum')
        self.inits[slot.name] = state_ops.init_variable(slot,
                                                        tf.zeros_initializer)
        self.variables[slot.name] = slot
      numerical_checks = [
          tf.check_numerics(param,
                            message='Parameter is not finite.')
          for param in trainable_params
          if param.dtype.base_dtype in [tf.float32, tf.float64]
      ]
      check_op = tf.group(*numerical_checks)
      avg_update_op = tf.group(*self._averaging.values())
      train_ops = [train_op]
      if self._check_parameters:
        train_ops.append(check_op)
      if self._use_averaging:
        train_ops.append(avg_update_op)
      nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
Example #12
    def create_variables(self, settings):
        self.network_names = [
            'state_encoder',
            'action_decoder',
            'value_decoder',
        ]

        #### CREATE ALL THE NETWORKS
        self.networks = {
            name:parse_block(settings['networks'][name])
            for name in self.network_names
        }

        #### CREATE VARIABLES TO STORE GRADIENTS
        self.net_grads = {}
        for n in self.network_names:
            self.net_grads[n] = [
                tf.Variable(tf.zeros_like(v), name=v.name.split(':')[0]+"_grad")
                for v in self.networks[n].variables()
            ]

        #### CREATE COMBINED NETWORK: state -> action
        self.action_network = SequenceWrapper(
            [self.networks["state_encoder"], self.networks["action_decoder"]],
            scope="action_network")

        #### CREATE COMBINED NETWORK: state -> state_value
        self.value_network = SequenceWrapper(
            [self.networks["state_encoder"], self.networks["value_decoder"]],
            scope="value_network")

        #### COMPUTE STATE VALUE AND ACTION
        self.state        = self.networks["state_encoder"].input_placeholder()
        self.action_probs = self.action_network(self.state)
        self.action_id    = tf.argmax(self.action_probs, dimension=1)

        self.state_value        =  tf.reduce_sum(self.value_network(self.state), 1)

        #### COMPUTE ACTOR UPDATE
        self.reward             = tf.placeholder(tf.float32, (None,))
        self.chosen_action_id   = tf.placeholder(tf.int64, (None,))

        self.advantage          = self.reward - tf.stop_gradient(self.state_value)
        self.onehot             = tf.constant(np.diag(
                np.ones((self.num_actions,), dtype=np.float32)))
        self.chosen_action_mask = tf.nn.embedding_lookup(self.onehot, self.chosen_action_id)
        self.chosen_action_prob = tf.reduce_sum(self.action_probs * self.chosen_action_mask, 1)
        self.actor_loss         = - tf.log(self.chosen_action_prob) * self.advantage
        self.update_actor_grads = tf.group(*[
            self.update_network_grads('state_encoder', self.actor_loss),
            self.update_network_grads('action_decoder', self.actor_loss),
        ])

        #### COMPUTE VALUE NETWORK UPDATE
        self.value_loss         = tf.square(self.reward - self.state_value)
        self.update_value_grads = tf.group(*[
            self.update_network_grads('state_encoder', self.value_loss),
            self.update_network_grads('value_decoder', self.value_loss),
        ])
Example #13
  def __init__(self, target, name, do_inverses=False):
    self.name = name
    self.target = target
    self.do_inverses = do_inverses
    self.tf_svd = SvdTuple(tf.svd(target))
    self.update_counter = 0

    self.init = SvdTuple(
      ones(target.shape[0], name=name+"_s_init"),
      Identity(target.shape[0], name=name+"_u_init"),
      Identity(target.shape[0], name=name+"_v_init"),
      Identity(target.shape[0], name=name+"_inv_init"),
    )

    assert self.tf_svd.s.shape == self.init.s.shape
    assert self.tf_svd.u.shape == self.init.u.shape
    assert self.tf_svd.v.shape == self.init.v.shape
    #    assert self.tf_svd.inv.shape == self.init.inv.shape

    self.cached = SvdTuple(
      tf.Variable(self.init.s, name=name+"_s"),
      tf.Variable(self.init.u, name=name+"_u"),
      tf.Variable(self.init.v, name=name+"_v"),
      tf.Variable(self.init.inv, name=name+"_inv"),
    )

    self.s = self.cached.s
    self.u = self.cached.u
    self.v = self.cached.v
    self.inv = self.cached.inv
    
    self.holder = SvdTuple(
      tf.placeholder(default_dtype, shape=self.cached.s.shape, name=name+"_s_holder"),
      tf.placeholder(default_dtype, shape=self.cached.u.shape, name=name+"_u_holder"),
      tf.placeholder(default_dtype, shape=self.cached.v.shape, name=name+"_v_holder"),
      tf.placeholder(default_dtype, shape=self.cached.inv.shape, name=name+"_inv_holder")
    )

    self.update_tf_op = tf.group(
      self.cached.s.assign(self.tf_svd.s),
      self.cached.u.assign(self.tf_svd.u),
      self.cached.v.assign(self.tf_svd.v),
      self.cached.inv.assign(self.tf_svd.inv)
    )

    self.update_external_op = tf.group(
      self.cached.s.assign(self.holder.s),
      self.cached.u.assign(self.holder.u),
      self.cached.v.assign(self.holder.v),
    )

    self.update_externalinv_op = tf.group(
      self.cached.inv.assign(self.holder.inv),
    )


    self.init_ops = (self.s.initializer, self.u.initializer, self.v.initializer,
                     self.inv.initializer)
    def test_mnist(self):
        import tensor_dynamic.data.input_data as mnist

        num_labeled = 100
        data = mnist.read_data_sets("../data/MNIST_data", n_labeled=num_labeled, one_hot=True)

        batch_size = 100
        num_epochs = 1
        num_examples = 60000
        num_iter = (num_examples/batch_size) * num_epochs
        starter_learning_rate = 0.02
        inputs = tf.placeholder(tf.float32, shape=(None, 784))
        targets = tf.placeholder(tf.float32)

        with tf.Session() as s:
            s.as_default()
            i = InputLayer(inputs)
            l1 = LadderLayer(i, 500, 1000.0, s)
            l2 = LadderGammaLayer(l1, 10, 10.0, s)
            ladder = LadderOutputLayer(l2, 0.1, s)

            loss = ladder.cost_all_layers_train(targets)
            learning_rate = tf.Variable(starter_learning_rate, trainable=False)
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

            bn_updates = tf.group(*(l1.bn_assigns + l2.bn_assigns))
            with tf.control_dependencies([train_step]):
                train_step = tf.group(bn_updates)
            pred_cost = -tf.reduce_mean(tf.reduce_sum(targets * tf.log(tf.clip_by_value(ladder.activation_predict, 1e-10, 1.0)), 1))  # cost used for prediction

            correct_prediction = tf.equal(tf.argmax(ladder.activation_predict, 1), tf.argmax(targets, 1))  # no of correct predictions
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) * tf.constant(100.0)

            s.run(tf.initialize_all_variables())

            #print "init accuracy", s.run([accuracy], feed_dict={inputs: data.test.images, targets: data.test.labels})

            min_loss = 100000.

            writer = tf.train.SummaryWriter("/tmp/td", s.graph_def)
            writer.add_graph(s.graph_def)

            for i in range(num_iter):
                images, labels = data.train.next_batch(batch_size)
                _, loss_val = s.run([train_step, loss], feed_dict={inputs: images, targets: labels})

                if loss_val < min_loss:
                    min_loss = loss_val
                print(i, loss_val)

                # print "acc", s.run([accuracy], feed_dict={inputs: data.test.images, targets: data.test.labels})

            #acc = s.run(accuracy, feed_dict={inputs: data.test.images, targets: data.test.labels})
            print "min loss", min_loss
            #print "final accuracy ", acc
            self.assertLess(min_loss, 20.0)
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [ v for _,v in grads_and_vars]
    with ops.init_scope():
        zt = [self._get_or_make_slot(v, v, "zt", self._name) for _,v in grads_and_vars]
        slots_list = []
        for name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                self._get_or_make_slot(var, var, "zt", "zt")
    self._prepare()

    def _name(post, s):
        ss = s.split(":")
        return ss[0] + "_" + post + "_dontsave"
    zt = [self.get_slot(v, "zt") for _,v in grads_and_vars]
    xt = [tf.Variable(v, name=_name("gigaxt",v.name)) for _,v in grads_and_vars]
    tmp = [tf.Variable(v, name=_name("gigatmp",v.name)) for _,v in grads_and_vars]
    xslots_list = []
    zslots_list = []
    tmpslots_list = []
    slots_vars = []
    for name in self.optimizer.get_slot_names():
        for var in self.optimizer.variables():
            slots_vars += [var]
            xslots_list.append(tf.Variable(var))
            zslots_list.append(self._get_or_make_slot(var, var, "zt", "zt"))
            tmpslots_list.append(tf.Variable(var, name=_name("gigaslottmp", var.name)))


    restored_vars = var_list + slots_vars
    zt_vars = zt + zslots_list
    xt_vars = xt + xslots_list
    tmp_vars = tmp + tmpslots_list
    all_grads = [ g for g, _ in grads_and_vars ]
    # store variables for resetting

    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(tmp_vars, restored_vars)]) # store tmp_vars

    with tf.get_default_graph().control_dependencies([op1]):
        op2 = self.optimizer.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op2]):
            op3 = tf.group(*[tf.assign(w, v) for w,v in zip(xt_vars, restored_vars)]) # store xt^+1 in xt_vars
            with tf.get_default_graph().control_dependencies([op3]):
                op4 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, zt_vars)]) # restore vars to zt (different weights)
                with tf.get_default_graph().control_dependencies([op4]):
                    op5 = self.optimizer2.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name) # zt+1
                    with tf.get_default_graph().control_dependencies([op5]):
                        zt1_xt1 = [_restored_vars - _xt1_vars for _restored_vars, _xt1_vars in zip(restored_vars, xt_vars)]
                        St1 = [tf.minimum(1.0, tf.norm(_zt1_vars-_zt_vars) / tf.norm(_zt1_xt1)) for _zt1_vars, _zt_vars, _zt1_xt1 in zip(restored_vars, zt_vars, zt1_xt1)]
                        self.gan.add_metric('st1',tf.reduce_mean(tf.add_n(St1)/len(St1)))
                        #self.gan.add_metric('xzt1',tf.norm(xt_vars[0]-zt_vars[0]))
                        nextw = [_xt_t1 + _St1 * _zt1_xt1 for _xt_t1, _St1, _zt1_xt1 in zip(xt_vars, St1, zt1_xt1)]
                        op6 = tf.group(*[tf.assign(w, v) for w,v in zip(zt_vars, restored_vars)]) # set zt+1
                        with tf.get_default_graph().control_dependencies([op6]):
                            op7 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, nextw)]) # set xt+1
                            with tf.get_default_graph().control_dependencies([op7]):
                                return tf.no_op()
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [ v for _,v in grads_and_vars]
    d_vars = []
    g_vars = []
    for grad,var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise("Couldn't find var in g_vars or d_vars")
    w = [tf.Variable(self.config.start_at or 0.0), tf.Variable(self.config.start_at or 0.0)]

    Vidv = [self.gan.trainer.d_loss, self.gan.trainer.g_loss]
    #Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2.* self.gan.trainer.g_loss, -1/2. * self.gan.trainer.d_loss - 1/2.* self.gan.trainer.g_loss]
    Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2.* self.gan.trainer.g_loss, 1/2. * self.gan.trainer.d_loss + 1/2.* self.gan.trainer.g_loss]

    wlr = self.config.w_learn_rate or 0.01
    wt1 = [w[0] + wlr * (Vidv[0] - Vsoc[0]), w[1] + wlr * (Vidv[1] - Vsoc[1])]
    def clamped(net):
        return tf.maximum(self.config.min or 0., tf.minimum(net, self.config.max or 1.))

    self._prepare()

    wt1 = [clamped(wt1[0]),clamped(wt1[1])]
    self.gan.add_metric('wt0', wt1[0])
    self.gan.add_metric('wt1', wt1[1])
    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(w, wt1)]) # store variables

    with tf.get_default_graph().control_dependencies([op1]):
        Vi = [(1. - w[0]) * Vidv[0] + w[0] * Vsoc[0],
              (1. - w[1]) * Vidv[1] + w[1] * Vsoc[1]]
        if self.config.reverse_w:
            Vi = [(w[0]) * Vidv[0] + (1.0-w[0]) * Vsoc[0],
                  (w[1]) * Vidv[1] + (1.0-w[1]) * Vsoc[1]]
        self.gan.add_metric('w0', w[0])
        self.gan.add_metric('w1', w[1])

        new_grads = tf.gradients(Vi[0], d_vars) + tf.gradients(Vi[1], g_vars)
        self.gan.trainer.d_loss = Vi[0]
        self.gan.trainer.g_loss = Vi[1]
        new_grads_and_vars = list(zip(new_grads, var_list)).copy()
        op3 = self.optimizer.apply_gradients(new_grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op3]):
            if(self.config.w_l1):
                # return to selfish state
                wt1 = [wt1[0] + self.config.w_l1 * ((self.config.l1_default or 0.0)-wt1[0]),
                       wt1[1] + self.config.w_l1 * ((self.config.l1_default or 0.0)-wt1[1])]
                op4 = tf.group(*[tf.assign(w, v) for w,v in zip(w, wt1)]) # store variables
                with tf.get_default_graph().control_dependencies([op4]):
                    self.gan.add_metric('l1w0', w[0])
                    self.gan.add_metric('l1w1', w[1])
                    return tf.no_op()

            else:
                return tf.no_op()
Example #17
def _add_ema(model, decay):
    """Create ops needed to track EMA when training.

    :param model: The model with a `.sess` we want to track.
    :param decay: float, Decay to use in the EMA

    :returns:
        ema_op: The update op. This applies the ema to each variable. Should be
           set as a control dependency on the training op.
        load: Op to copy emas to the variables.
        restore_var: Op to copy the original variables back from the EMA ones.

    Note:
        If you run the load op multiple times then the backup variables will be
        replaced by the ema variables.

        There is currently a bug I haven't been able to fix. I haven't found
        why, but sometimes when you run the load op inside a tf.cond you get
        this error:
        `tensorflow.python.framework.errors_impl.InvalidArgumentError: Retval[0] does not have value`
        The stopgap is to remove the tf.cond, which means that if you run load
        multiple times it will overwrite the backup variables with ema values.

        The load op is set up to automatically save the normal parameters when
        you load the ema's in.
    """
    ema = tf.train.ExponentialMovingAverage(decay=decay)
    model_vars = model.sess.graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    with tf.variable_scope("BackupVariables"):
        backup_vars = [
            tf.get_variable(
                var.op.name,
                dtype=var.value().dtype,
                trainable=False,
                initializer=var.initialized_value()
            ) for var in model_vars
        ]
    ema_op = ema.apply(model_vars)

    save_back_up = tf.group(*(
        tf.assign(back, var.read_value())
        for var, back in zip(model_vars, backup_vars)
    ), name='save_backups')

    with tf.control_dependencies([save_back_up]):
        load = tf.group(*(
            tf.assign(var, ema.average(var).read_value())
            for var in model_vars
        ), name="load_ema")

    restore_vars = tf.group(*(
        tf.assign(var, back.read_value())
        for var, back in zip(model_vars, backup_vars)
    ), name="restore_backups")

    return ema_op, load, restore_vars
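
A sketch of how the three returned ops might be wired up, assuming a model object with a .sess and an existing train_op (both hypothetical here):

ema_op, load_ema, restore_vars = _add_ema(model, decay=0.999)

with model.sess.graph.as_default():
    # apply the EMA right after every training step
    with tf.control_dependencies([train_op]):
        train_op = tf.group(ema_op)

# ... after training ...
model.sess.run(load_ema)      # evaluate with the averaged weights
model.sess.run(restore_vars)  # then swap the raw weights back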
    def _create(self):
        gan = self.gan
        generator = self.gan.generator
        config = self.config

        d_vars = self.d_vars or gan.discriminator.variables()

        loss = self.loss or gan.loss
        d_loss, g_loss = loss.sample

        self.d_log = -tf.log(tf.abs(d_loss+TINY))


        d_optimizer = self.build_optimizer(config, 'd_', config.d_trainer, self.d_lr, d_vars, d_loss)
        #TODO more than one g_loss
        g_optimizer = [self.build_optimizer(config, 'g_', config.g_trainer, self.g_lr, g.variables(), g_loss) for g, l in zip(generator.children, loss.children_losses)]

        assign_children = []
        for p, o in generator.parent_child_tuples:
            for ov, pv in zip(o.variables(), p.variables()):
                op=tf.assign(ov, pv)
                if config.mutation_percent:
                    op += tf.random_normal(self.gan.ops.shape(pv), mean=0, stddev=0.01) * tf.cast(tf.greater(config.mutation_percent, tf.random_uniform(shape=self.gan.ops.shape(pv), minval=0, maxval=1)), tf.float32)
                assign_children.append(op)
        self.clone_parent = tf.group(*assign_children)


        update_parent=[]
        for p, o in generator.parent_child_tuples:
            c_to_p = []
            for ov, pv in zip(o.variables(), p.variables()):
                op=tf.assign(pv, ov)
                c_to_p.append(op)
            update_parent.append(tf.group(*c_to_p))
        self.update_parent = update_parent
        f_lambda = config.f_lambda or 1

        def _squash(grads):
            return tf.add_n([tf.reshape(gan.ops.squash(g), [1]) for g in grads])
        children_grads = [_squash(tf.gradients(l, d_vars)) for l in loss.children_losses]
        if config.fitness == "g":
            self.measure_g = [-l for l in loss.children_losses]
        else:
            self.measure_g = [-l+f_lambda*(-tf.log(TINY+grad_d - tf.log(TINY+tf.nn.sigmoid(loss.d_loss)) - tf.log(TINY+1-tf.nn.sigmoid(l)))) for l, grad_d in zip(loss.children_losses, children_grads)]
        loss.metrics['measure_g'] = tf.reduce_mean(self.measure_g)
        loss.metrics['g_loss'] = loss.g_loss
        loss.metrics['d_loss'] = loss.d_loss

        self.g_loss = g_loss
        self.d_loss = d_loss
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.hist = [0 for i in range(len(self.gan.generator.children))]

        return g_optimizer, d_optimizer
Example #19
	def create_networks_and_training_method(self,state_dim,action_dim):

		theta_p = networks.theta_p(state_dim,action_dim)
		theta_q = networks.theta_q(state_dim,action_dim)
		target_theta_p,target_update_p = self.exponential_moving_averages(theta_p,TAU)
		target_theta_q,target_update_q = self.exponential_moving_averages(theta_q,TAU)

		self.state = tf.placeholder(tf.float32,[None,state_dim],'state')
		self.action_test = networks.policy_network(self.state,theta_p)

		# Initialize a random process the Ornstein-Uhlenbeck process for action exploration
		self.exploration = OUNoise(action_dim)
		noise = self.exploration.noise()
		self.action_exploration = self.action_test + noise

		q = networks.q_network(self.state,self.action_test,theta_q)
		# policy optimization
		mean_q = tf.reduce_mean(q)
		weight_decay_p = tf.add_n([L2_POLICY * tf.nn.l2_loss(var) for var in theta_p])  
		loss_p = -mean_q + weight_decay_p

		optim_p = tf.train.AdamOptimizer(P_LEARNING_RATE)
		grads_and_vars_p = optim_p.compute_gradients(loss_p, var_list=theta_p)
		optimize_p = optim_p.apply_gradients(grads_and_vars_p)
		with tf.control_dependencies([optimize_p]):
			self.train_p = tf.group(target_update_p)

		# q optimization
		self.action_train = tf.placeholder(tf.float32,[None,action_dim],'action_train')
		self.reward = tf.placeholder(tf.float32,[None],'reward')
		self.next_state = tf.placeholder(tf.float32,[None,state_dim],'next_state')
		self.done = tf.placeholder(tf.bool,[None],'done')

		q_train = networks.q_network(self.state,self.action_train,theta_q)
		next_action = networks.policy_network(self.next_state,theta=target_theta_p)
		next_q = networks.q_network(self.next_state,next_action,theta=target_theta_q)
		q_target = tf.stop_gradient(tf.select(self.done,self.reward,self.reward + GAMMA * next_q))

		# q loss
		q_error = tf.reduce_mean(tf.square(q_target - q_train))
		weight_decay_q = tf.add_n([L2_Q * tf.nn.l2_loss(var) for var in theta_q])
		loss_q = q_error + weight_decay_q

		optim_q = tf.train.AdamOptimizer(Q_LEARNING_RATE)
		grads_and_vars_q = optim_q.compute_gradients(loss_q, var_list=theta_q)
		optimize_q = optim_q.apply_gradients(grads_and_vars_q)
		with tf.control_dependencies([optimize_q]):
			self.train_q = tf.group(target_update_q)

		tf.scalar_summary("loss_q",loss_q)
		tf.scalar_summary("loss_p",loss_p)
		tf.scalar_summary("q_mean",mean_q)
		global merged_summary_op
		merged_summary_op = tf.merge_all_summaries()
Example #20
def get_target_updates(vars, target_vars, tau):
    logger.info('setting up target updates ...')
    soft_updates = []
    init_updates = []
    assert len(vars) == len(target_vars)
    for var, target_var in zip(vars, target_vars):
        logger.info('  {} <- {}'.format(target_var.name, var.name))
        init_updates.append(tf.assign(target_var, var))
        soft_updates.append(tf.assign(target_var, (1. - tau) * target_var + tau * var))
    assert len(init_updates) == len(vars)
    assert len(soft_updates) == len(vars)
    return tf.group(*init_updates), tf.group(*soft_updates)
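
Illustrative only (the variables are stand-ins, and logger is assumed to be the module-level logger used above): the hard copy runs once at startup, the soft Polyak update after every gradient step.

import tensorflow as tf

critic_vars = [tf.Variable(tf.random_normal([4, 2]))]
target_critic_vars = [tf.Variable(tf.zeros([4, 2]))]

init_updates, soft_updates = get_target_updates(critic_vars, target_critic_vars, tau=0.01)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(init_updates)   # target <- current, once
    sess.run(soft_updates)   # target <- 0.99 * target + 0.01 * current, per step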
Example #21
    def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
        """A normalizer that ensures that observations are approximately distributed according to
        a standard Normal distribution (i.e. have mean zero and variance one).

        Args:
            size (int): the size of the observation to be normalized
            eps (float): a small constant that avoids underflows
            default_clip_range (float): normalized observations are clipped to be in
                [-default_clip_range, default_clip_range]
            sess (object): the TensorFlow session to be used
        """
        self.size = size
        self.eps = eps
        self.default_clip_range = default_clip_range
        self.sess = sess if sess is not None else tf.get_default_session()

        self.local_sum = np.zeros(self.size, np.float32)
        self.local_sumsq = np.zeros(self.size, np.float32)
        self.local_count = np.zeros(1, np.float32)

        self.sum_tf = tf.get_variable(
            initializer=tf.zeros_initializer(), shape=self.local_sum.shape, name='sum',
            trainable=False, dtype=tf.float32)
        self.sumsq_tf = tf.get_variable(
            initializer=tf.zeros_initializer(), shape=self.local_sumsq.shape, name='sumsq',
            trainable=False, dtype=tf.float32)
        self.count_tf = tf.get_variable(
            initializer=tf.ones_initializer(), shape=self.local_count.shape, name='count',
            trainable=False, dtype=tf.float32)
        self.mean = tf.get_variable(
            initializer=tf.zeros_initializer(), shape=(self.size,), name='mean',
            trainable=False, dtype=tf.float32)
        self.std = tf.get_variable(
            initializer=tf.ones_initializer(), shape=(self.size,), name='std',
            trainable=False, dtype=tf.float32)
        self.count_pl = tf.placeholder(name='count_pl', shape=(1,), dtype=tf.float32)
        self.sum_pl = tf.placeholder(name='sum_pl', shape=(self.size,), dtype=tf.float32)
        self.sumsq_pl = tf.placeholder(name='sumsq_pl', shape=(self.size,), dtype=tf.float32)

        self.update_op = tf.group(
            self.count_tf.assign_add(self.count_pl),
            self.sum_tf.assign_add(self.sum_pl),
            self.sumsq_tf.assign_add(self.sumsq_pl)
        )
        self.recompute_op = tf.group(
            tf.assign(self.mean, self.sum_tf / self.count_tf),
            tf.assign(self.std, tf.sqrt(tf.maximum(
                tf.square(self.eps),
                self.sumsq_tf / self.count_tf - tf.square(self.sum_tf / self.count_tf)
            ))),
        )
        self.lock = threading.Lock()
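
A hedged usage sketch, assuming the class above is named Normalizer: per-batch sums are pushed through update_op, and recompute_op then refreshes mean/std from the accumulators.

import numpy as np
import tensorflow as tf

with tf.Session() as sess:
    normalizer = Normalizer(size=3, sess=sess)  # class name assumed
    sess.run(tf.global_variables_initializer())

    batch = np.random.randn(16, 3).astype(np.float32)
    sess.run(normalizer.update_op, feed_dict={
        normalizer.sum_pl: batch.sum(axis=0),
        normalizer.sumsq_pl: np.square(batch).sum(axis=0),
        normalizer.count_pl: np.array([len(batch)], dtype=np.float32),
    })
    sess.run(normalizer.recompute_op)  # refresh mean/std from the accumulators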
def parameterized_vs_naive(shape, num_iters):
    np.random.seed(1618)  # Make it reproducible.

    # No CSE/CF.
    optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    config = tf.ConfigProto(graph_options=tf.GraphOptions(optimizer_options=optimizer_options))

    with tf.Session(config=config) as sess:
        param_op = tf.group(random_ops.parameterized_truncated_normal(shape))
        naive_op = tf.group(random_ops.truncated_normal(shape))

        param_dt = timeit.timeit(lambda: sess.run(param_op), number=num_iters)
        naive_dt = timeit.timeit(lambda: sess.run(naive_op), number=num_iters)
        return param_dt, naive_dt
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [ v for _,v in grads_and_vars]
    d_vars = []
    g_vars = []
    for grad,var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        v1 = [self._zeros_slot(v, "v1", self._name) for _,v in grads_and_vars]
        if self.config.include_slots:
            for name in self.optimizer.get_slot_names():
                for var in self.optimizer.variables():
                    self._zeros_slot(var, "pm", "pm")
    self._prepare()

    v1 = [self.get_slot(v, "v1") for _,v in grads_and_vars]
    slots_list = []
    slots_vars = []
    if self.config.include_slots:
        for name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                slots_vars += [var]
                slots_list.append(self._zeros_slot(var, "pm", "pm"))


    current_vars = var_list + slots_vars
    tmp_vars = v1 + slots_list
    all_grads = [ g for g, _ in grads_and_vars ]

    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(tmp_vars, current_vars)]) # store variables

    with tf.get_default_graph().control_dependencies([op1]):
        # store g2
        #op3 = tf.group(*[tf.assign_sub(v, self._lr_t*grad) for grad,v in grads_and_vars])
        op3 = self.optimizer.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op3]):

            def pmcombine(_v1,_v2):
                return _v2 + (_v2 - _v1)

            combined = [pmcombine(_v1, _v2) for _v1, _v2 in zip(tmp_vars, current_vars)]
            # restore v1, slots
            op5 = tf.group(*[ tf.assign(w,v) for w,v in zip(current_vars, combined)])
            with tf.get_default_graph().control_dependencies([op5]):
                return tf.no_op()
Example #24
  def __init__(self, queues, enqueue_ops):
    close_op = tf.group(* [q.close() for q in queues])
    cancel_op = tf.group(
        * [q.close(cancel_pending_enqueues=True) for q in queues])
    queue_closed_exception_types = (errors.OutOfRangeError,)

    enqueue_op = tf.group(*enqueue_ops, name="multi_enqueue")

    super(MultiQueueRunner, self).__init__(
        queues[0],
        enqueue_ops=[enqueue_op],
        close_op=close_op,
        cancel_op=cancel_op,
        queue_closed_exception_types=queue_closed_exception_types)
    def calculate_depth(grads_and_vars_k,k=0):
        if(k == 0):
            return tf.group(*[tf.assign(v,nv) for v,nv in zip(depth_vars, depth_slots)])

        op2 = self.optimizer.apply_gradients(grads_and_vars_k, global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op2]):
            w_k_combined = [self._decay *w_k_1 + (1.-self._decay)*w_hat for w_hat, w_k_1 in zip(depth_slots, depth_vars)]
            op3 = tf.group(*[tf.assign(w, v) for w,v in zip(depth_slots, w_k_combined)]) # store variables
            with tf.get_default_graph().control_dependencies([op3]):
                d_loss, g_loss = self.gan.loss.sample
                d_grads = tf.gradients(d_loss, d_vars)
                g_grads = tf.gradients(g_loss, g_vars)
                grads_k_1 = d_grads + g_grads
                grads_and_vars_k_1 = list(zip(grads_k_1,depth_vars)).copy()
                return calculate_depth(grads_and_vars_k_1,k-1)
Example #26
  def testSummariesAreFlushedToDisk(self):
    output_dir = os.path.join(self.get_temp_dir(), 'flush_test')
    if tf.gfile.Exists(output_dir):  # For running on jenkins.
      tf.gfile.DeleteRecursively(output_dir)

    accuracy0, update_op0 = tf.contrib.metrics.streaming_accuracy(
        self._predictions, self._labels)
    accuracy1, update_op1 = tf.contrib.metrics.streaming_accuracy(
        self._predictions+1, self._labels)

    names_to_metrics = {
        'Accuracy': accuracy0,
        'Another accuracy': accuracy1,
    }

    for k in names_to_metrics:
      v = names_to_metrics[k]
      tf.scalar_summary(k, v)

    summary_writer = tf.train.SummaryWriter(output_dir)

    init_op = tf.group(tf.initialize_all_variables(),
                       tf.initialize_local_variables())
    eval_op = tf.group(update_op0, update_op1)

    with self.test_session() as sess:
      slim.evaluation.evaluation(
          sess,
          init_op=init_op,
          eval_op=eval_op,
          summary_op=tf.merge_all_summaries(),
          summary_writer=summary_writer,
          global_step=self._global_step)

      # Check that the results were saved. The events file may have additional
      # entries, e.g. the event version stamp, so have to parse things a bit.
      output_filepath = glob.glob(os.path.join(output_dir, '*'))
      self.assertEqual(len(output_filepath), 1)
      events = tf.train.summary_iterator(output_filepath[0])
      summaries = [e.summary for e in events if e.summary.value]
      values = []
      for summary in summaries:
        for value in summary.value:
          values.append(value)
      saved_results = {v.tag: v.simple_value for v in values}
      for name in names_to_metrics:
        self.assertAlmostEqual(names_to_metrics[name].eval(),
                               saved_results[name])
def evaluate():
    """Eval ocr for a number of steps."""
    with tf.Graph().as_default() as g:
        images, labels, seq_lengths = ocr.inputs()
        logits, timesteps = ocr.inference(images, FLAGS.eval_batch_size, train=True)
        ler = ocr.create_label_error_rate(logits, labels, timesteps)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        config = tf.ConfigProto(
            device_count={'GPU': 0}
        )
        sess = tf.Session(config=config)
        sess.run(init_op)

        saver = tf.train.Saver()

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, ler, summary_op)
            if FLAGS.run_once:
                break
            # print("Waiting for next evaluation for " + str(FLAGS.eval_interval_secs) + " sec")
            time.sleep(FLAGS.eval_interval_secs)
Example #28
def moving_average(value, window):
    value = tf.to_float(value)
    shape = value.get_shape()

    queue_init = tf.zeros(tf.TensorShape(window).concatenate(shape))
    total_init = tf.zeros(shape)
    num_init = tf.constant(0, dtype=tf.float32)

    queue = tf.FIFOQueue(window, [tf.float32], shapes=[shape])
    total = tf.Variable(total_init, trainable=False)
    num = tf.Variable(num_init, trainable=False)

    init = tf.cond(
        tf.equal(queue.size(), 0),
        lambda: tf.group(
            queue.enqueue_many(queue_init),
            total.assign(total_init),
            num.assign(num_init)),
        lambda: tf.no_op())

    with tf.control_dependencies([init]):
        total_ = total + value - queue.dequeue()
        num_ = num + 1
        value_averaged = total_ / (tf.minimum(num_, window) + EPSILON)

        with tf.control_dependencies([queue.enqueue([value]), total.assign(total_), num.assign(num_)]):
            return tf.identity(value_averaged)
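
A minimal usage sketch, assuming EPSILON is the small module-level constant the function references:

import tensorflow as tf

EPSILON = 1e-8  # assumption: stands in for the module-level constant

loss = tf.placeholder(tf.float32, shape=())
smoothed = moving_average(loss, window=10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for v in [3.0, 1.0, 2.0]:
        print(sess.run(smoothed, feed_dict={loss: v}))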
Example #29
def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None):
  """Initialize or restore variables from a checkpoint if available.

  Args:
    sess: Session to initialize variables in.
    saver: Saver to restore variables.
    logdir: Directory to search for checkpoints.
    checkpoint: Specify what checkpoint name to use; defaults to most recent.
    resume: Whether to expect recovering a checkpoint or starting a new run.

  Raises:
    ValueError: If resume expected but no log directory specified.
    RuntimeError: If no resume expected but a checkpoint was found.
  """
  sess.run(tf.group(
      tf.local_variables_initializer(),
      tf.global_variables_initializer()))
  if resume and not (logdir or checkpoint):
    raise ValueError('Need to specify logdir to resume a checkpoint.')
  if logdir:
    state = tf.train.get_checkpoint_state(logdir)
    if checkpoint:
      checkpoint = os.path.join(logdir, checkpoint)
    if not checkpoint and state and state.model_checkpoint_path:
      checkpoint = state.model_checkpoint_path
    if checkpoint and resume is False:
      message = 'Found unexpected checkpoint when starting a new run.'
      raise RuntimeError(message)
    if checkpoint:
      saver.restore(sess, checkpoint)
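
A typical call site, sketched (the logdir is illustrative):

saver = tf.train.Saver()
with tf.Session() as sess:
    # restores the latest checkpoint from logdir if one exists,
    # otherwise keeps the freshly initialized variables
    initialize_variables(sess, saver, logdir='/tmp/my_run')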
Example #30
  def _outputs_with_release(self, handle, inputs, outputs):
    """Ensures ComputeSession is released before outputs are returned.

    Args:
      handle: Handle to ComputeSession on which all computation until now has
          depended. It will be released and assigned to the output 'run'.
      inputs: list of nodes we want to pass through without any dependencies.
      outputs: list of nodes whose access should ensure the ComputeSession is
          safely released.

    Returns:
      A dictionary of both input and output nodes.
    """
    with tf.control_dependencies(outputs.values()):
      with tf.name_scope('ComputeSession'):
        release_op = dragnn_ops.release_session(handle)
      run_op = tf.group(release_op, name='run')
      for output in outputs:
        with tf.control_dependencies([release_op]):
          outputs[output] = tf.identity(outputs[output], name=output)
    all_nodes = inputs.copy()
    all_nodes.update(outputs)

    # Add an alias for simply running without collecting outputs.
    # Common, for instance, with training.
    all_nodes['run'] = run_op
    return all_nodes
Example #31
 def _finish(self, update_ops, name_scope):
   # tf.group expects individual ops/tensors, not a list, so append the op itself
   update_ops.append(self._counter.assign_add(1))
   return tf.group(*update_ops, name=name_scope)
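
A quick standalone check of that constraint — tf.group takes ops/tensors as positional arguments and raises a TypeError when handed a Python list:

import tensorflow as tf

counter = tf.Variable(0)
inc = counter.assign_add(1)

grouped = tf.group(inc)   # fine: an op argument
# tf.group([inc])         # TypeError: tf.group() expects Tensor/Operation arguments, not a list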
def train(logits, represent_feature_tensor, images_tensor, expand_images_tensor, labels_tensor, is_training_tensor, save_model_path=None, step_width=100, record_loss=False):
    cross_id = 1
    patches_dir = '/home/give/Documents/dataset/MICCAI2018/Patches/crossvalidation'
    roi_dir = '/home/give/Documents/dataset/MICCAI2018/Slices/crossvalidation'
    pre_load = True
    train_dataset = DataSet(os.path.join(patches_dir, str(cross_id), 'train'), 'train', pre_load=pre_load,
                            rescale=True, divied_liver=False, expand_is_roi=True,
                            full_roi_path=os.path.join(roi_dir, str(cross_id), 'train'))
    val_dataset = DataSet(os.path.join(patches_dir, str(cross_id), 'test'), 'test', pre_load=pre_load,
                          rescale=True, divied_liver=False, expand_is_roi=True,
                          full_roi_path=os.path.join(roi_dir, str(cross_id), 'test'))

    train_batchdata = train_dataset.get_next_batch(net_config.BATCH_SIZE)
    val_batchdata = val_dataset.get_next_batch(net_config.BATCH_SIZE)

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    val_step = tf.get_variable('val_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    # inter loss
    loss_last = loss(logits, labels_tensor)
    loss_inter = loss_last

    # intra loss
    if has_centerloss:
        represent_feature_tensor_shape = represent_feature_tensor.get_shape().as_list()
        print 'represent_feature_tensor_shape is ', represent_feature_tensor_shape
        centers_value = np.zeros([category_num, represent_feature_tensor_shape[1]], dtype=np.float32)
        print 'centers_value shape is ', np.shape(centers_value)
        centers_saved_tensor = tf.get_variable('center_tensor', shape=[category_num, represent_feature_tensor_shape[1]],
                                         initializer=tf.truncated_normal_initializer(stddev=CONV_WEIGHT_STDDEV),
                                         dtype=tf.float32, trainable=False)
        centers_tensor = tf.placeholder(dtype=tf.float32, shape=[category_num, represent_feature_tensor_shape[1]])
        print 'center_tensor shape is ', tf.shape(centers_tensor)
        center_loss = calculate_centerloss(represent_feature_tensor, labels_tensor,
                                           centers_tensor=centers_tensor)
        owner_step = tf.py_func(update_centers, [centers_tensor, represent_feature_tensor, labels_tensor, category_num],
                                tf.float32)

        loss_ = loss_inter + _lambda * center_loss
    else:
        loss_ = loss_inter
    predictions = tf.nn.softmax(logits)
    print 'predictions shape is ', predictions
    print 'label is ', labels_tensor
    top1_error = top_k_error(predictions, labels_tensor, 1)
    labels_onehot = tf.one_hot(labels_tensor, logits.get_shape().as_list()[-1])
    print 'output node is ', logits.get_shape().as_list()[-1]
    accuracy_tensor = calculate_accuracy(predictions, labels_onehot)

    # loss_avg
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, ema.apply([loss_]))
    tf.summary.scalar('loss_avg', ema.average(loss_))

    # validation stats
    ema = tf.train.ExponentialMovingAverage(0.9, val_step)
    val_op = tf.group(val_step.assign_add(1), ema.apply([top1_error]))
    top1_error_avg = ema.average(top1_error)
    tf.summary.scalar('val_top1_error_avg', top1_error_avg)

    tf.summary.scalar('learning_rate', FLAGS.learning_rate)

    opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, MOMENTUM)
    grads = opt.compute_gradients(loss_)
    for grad, var in grads:
        if grad is not None and not FLAGS.minimal_summaries:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    if not FLAGS.minimal_summaries:
        # Display the training images in the visualizer.
        tf.summary.image('images', images_tensor)

        for var in tf.trainable_variables():
            tf.summary.image(var.op.name, var)

    batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION)
    batchnorm_updates_op = tf.group(*batchnorm_updates)

    if has_centerloss:
        with tf.control_dependencies([apply_gradient_op, batchnorm_updates_op, owner_step]):
            train_op = tf.no_op('train')
    else:
        train_op = tf.group(apply_gradient_op, batchnorm_updates_op)

    saver = tf.train.Saver(tf.all_variables())

    summary_op = tf.summary.merge_all()

    init = tf.initialize_all_variables()

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(FLAGS.log_val_dir, sess.graph)
    if FLAGS.resume:
        latest = tf.train.latest_checkpoint(FLAGS.load_model_path)
        if not latest:
            print "No checkpoint to continue from in", FLAGS.train_dir
            sys.exit(1)
        print "resume", latest
        saver.restore(sess, latest)
        centers_value = sess.run(centers_saved_tensor)

    for x in xrange(FLAGS.max_steps + 1):
        start_time = time.time()

        step = sess.run(global_step)
        if has_centerloss:
            i = [train_op, loss_, owner_step]
        else:
            i = [train_op, loss_]
        write_summary = step % 100 == 0 and step > 1  # write a summary every 100 steps
        if write_summary:
            i.append(summary_op)
        train_roi_batch_images, train_expand_roi_batch_images, train_labels = train_batchdata.next()
        o = sess.run(i, feed_dict={
            images_tensor: train_roi_batch_images,
            expand_images_tensor: train_expand_roi_batch_images,
            labels_tensor: train_labels,
            centers_tensor: centers_value,
            is_training_tensor: True
        })
        if has_centerloss:
            centers_value = o[2]
            centers_saved_tensor = tf.convert_to_tensor(np.asarray(centers_value, np.float32), np.float32)
        loss_value = o[1]

        duration = time.time() - start_time

        assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

        if (step - 1) % step_width == 0:
            accuracy_value, inter_loss_value, center_loss_value, labels_values, predictions_values = sess.run(
                [accuracy_tensor, loss_inter, center_loss, labels_tensor, predictions], feed_dict={
                    images_tensor: train_roi_batch_images,
                    expand_images_tensor: train_expand_roi_batch_images,
                    labels_tensor: train_labels,
                    centers_tensor: centers_value,
                    is_training_tensor: True
                })
            examples_per_sec = FLAGS.batch_size / float(duration)
            # accuracy = eval_accuracy(predictions_values, labels_values)
            format_str = ('step %d, loss = %.2f, inter_loss = %.5f, center_loss = %.5f, accuracy value = %g (%.1f examples/sec; %.3f '
                          'sec/batch)')

            print(format_str % (step, loss_value, inter_loss_value, center_loss_value, accuracy_value, examples_per_sec, duration))
        if write_summary:
            if has_centerloss:
                summary_str = o[3]
            else:
                summary_str = o[2]
            summary_writer.add_summary(summary_str, step)

        # Save the model checkpoint periodically.
        if step > 1 and step % step_width == 0:

            checkpoint_path = os.path.join(save_model_path, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=global_step)
            save_dir = os.path.join(save_model_path, str(step))
            if not os.path.exists(save_dir):
                os.mkdir(save_dir)
            filenames = glob(os.path.join(save_model_path, '*-'+str(int(step + 1))+'.*'))
            for filename in filenames:
                shutil.copy(
                    filename,
                    os.path.join(save_dir, os.path.basename(filename))
                )
        # Run validation periodically
        if step > 1 and step % step_width == 0:
            val_roi_batch_images, val_expand_roi_batch_images, val_labels = next(val_batchdata)
            _, top1_error_value, summary_value, accuracy_value, labels_values, predictions_values = sess.run(
                [val_op, top1_error, summary_op, accuracy_tensor, labels_tensor, predictions],
                {
                    images_tensor: val_roi_batch_images,
                    expand_images_tensor: val_expand_roi_batch_images,
                    centers_tensor: centers_value,
                    labels_tensor: val_labels,
                    is_training_tensor: False
                })
            predictions_values = np.argmax(predictions_values, axis=1)
            # accuracy = eval_accuracy(predictions_values, labels_values)
            calculate_acc_error(
                logits=predictions_values,
                label=labels_values,
                show=True
            )
            print('Validation top1 error %.2f, accuracy value %f'
                  % (top1_error_value, accuracy_value))
            val_summary_writer.add_summary(summary_value, step)
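
# A minimal, self-contained sketch (TF 1.x, illustrative names) of the
# update-grouping pattern used above: run the gradient step first, then the
# batch-norm moving-average updates, all behind a single no-op train op.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
is_training = tf.placeholder(tf.bool, [])
h = tf.layers.batch_normalization(x, training=is_training)
loss = tf.reduce_mean(tf.square(h))

opt = tf.train.MomentumOptimizer(0.1, 0.9)
apply_gradient_op = opt.apply_gradients(opt.compute_gradients(loss))

# batch_normalization registers its moving-average updates in UPDATE_OPS
bn_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))

with tf.control_dependencies([apply_gradient_op, bn_updates_op]):
    train_op = tf.no_op(name='train')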
def train(dataset):
    """Train on dataset for a number of steps."""
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.RMSPropOptimizer(lr,
                                        RMSPROP_DECAY,
                                        momentum=RMSPROP_MOMENTUM,
                                        epsilon=RMSPROP_EPSILON)

        # Get images and labels for ImageNet and split the batch across GPUs.
        assert FLAGS.batch_size % FLAGS.num_gpus == 0, (
            'Batch size must be divisible by number of GPUs')
        split_batch_size = int(FLAGS.batch_size / FLAGS.num_gpus)

        # Override the number of preprocessing threads to account for the increased
        # number of GPU towers.
        num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus
        images, labels = image_processing.distorted_inputs(
            dataset, num_preprocess_threads=num_preprocess_threads)

        input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Number of classes in the Dataset label.
        num_classes = dataset.num_classes()

        # Split the batch of images and labels for towers.
        images_splits = tf.split(axis=0,
                                 num_or_size_splits=FLAGS.num_gpus,
                                 value=images)
        labels_splits = tf.split(axis=0,
                                 num_or_size_splits=FLAGS.num_gpus,
                                 value=labels)

        # Calculate the gradients for each model tower.
        tower_grads = []
        reuse_variables = None
        for i in range(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' %
                                   (inception.TOWER_NAME, i)) as scope:
                    # Force all Variables to reside on the CPU.
                    with slim.arg_scope([slim.variables.variable],
                                        device='/cpu:0'):
                        # Calculate the loss for one tower of the ImageNet model. This
                        # function constructs the entire ImageNet model but shares the
                        # variables across all towers.
                        loss = _tower_loss(images_splits[i], labels_splits[i],
                                           num_classes, scope, reuse_variables)

                    # Reuse variables for the next tower.
                    reuse_variables = True

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    # Retain the Batch Normalization updates operations only from the
                    # final tower. Ideally, we should grab the updates from all towers
                    # but these stats accumulate extremely fast so we can ignore the
                    # other stats from the other towers without significant detriment.
                    batchnorm_updates = tf.get_collection(
                        slim.ops.UPDATE_OPS_COLLECTION, scope)

                    # Calculate the gradients for the batch of data on this ImageNet
                    # tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = _average_gradients(tower_grads)

        # Add summaries for the input processing and global_step.
        summaries.extend(input_summaries)

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        # Note that we maintain a "double-average" of the BatchNormalization
        # global statistics. This is more complicated than it needs to be but
        # we employ it for backward-compatibility with our previous models.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY, global_step)

        # Another possibility is to use tf.slim.get_variables().
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Group all updates into a single train op.
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            batchnorm_updates_op)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        if FLAGS.pretrained_model_checkpoint_path:
            assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)
            variables_to_restore = tf.get_collection(
                slim.variables.VARIABLES_TO_RESTORE)
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                examples_per_sec = FLAGS.batch_size / float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, duration))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 5000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
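
# _average_gradients is used above but not shown here; a sketch of the
# conventional tower-gradient averaging it presumably implements (after the
# TensorFlow multi-GPU tutorial) is:
import tensorflow as tf

def _average_gradients(tower_grads):
    """tower_grads: one (gradient, variable) list per tower; returns a
    single list with each gradient averaged across towers."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0, var), (grad1, var), ...) for one variable
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # variables are shared across towers, so take the first tower's copy
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads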
def build_graph(checkpoint_file):
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)
        # Get the dataset first and load one batch of validation images and labels tensors. Set is_training as False so as to use the evaluation preprocessing
        dataset = get_split('validation', FLAGS.dataset_dir)
        images, raw_images, labels = load_batch(dataset,
                                                batch_size=FLAGS.batch_size,
                                                is_training=False)

        # Create some information about the training steps
        num_batches_per_epoch = dataset.num_samples / FLAGS.batch_size
        num_steps_per_epoch = num_batches_per_epoch

        # Now create the inference model but set is_training=False
        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(
                images, num_classes=dataset.num_classes, is_training=False)
            logits_op = end_points['Logits']
            pred_op = end_points['Predictions']
            # logging.info("The logits output from the model is: %s, The prediction of the model is: %s" % (end_points['Logits'], end_points['Predictions']))

        # #get all the variables to restore from the checkpoint file and create the saver function to restore
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, checkpoint_file)

        # Just define the metrics to track, without the loss or anything else
        predictions = tf.argmax(end_points['Predictions'], 1)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        acc_mine = tf.reduce_mean(
            tf.cast(tf.equal(predictions, labels), tf.float32))
        metrics_op = tf.group(accuracy_update)

        def get_pred_and_label(sess):
            # run both ops in a single call so the predictions and labels
            # come from the same dequeued batch (separate sess.run calls
            # would pull two different batches from the input queue)
            pred, label = sess.run([pred_op, labels])
            pred_pos = np.empty(FLAGS.batch_size)
            for i in range(len(pred)):
                pred_pos[i] = pred[i][1]  # probability of the positive class
            label = label.astype(np.float32)
            logging.info('--------visualizing the label: %s' % label)
            return pred_pos, label

        # Create the global step and an increment op for monitoring
        global_step = get_or_create_global_step()
        global_step_op = tf.assign(
            global_step, global_step + 1
        )  # no apply_gradient method so manually increasing the global_step

        # Create an evaluation step function
        def eval_step(sess, metrics_op):
            '''
            Simply takes in a session, runs the metrics op and logs some information.
            '''
            start_time = time.time()
            _, global_step_count, accuracy_value, step_logits, step_prediction, step_acc = sess.run(
                [
                    metrics_op, global_step_op, accuracy, logits_op, pred_op,
                    acc_mine
                ])
            time_elapsed = time.time() - start_time

            # Log some information
            # logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)', global_step_count, accuracy_value, time_elapsed)
            logging.info(
                'The average accuracy of this batch (36 samples in total) is: %s'
                % step_acc)
            # for i in range(len(step_prediction)):
            #     # pred = 'True' if predictions[i] == labels[i] else 'False'
            #     logging.info("The prediction of %s th image is : %s" % ((i, max(step_prediction[i]))))

            return accuracy_value

        # Define some scalar quantities to monitor
        tf.summary.scalar('Validation_Accuracy', accuracy)
        my_summary_op = tf.summary.merge_all()
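
# For reference, a tiny runnable sketch of the streaming-accuracy pattern
# above: tf.metrics.accuracy (essentially what the deprecated
# tf.contrib.metrics.streaming_accuracy wraps) keeps running totals in local
# variables, while the direct reduce_mean form scores one batch at a time.
import tensorflow as tf

labels = tf.placeholder(tf.int64, [None])
predictions = tf.placeholder(tf.int64, [None])

accuracy, accuracy_update = tf.metrics.accuracy(labels, predictions)
batch_accuracy = tf.reduce_mean(
    tf.cast(tf.equal(predictions, labels), tf.float32))

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric counters live here
    feed = {labels: [1, 0, 1], predictions: [1, 1, 1]}
    sess.run(accuracy_update, feed_dict=feed)
    print(sess.run(accuracy))                        # running accuracy so far
    print(sess.run(batch_accuracy, feed_dict=feed))  # this batch only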
def train():
    inputs, gt_alphas = datasets.get_dataset()
    model = network.MnasUnet(inputs, is_training=True)

    total_loss = losses.compute_loss(model.end_points,
                                     gt_alphas,
                                     mode=FLAGS.mode)
    """ set the update operations for training """
    update_ops = []
    variables_to_train = tf.trainable_variables()

    global_step = tf.Variable(0, name='global_step', trainable=False)
    lr = _get_learning_rate(FLAGS.num_images, global_step)
    optimizer = tf.train.GradientDescentOptimizer(lr)
    update_opt = optimizer.minimize(total_loss, global_step,
                                    variables_to_train)
    update_ops.append(update_opt)

    update_bns = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if len(update_bns):
        update_bn = tf.group(*update_bns)
        update_ops.append(update_bn)
    update_op = tf.group(*update_ops)
    """ set Summary and log info """
    tf.summary.scalar('learning_rate', lr)
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('model_loss', model.end_points['model_loss'])
    tf.summary.scalar('regular_loss', model.end_points['regular_loss'])

    summary_op = tf.summary.merge_all()
    logdir = os.path.join(FLAGS.summaries_dir,
                          strftime('%Y%m%d%H%M%S', gmtime()))
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    summary_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())
    """ set saver for saving final model and backbone model for restore """
    saver = tf.train.Saver(max_to_keep=3)
    """ Set Gpu Env """
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    gpu_opt = tf.GPUOptions(per_process_gpu_memory_fraction=0.8,
                            allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opt)) as sess:
        sess.run(init_op)
        ckpt = tf.train.get_checkpoint_state(FLAGS.training_checkpoint_model)
        """ resotre checkpoint of Backbone network """
        if ckpt is not None:
            latest_ckpt = tf.train.latest_checkpoint(
                FLAGS.training_checkpoint_model)
            print('latest', latest_ckpt)
            re_saver = tf.train.Saver(var_list=tf.global_variables())
            re_saver.restore(sess, latest_ckpt)
        else:
            restore_vars = _get_restore_vars("MnasNet")
            re_saver = tf.train.Saver(var_list=restore_vars)
            re_saver.restore(sess,
                             "data/pretrained_models/MnasNet_224_final.ckpt")
        """ Generate threads """
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            while not coord.should_stop():
                s_time = time.time()
                _, loss, current_step = sess.run(
                    [update_op, total_loss, global_step])

                duration_time = time.time() - s_time
                print("""iter %d: time:%.3f(sec), total-loss %.4f""" %
                      (current_step, duration_time, loss))

                if np.isnan(loss) or np.isinf(loss):
                    # a bare `raise` outside an except block is invalid
                    raise ValueError('loss is NaN or Inf: %s' % loss)

                if current_step % 10 == 0:
                    # write summary
                    summary = sess.run(summary_op)
                    summary_writer.add_summary(summary, current_step)
                    summary_writer.flush()

                if current_step % 50 == 0:
                    # Save a checkpoint
                    save_path = 'output/training/MnasUnet_Matting.ckpt'
                    saver.save(sess, save_path, global_step=current_step)

                if current_step + 1 == FLAGS.max_iters:
                    print('max iter : %d, current_step : %d' %
                          (FLAGS.max_iters, current_step))
                    break

        except tf.errors.OutOfRangeError:
            print('Input queue exhausted')
        finally:
            saver.save(sess,
                       './output/models/MnasUnet_Matting_final.ckpt',
                       write_meta_graph=False)
            coord.request_stop()

        coord.join(threads)
        sess.close()
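
# _get_restore_vars above is not shown; a hedged sketch of the scope-filtered
# restore it presumably performs for the "MnasNet" backbone (all names in
# this snippet are illustrative):
import tensorflow as tf

def get_restore_vars(scope_name):
    # keep only variables created under the given scope
    return [v for v in tf.global_variables()
            if v.op.name.startswith(scope_name + '/')]

with tf.variable_scope('MnasNet'):
    w = tf.get_variable('conv_w', [3, 3, 3, 32])
head = tf.get_variable('head_w', [32, 2])  # excluded from the restore

re_saver = tf.train.Saver(var_list=get_restore_vars('MnasNet'))
# re_saver.restore(sess, 'data/pretrained_models/MnasNet_224_final.ckpt')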
def run_training(fold_num,
                 train_tfrecord_path,
                 test_tfrecord_path,
                 train_batch_size=60,
                 test_batch_size=30):
    with tf.Graph().as_default():
        # with tf.device('/gpu:'+GPU_NUM):
        images, label = read_and_decode(train_tfrecord_path)
        # use shuffle_batch to randomly shuffle the training inputs
        images_batch, label_batch = tf.train.shuffle_batch(
            [images, label],
            batch_size=train_batch_size,
            capacity=1000,
            min_after_dequeue=800)

        images_test, label_test = read_and_decode_4_test(test_tfrecord_path)
        # batch the test inputs in order (tf.train.batch does not shuffle)
        images_batch_test, label_batch_test = tf.train.batch(
            [images_test, label_test],
            batch_size=test_batch_size,
            capacity=1000)

        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder, keep_prob, is_train = \
            placeholder_inputs()

        # Build a Graph that computes predictions from the inference model.
        fe_logits = model.inference(images_placeholder, keep_prob, is_train)

        # Add to the Graph the Ops for loss calculation.
        loss = model.loss(fe_logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        global_step = tf.Variable(0, trainable=False)
        train_op = model.training(loss, flags.learning_rate, global_step)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = model.evaluation(fe_logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        # Create a saver for writing training checkpoints.
        # saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.48)
        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=True,
                                      gpu_options=gpu_options)) as sess:

            # Instantiate a SummaryWriter to output summaries and the Graph.
            train_writer = tf.summary.FileWriter(
                './summaries_new/summaries_graph_0420/' + str(fold_num) +
                '/train', sess.graph)
            test_writer = tf.summary.FileWriter(
                './summaries_new/summaries_graph_0420/' + str(fold_num) +
                '/test', sess.graph)

            # And then after everything is built:

            # Run the Op to initialize the variables.
            sess.run(init)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            img_test, l_test = sess.run([images_batch_test, label_batch_test])
            test_feed_dict = fill_feed_dict(img_test, l_test, 1.0, False,
                                            images_placeholder,
                                            labels_placeholder, keep_prob,
                                            is_train)
            # Start the training loop.
            last_train_correct = []
            last_test_correct = []
            for step in range(flags.max_steps):
                start_time = time.time()

                # Fill a feed dictionary with the actual set of images and labels
                # for this particular training step.
                img, l = sess.run([images_batch, label_batch])
                feed_dict = fill_feed_dict(img, l, 0.9, True,
                                           images_placeholder,
                                           labels_placeholder, keep_prob,
                                           is_train)

                # Run one step of the model.  The return values are the activations
                # from the `train_op` (which is discarded) and the `loss` Op.  To
                # inspect the values of your Ops or variables, you may include them
                # in the list passed to sess.run() and the value tensors will be
                # returned in the tuple from the call.
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if step % 100 == 0 or (step + 1) == flags.max_steps:
                    print('fold_num:{}'.format(fold_num))
                    print('Step %d: loss = %.2f (%.3f sec)' %
                          (step, loss_value, duration))
                    # Update the events file.
                    train_summary_str = sess.run(summary, feed_dict=feed_dict)
                    test_summary_str = sess.run(summary,
                                                feed_dict=test_feed_dict)
                    train_writer.add_summary(train_summary_str, step)
                    test_writer.add_summary(test_summary_str, step)
                    # summary_writer.flush()

                    print('Training Data Eval:')
                    train_correct = sess.run(eval_correct, feed_dict=feed_dict)
                    print('train_correct:{}'.format(train_correct))
                    print('Test Data Eval:')
                    test_correct = sess.run(eval_correct,
                                            feed_dict=test_feed_dict)
                    print('test_correct:{}\n\n'.format(test_correct))
                    # if (step + 1) == flags.max_steps:
                    if step > flags.max_steps - 10 * 50:
                        last_train_correct.append(train_correct)
                        last_test_correct.append(test_correct)
                    if (step + 1) == flags.max_steps:
                        # fe_logits_last_values = sess.run(fe_logits, feed_dict=test_feed_dict)
                        # np.savetxt('./summaries/summaries_graph_1219/' + str(fold_num) + '/logit.txt',
                        #            fe_logits_last_values)
                        # np.savetxt('./summaries/summaries_graph_1219/' + str(fold_num) + '/test_l.txt',
                        #            l_test)
                        print(last_train_correct)
                        print(last_test_correct)
                        print(np.array(last_train_correct).mean())
                        print(np.array(last_test_correct).mean())
            # saver_path = saver.save(sess, "/home/duheran/facial_expresssion/save/dtgn.ckpt")  # save the model to save/model.ckpt
            # print("Model saved in file:", saver_path)
            coord.request_stop()
            coord.join(threads)

    return last_train_correct, last_test_correct
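
# read_and_decode above is assumed, not shown; a typical TF 1.x TFRecord
# reader with this signature looks like the sketch below (the feature keys
# and the image shape are assumptions for illustration):
import tensorflow as tf

def read_and_decode(tfrecord_path):
    filename_queue = tf.train.string_input_producer([tfrecord_path])
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(serialized, features={
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [64, 64, 1])           # assumed image shape
    image = tf.cast(image, tf.float32) * (1. / 255)  # scale to [0, 1]
    label = tf.cast(features['label'], tf.int32)
    return image, label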
    def __init__(self, conf, tasksconf, dataconf, modelconf, evaluatorconf,
                 expdir, init_filename, server, task_index):
        '''
        NnetTrainer constructor, creates the training graph

        Args:
            conf: the trainer config
            tasksconf: the config file for each task
            dataconf: the data configuration as a ConfigParser
            modelconf: the neural net model configuration
            evaluatorconf: the evaluator configuration for evaluating
                if None no evaluation will be done
            expdir: directory where the summaries will be written
            init_filename: filename of the network that should be used to
            initialize the model. Set to None if no network is available/wanted.
            server: optional server to be used for distributed training
            task_index: optional index of the worker task in the cluster
        '''

        self.expdir = expdir
        self.server = server
        self.conf = conf
        self.tasksconf = tasksconf
        self.task_index = task_index
        self.init_filename = init_filename

        self.batch_size = int(conf['batch_size'])

        cluster = tf.train.ClusterSpec(server.server_def.cluster)

        #create the graph
        self.graph = tf.Graph()

        if 'local' in cluster.as_dict():
            num_replicas = 1
            device = tf.DeviceSpec(job='local')
        else:
            #distributed training
            num_replicas = len(cluster.as_dict()['worker'])
            num_servers = len(cluster.as_dict()['ps'])
            ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
                num_tasks=num_servers,
                load_fn=tf.contrib.training.byte_size_load_fn)
            device = tf.train.replica_device_setter(ps_tasks=num_servers,
                                                    ps_strategy=ps_strategy)
            chief_ps = tf.DeviceSpec(job='ps', task=0)

        self.is_chief = task_index == 0

        #create the model
        modelfile = os.path.join(expdir, 'model', 'model.pkl')
        with open(modelfile, 'wb') as fid:
            self.model = model_factory.factory(
                modelconf.get('model', 'architecture'))(conf=modelconf)
            pickle.dump(self.model, fid)

        evaltype = evaluatorconf.get('evaluator', 'evaluator')

        #define the placeholders in the graph
        with self.graph.as_default():

            #create a local num_steps variable
            self.num_steps = tf.get_variable(
                name='num_steps',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            #a variable to hold the amount of steps already taken
            self.global_step = tf.get_variable(
                name='global_step',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            should_terminate = tf.get_variable(
                name='should_terminate',
                shape=[],
                dtype=tf.bool,
                initializer=tf.constant_initializer(False),
                trainable=False)

            self.terminate = should_terminate.assign(True).op

            #create a check if training should continue
            self.should_stop = tf.logical_or(
                tf.greater_equal(self.global_step, self.num_steps),
                should_terminate)

            with tf.variable_scope('train') as train_scope:

                tasks_losses = []

            if evaltype != 'None':

                with tf.variable_scope('validate') as val_scope:

                    tasks_val_losses = []

        #3 model types for multi task: single one to one; single one to many; multiple one to one
        #single one to one: the whole model is shared for all tasks, only the loss function can differ
        #single one to many: each task has a separate output so only part of the network is shared, e.g. everything but the output layer
        #multiple one to one: each task has its own network. Possibly the outputs are combined in a loss function

        for task in self.conf['tasks'].split(' '):
            taskconf = self.tasksconf[task]

            #get the database configurations
            input_names = modelconf.get('io', 'inputs').split(' ')
            if input_names == ['']:
                input_names = []
            input_sections = [taskconf[i].split(' ') for i in input_names]
            input_dataconfs = []
            for sectionset in input_sections:
                input_dataconfs.append([])
                for section in sectionset:
                    input_dataconfs[-1].append(dict(dataconf.items(section)))

            output_names = taskconf['targets'].split(' ')
            if output_names == ['']:
                output_names = []
            target_sections = [taskconf[o].split(' ') for o in output_names]
            target_dataconfs = []
            for sectionset in target_sections:
                target_dataconfs.append([])
                for section in sectionset:
                    target_dataconfs[-1].append(dict(dataconf.items(section)))

            #create the loss computer
            loss_computer = loss_computer_factory.factory(
                taskconf['loss_type'])(self.batch_size)

            #create the evaluator
            if evaltype != 'None':
                evaluator = evaluator_factory.factory(evaltype)(
                    conf=evaluatorconf,
                    dataconf=dataconf,
                    model=self.model,
                    task=task)

            with self.graph.as_default():

                #check if running in distributed mode
                if 'local' in cluster.as_dict():

                    #get the filenames
                    data_queue_elements, _ = input_pipeline.get_filenames(
                        input_dataconfs + target_dataconfs)

                    #create the data queue and queue runners (inputs get shuffled! I already did this so set to False)
                    data_queue = tf.train.string_input_producer(
                        string_tensor=data_queue_elements,
                        shuffle=False,
                        seed=None,
                        capacity=self.batch_size * 2,
                        shared_name='data_queue')

                    #compute the number of steps
                    if int(conf['numbatches_to_aggregate']) == 0:
                        # integer division: num_steps feeds an int32 variable
                        num_steps = (int(conf['num_epochs']) *
                                     len(data_queue_elements) //
                                     self.batch_size)
                    else:
                        num_steps = (int(conf['num_epochs']) *
                                     len(data_queue_elements) //
                                     (self.batch_size *
                                      int(conf['numbatches_to_aggregate'])))
                    #set the number of steps
                    self.set_num_steps = self.num_steps.assign(num_steps).op
                    self.done = tf.no_op()

                else:
                    with tf.device(chief_ps):

                        #get the data queue
                        data_queue = tf.FIFOQueue(capacity=self.batch_size *
                                                  (num_replicas + 1),
                                                  shared_name='data_queue',
                                                  name='data_queue',
                                                  dtypes=[tf.string],
                                                  shapes=[[]])

                        #get the number of steps from the parameter server
                        num_steps_queue = tf.FIFOQueue(
                            capacity=num_replicas,
                            dtypes=[tf.int32],
                            shared_name='num_steps_queue',
                            name='num_steps_queue',
                            shapes=[[]])

                        #set the number of steps
                        self.set_num_steps = self.num_steps.assign(
                            num_steps_queue.dequeue()).op

                    #get the done queues
                    done_ops = []
                    for i in range(num_servers):
                        with tf.device('job:ps/task:%d' % i):
                            done_queue = tf.FIFOQueue(
                                capacity=num_replicas,
                                dtypes=[tf.bool],
                                shapes=[[]],
                                shared_name='done_queue%d' % i,
                                name='done_queue%d' % i)

                            done_ops.append(done_queue.enqueue(True))

                    self.done = tf.group(*done_ops)

                #training part
                with tf.variable_scope(train_scope):

                    with tf.variable_scope(task):

                        #create the input pipeline
                        data, seq_length = input_pipeline.input_pipeline(
                            data_queue=data_queue,
                            batch_size=self.batch_size,
                            numbuckets=int(conf['numbuckets']),
                            dataconfs=input_dataconfs + target_dataconfs)

                        inputs = {
                            input_names[i]: d
                            for i, d in enumerate(data[:len(input_sections)])
                        }
                        seq_length = {
                            input_names[i]: d
                            for i, d in enumerate(
                                seq_length[:len(input_sections)])
                        }
                        targets = {
                            output_names[i]: d
                            for i, d in enumerate(data[len(input_sections):])
                        }
                        #target_seq_length = {
                        #output_names[i]: d
                        #for i, d in enumerate(seq_length[len(input_sections):])}

                        #compute the training outputs of the model
                        logits = self.model(inputs=inputs,
                                            input_seq_length=seq_length,
                                            is_training=True)

                        #compute the loss
                        task_loss = loss_computer(targets, logits, seq_length)

                    tasks_losses.append(task_loss)

                #validation part
                if evaltype != 'None':

                    with tf.variable_scope(val_scope):

                        with tf.variable_scope(task):

                            task_val_batch_loss, self.valbatches, _, _ = \
                                evaluator.evaluate()

                        tasks_val_losses.append(task_val_batch_loss)

        with self.graph.as_default():

            with tf.variable_scope(train_scope):

                #a variable to scale the learning rate (used to reduce the
                #learning rate in case validation performance drops)
                learning_rate_fact = tf.get_variable(
                    name='learning_rate_fact',
                    shape=[],
                    initializer=tf.constant_initializer(1.0),
                    trainable=False)

                #compute the learning rate with exponential decay and scale
                #with the learning rate factor
                self.learning_rate = (tf.train.exponential_decay(
                    learning_rate=float(conf['initial_learning_rate']),
                    global_step=self.global_step,
                    decay_steps=self.num_steps,
                    decay_rate=float(conf['learning_rate_decay'])) *
                                      learning_rate_fact)

                #create the optimizer
                optimizer = tf.train.AdamOptimizer(self.learning_rate)

                #TODO: The proper way to exploit data parallelism is via the
                #SyncReplicasOptimizer defined below. However for some reason it hangs
                #and I have not yet found a solution for it. For the moment the gradients
                #are accumulated in a way that does not allow data parallelism and there
                #is no advantage in having multiple workers. (We also accumulate the loss)

                #create an optimizer that aggregates gradients
                #if int(conf['numbatches_to_aggregate']) > 0:
                #optimizer = tf.train.SyncReplicasOptimizer(
                #opt=optimizer,
                #replicas_to_aggregate=int(
                #conf['numbatches_to_aggregate'])#,
                ##total_num_replicas=num_replicas
                #)

                loss = tf.reduce_mean(tasks_losses)

                self.total_loss = tf.get_variable(
                    name='total_loss',
                    shape=[],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                self.reset_loss = self.total_loss.assign(0.0)

                self.acc_loss = self.total_loss.assign_add(loss)

                ##compute the gradients
                #grads_and_vars = optimizer.compute_gradients(self.loss)

                #with tf.variable_scope('clip'):
                #clip_value = float(conf['clip_grad_value'])
                ##clip the gradients
                #grads_and_vars = [(tf.clip_by_value(grad, -clip_value, clip_value), var)
                #for grad, var in grads_and_vars]

                self.params = tf.trainable_variables()

                grads = [
                    tf.get_variable(param.op.name,
                                    param.get_shape().as_list(),
                                    initializer=tf.constant_initializer(0),
                                    trainable=False) for param in self.params
                ]

                self.reset_grad = tf.variables_initializer(grads)

                #compute the gradients
                minibatch_grads_and_vars = optimizer.compute_gradients(loss)

                with tf.variable_scope('clip'):
                    clip_value = float(conf['clip_grad_value'])
                    #clip the gradients
                    minibatch_grads_and_vars = [
                        (tf.clip_by_value(grad, -clip_value, clip_value), var)
                        for grad, var in minibatch_grads_and_vars
                    ]

                (minibatchgrads,
                 minibatchvars) = zip(*minibatch_grads_and_vars)

                #update gradients by accumulating them
                self.update_gradients = [
                    grad.assign_add(batchgrad)
                    for batchgrad, grad in zip(minibatchgrads, grads)
                ]

                #operation to apply the gradients
                grads_and_vars = list(zip(grads, minibatchvars))
                apply_gradients_op = optimizer.apply_gradients(
                    grads_and_vars=grads_and_vars,
                    global_step=self.global_step,
                    name='apply_gradients')

                #all remaining operations in the UPDATE_OPS collection
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                #create an operation to update the gradients, the batch_loss
                #and do all other update ops
                self.update_op = tf.group(*([apply_gradients_op] + update_ops),
                                          name='update')

        with self.graph.as_default():

            if evaltype != 'None':
                #validation part
                with tf.variable_scope(val_scope):

                    #create a variable to hold the validation loss
                    self.validation_loss = tf.get_variable(
                        name='validation_loss',
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(0),
                        trainable=False)

                    #create a variable to save the last step where the model
                    #was validated
                    validated_step = tf.get_variable(
                        name='validated_step',
                        shape=[],
                        dtype=tf.int32,
                        initializer=tf.constant_initializer(
                            -int(conf['valid_frequency'])),
                        trainable=False)

                    #a check if validation is due
                    self.should_validate = tf.greater_equal(
                        self.global_step - validated_step,
                        int(conf['valid_frequency']))

                    #compute the loss
                    val_batch_loss = tf.reduce_mean(tasks_val_losses)

                    self.update_loss = self.validation_loss.assign(
                        self.validation_loss +
                        val_batch_loss  #/self.valbatches
                    ).op

                    #update the learning rate factor
                    self.half_lr = learning_rate_fact.assign(
                        learning_rate_fact / 2).op

                    #create an operation to updated the validated step
                    self.update_validated_step = validated_step.assign(
                        self.global_step).op

                    #variable to hold the best validation loss so far
                    self.best_validation = tf.get_variable(
                        name='best_validation',
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(1.79e+308),
                        trainable=False)

                    #op to update the best validation loss
                    self.update_best = self.best_validation.assign(
                        self.validation_loss).op

                    #a variable that holds the amount of workers at the
                    #validation point
                    waiting_workers = tf.get_variable(
                        name='waiting_workers',
                        shape=[],
                        dtype=tf.int32,
                        initializer=tf.constant_initializer(0),
                        trainable=False)

                    #an operation to signal a waiting worker
                    self.waiting = waiting_workers.assign_add(1).op

                    #an operation to set the waiting workers to zero
                    self.reset_waiting = waiting_workers.initializer

                    #an operation to check if all workers are waiting
                    self.all_waiting = tf.equal(waiting_workers,
                                                num_replicas - 1)

                    tf.summary.scalar('validation_loss', self.validation_loss)
            else:
                self.update_loss = None

            tf.summary.scalar('learning_rate', self.learning_rate)

            #create a histogram for all trainable parameters
            for param in tf.trainable_variables():
                tf.summary.histogram(param.name, param)

            #create the scaffold
            self.scaffold = tf.train.Scaffold()
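
# A minimal sketch of the gradient-accumulation scheme used above: reset the
# buffers, accumulate minibatch gradients with assign_add, then apply the
# summed gradients in a single optimizer step (illustrative model).
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
w = tf.get_variable('w', [4, 1])
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))

opt = tf.train.AdamOptimizer(1e-3)
params = tf.trainable_variables()
buffers = [tf.Variable(tf.zeros(p.get_shape()), trainable=False)
           for p in params]

reset_grads = tf.variables_initializer(buffers)
minibatch_grads = tf.gradients(loss, params)
accumulate = [b.assign_add(g) for b, g in zip(buffers, minibatch_grads)]
apply_grads = opt.apply_gradients(list(zip(buffers, params)))
# per aggregation cycle: run reset_grads once, `accumulate` once per
# minibatch, then apply_grads once.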
    def setup_model(self):
        with SetVerbosity(self.verbose):

            assert issubclass(self.policy, ActorCriticPolicy), "Error: the input policy for the ACER model must be " \
                                                               "an instance of common.policies.ActorCriticPolicy."

            if isinstance(self.action_space, Discrete):
                self.n_act = self.action_space.n
                continuous = False
            elif isinstance(self.action_space, Box):
                # self.n_act = self.action_space.shape[-1]
                # continuous = True
                raise NotImplementedError("WIP: Acer does not support Continuous actions yet.")
            else:
                raise ValueError("Error: ACER does not work with {} actions space.".format(self.action_space))

            self.n_batch = self.n_envs * self.n_steps

            self.graph = tf.Graph()
            with self.graph.as_default():
                self.sess = tf_util.make_session(num_cpu=self.num_procs, graph=self.graph)

                n_batch_step = None
                if issubclass(self.policy, LstmPolicy):
                    n_batch_step = self.n_envs
                n_batch_train = self.n_envs * (self.n_steps + 1)

                step_model = self.policy(self.sess, self.observation_space, self.action_space, self.n_envs, 1,
                                         n_batch_step, reuse=False, **self.policy_kwargs)

                self.params = find_trainable_variables("model")

                with tf.variable_scope("train_model", reuse=True,
                                       custom_getter=tf_util.outer_scope_getter("train_model")):
                    train_model = self.policy(self.sess, self.observation_space, self.action_space, self.n_envs,
                                              self.n_steps + 1, n_batch_train, reuse=True, **self.policy_kwargs)

                with tf.variable_scope("moving_average"):
                    # create averaged model
                    ema = tf.train.ExponentialMovingAverage(self.alpha)
                    ema_apply_op = ema.apply(self.params)

                    def custom_getter(getter, name, *args, **kwargs):
                        name = name.replace("polyak_model/", "")
                        val = ema.average(getter(name, *args, **kwargs))
                        return val

                with tf.variable_scope("polyak_model", reuse=True, custom_getter=custom_getter):
                    self.polyak_model = polyak_model = self.policy(self.sess, self.observation_space, self.action_space,
                                                                   self.n_envs, self.n_steps + 1,
                                                                   self.n_envs * (self.n_steps + 1), reuse=True,
                                                                   **self.policy_kwargs)

                with tf.variable_scope("loss", reuse=False):
                    self.done_ph = tf.placeholder(tf.float32, [self.n_batch])  # dones
                    self.reward_ph = tf.placeholder(tf.float32, [self.n_batch])  # rewards, not returns
                    self.mu_ph = tf.placeholder(tf.float32, [self.n_batch, self.n_act])  # mu's
                    self.action_ph = train_model.pdtype.sample_placeholder([self.n_batch])
                    self.learning_rate_ph = tf.placeholder(tf.float32, [])
                    eps = 1e-6

                    # Notation: (var) = batch variable, (var)s = sequence variable,
                    # (var)_i = variable index by action at step i
                    # shape is [n_envs * (n_steps + 1)]
                    if continuous:
                        value = train_model.value_fn[:, 0]
                    else:
                        value = tf.reduce_sum(train_model.policy_proba * train_model.q_value, axis=-1)

                    rho, rho_i_ = None, None
                    if continuous:
                        action_ = strip(train_model.proba_distribution.sample(), self.n_envs, self.n_steps)
                        distribution_f = tf.contrib.distributions.MultivariateNormalDiag(
                            loc=strip(train_model.proba_distribution.mean, self.n_envs, self.n_steps),
                            scale_diag=strip(train_model.proba_distribution.logstd, self.n_envs, self.n_steps))
                        f_polyak = tf.contrib.distributions.MultivariateNormalDiag(
                            loc=strip(polyak_model.proba_distribution.mean, self.n_envs, self.n_steps),
                            scale_diag=strip(polyak_model.proba_distribution.logstd, self.n_envs, self.n_steps))

                        f_i = distribution_f.prob(self.action_ph)
                        f_i_ = distribution_f.prob(action_)
                        f_polyak_i = f_polyak.prob(self.action_ph)
                        phi_i = strip(train_model.proba_distribution.mean, self.n_envs, self.n_steps)

                        q_value = strip(train_model.value_fn, self.n_envs, self.n_steps)
                        q_i = q_value[:, 0]

                        rho_i = tf.reshape(f_i, [-1, 1]) / (self.mu_ph + eps)
                        rho_i_ = tf.reshape(f_i_, [-1, 1]) / (self.mu_ph + eps)

                        qret = q_retrace(self.reward_ph, self.done_ph, q_i, value, tf.pow(rho_i, 1/self.n_act),
                                         self.n_envs, self.n_steps, self.gamma)
                    else:
                        # strip off last step
                        # f is a distribution, chosen to be Gaussian distributions
                        # with fixed diagonal covariance and mean \phi(x)
                        # in the paper
                        distribution_f, f_polyak, q_value = \
                            map(lambda variables: strip(variables, self.n_envs, self.n_steps),
                                [train_model.policy_proba, polyak_model.policy_proba, train_model.q_value])

                        # Get pi and q values for actions taken
                        f_i = get_by_index(distribution_f, self.action_ph)
                        f_i_ = distribution_f
                        phi_i = distribution_f
                        f_polyak_i = f_polyak

                        q_i = get_by_index(q_value, self.action_ph)

                        # Compute ratios for importance truncation
                        rho = distribution_f / (self.mu_ph + eps)
                        rho_i = get_by_index(rho, self.action_ph)

                        # Calculate Q_retrace targets
                        qret = q_retrace(self.reward_ph, self.done_ph, q_i, value, rho_i, self.n_envs, self.n_steps,
                                         self.gamma)

                    # Calculate losses
                    # Entropy
                    entropy = tf.reduce_sum(train_model.proba_distribution.entropy())

                    # Policy Gradient loss, with truncated importance sampling & bias correction
                    value = strip(value, self.n_envs, self.n_steps, True)
                    # check_shape([qret, value, rho_i, f_i], [[self.n_envs * self.n_steps]] * 4)
                    # check_shape([rho, distribution_f, q_value], [[self.n_envs * self.n_steps, self.n_act]] * 2)

                    # Truncated importance sampling
                    adv = qret - value
                    log_f = tf.log(f_i + eps)
                    # [n_envs * n_steps]
                    gain_f = log_f * tf.stop_gradient(adv * tf.minimum(self.correction_term, rho_i))
                    loss_f = -tf.reduce_mean(gain_f)

                    # Bias correction for the truncation
                    adv_bc = (q_value - tf.reshape(value, [self.n_envs * self.n_steps, 1]))  # [n_envs * n_steps, n_act]

                    # check_shape([adv_bc, log_f_bc], [[self.n_envs * self.n_steps, self.n_act]] * 2)
                    if continuous:
                        gain_bc = tf.stop_gradient(adv_bc *
                                                   tf.nn.relu(1.0 - (self.correction_term / (rho_i_ + eps))) *
                                                   f_i_)
                    else:
                        log_f_bc = tf.log(f_i_ + eps)  # / (f_old + eps)
                        gain_bc = tf.reduce_sum(log_f_bc *
                                                tf.stop_gradient(
                                                    adv_bc *
                                                    tf.nn.relu(1.0 - (self.correction_term / (rho + eps))) *
                                                    f_i_),
                                                axis=1)
                    # IMP: This is sum, as expectation wrt f
                    loss_bc = -tf.reduce_mean(gain_bc)

                    loss_policy = loss_f + loss_bc

                    # Value/Q function loss, and explained variance
                    check_shape([qret, q_i], [[self.n_envs * self.n_steps]] * 2)
                    explained_variance = q_explained_variance(tf.reshape(q_i, [self.n_envs, self.n_steps]),
                                                              tf.reshape(qret, [self.n_envs, self.n_steps]))
                    loss_q = tf.reduce_mean(tf.square(tf.stop_gradient(qret) - q_i) * 0.5)

                    # Net loss
                    check_shape([loss_policy, loss_q, entropy], [[]] * 3)
                    loss = loss_policy + self.q_coef * loss_q - self.ent_coef * entropy

                    tf.summary.scalar('entropy_loss', entropy)
                    tf.summary.scalar('policy_gradient_loss', loss_policy)
                    tf.summary.scalar('value_function_loss', loss_q)
                    tf.summary.scalar('loss', loss)

                    norm_grads_q, norm_grads_policy, avg_norm_grads_f = None, None, None
                    avg_norm_k, avg_norm_g, avg_norm_k_dot_g, avg_norm_adj = None, None, None, None
                    if self.trust_region:
                        # [n_envs * n_steps, n_act]
                        grad = tf.gradients(- (loss_policy - self.ent_coef * entropy) * self.n_steps * self.n_envs,
                                            phi_i)
                        # [n_envs * n_steps, n_act] # Directly computed gradient of KL divergence wrt f
                        kl_grad = - f_polyak_i / (f_i_ + eps)
                        k_dot_g = tf.reduce_sum(kl_grad * grad, axis=-1)
                        adj = tf.maximum(0.0, (tf.reduce_sum(kl_grad * grad, axis=-1) - self.delta) / (
                                tf.reduce_sum(tf.square(kl_grad), axis=-1) + eps))  # [n_envs * n_steps]

                        # Calculate stats (before doing adjustment) for logging.
                        avg_norm_k = avg_norm(kl_grad)
                        avg_norm_g = avg_norm(grad)
                        avg_norm_k_dot_g = tf.reduce_mean(tf.abs(k_dot_g))
                        avg_norm_adj = tf.reduce_mean(tf.abs(adj))

                        grad = grad - tf.reshape(adj, [self.n_envs * self.n_steps, 1]) * kl_grad
                        # Trust-region-adjusted gradients w.r.t. f, i.e. the statistics of policy pi
                        grads_f = -grad / (self.n_envs * self.n_steps)
                        grads_policy = tf.gradients(f_i_, self.params, grads_f)
                        grads_q = tf.gradients(loss_q * self.q_coef, self.params)
                        grads = [gradient_add(g1, g2, param, verbose=self.verbose)
                                 for (g1, g2, param) in zip(grads_policy, grads_q, self.params)]

                        avg_norm_grads_f = avg_norm(grads_f) * (self.n_steps * self.n_envs)
                        norm_grads_q = tf.global_norm(grads_q)
                        norm_grads_policy = tf.global_norm(grads_policy)
                    else:
                        grads = tf.gradients(loss, self.params)

                    norm_grads = None
                    if self.max_grad_norm is not None:
                        grads, norm_grads = tf.clip_by_global_norm(grads, self.max_grad_norm)
                    grads = list(zip(grads, self.params))

                with tf.variable_scope("input_info", reuse=False):
                    tf.summary.scalar('rewards', tf.reduce_mean(self.reward_ph))
                    tf.summary.scalar('learning_rate', tf.reduce_mean(self.learning_rate))
                    tf.summary.scalar('advantage', tf.reduce_mean(adv))
                    tf.summary.scalar('action_probability', tf.reduce_mean(self.mu_ph))

                    if self.full_tensorboard_log:
                        tf.summary.histogram('rewards', self.reward_ph)
                        tf.summary.histogram('learning_rate', self.learning_rate)
                        tf.summary.histogram('advantage', adv)
                        tf.summary.histogram('action_probability', self.mu_ph)
                        if tf_util.is_image(self.observation_space):
                            tf.summary.image('observation', train_model.obs_ph)
                        else:
                            tf.summary.histogram('observation', train_model.obs_ph)

                trainer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_ph, decay=self.rprop_alpha,
                                                    epsilon=self.rprop_epsilon)
                _opt_op = trainer.apply_gradients(grads)

                # Running _train performs the gradient step first, then applies the EMA update
                with tf.control_dependencies([_opt_op]):
                    _train = tf.group(ema_apply_op)

                # Ops/Summaries to run, and their names for logging
                assert norm_grads is not None  # requires max_grad_norm to be set, so the clipping branch above ran
                run_ops = [_train, loss, loss_q, entropy, loss_policy, loss_f, loss_bc, explained_variance, norm_grads]
                names_ops = ['loss', 'loss_q', 'entropy', 'loss_policy', 'loss_f', 'loss_bc', 'explained_variance',
                             'norm_grads']
                self.run_ops = run_ops
                self.names_ops = names_ops
                if self.trust_region:
                    self.run_ops = run_ops + [norm_grads_q, norm_grads_policy, avg_norm_grads_f, avg_norm_k, avg_norm_g,
                                              avg_norm_k_dot_g, avg_norm_adj]
                    self.names_ops = names_ops + ['norm_grads_q', 'norm_grads_policy', 'avg_norm_grads_f', 'avg_norm_k',
                                                  'avg_norm_g', 'avg_norm_k_dot_g', 'avg_norm_adj']

                self.train_model = train_model
                self.step_model = step_model
                self.step = step_model.step
                self.proba_step = step_model.proba_step
                self.initial_state = step_model.initial_state

                tf.global_variables_initializer().run(session=self.sess)

                self.summary = tf.summary.merge_all()
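
The trust-region step above is easier to see in isolation: the policy gradient g is projected so that its component along the KL gradient k never exceeds delta. A minimal NumPy sketch of that projection (standalone; g, k and delta are illustrative values, not tensors from the snippet):

import numpy as np

def trust_region_adjust(g, k, delta, eps=1e-8):
    # g' = g - max(0, (k.g - delta) / ||k||^2) * k, exactly the 'adj' term above
    adj = max(0.0, (np.dot(k, g) - delta) / (np.dot(k, k) + eps))
    return g - adj * k

g = np.array([1.0, 2.0])
k = np.array([0.5, 0.5])
g_adj = trust_region_adjust(g, k, delta=1.0)
print(np.dot(k, g), np.dot(k, g_adj))  # 1.5 -> 1.0: component along k capped at delta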
    def __init__(self, env):

        self.env = env
        tf.reset_default_graph()
        self.sess = tf.Session()

        # A few starter hyperparameters
        self.gamma = 0.99
        self.h1 = 64
        self.h2 = 64
        self.h3 = 64
        self.l2_reg = 1e-6
        self.max_episode_step = 1000
        self.update_slow_target_every = 100
        self.batch_size = 1024
        self.eps_start = 1.0
        self.epsilon_end = 0.05
        self.epsilon_decay_length = 1e5
        self.epsilon_decay_exp = 0.97
        self.num_episodes = 0
        self.num_steps = 0
        self.epsilon_linear_step = (
            self.eps_start - self.epsilon_end) / self.epsilon_decay_length
        # memory
        self.replay_memory = ReplayMemory(int(1e6))  # capacity as an int
        # Wait until the replay memory holds at least this many samples before training
        self.min_replay_size = 2000

        # define your training operations here...
        self.observation_input = tf.placeholder(
            tf.float32, shape=[None] + list(self.env.observation_space.shape))
        self.target_input = tf.placeholder(
            dtype=tf.float32,
            shape=[None] + list(self.env.observation_space.shape)
        )  # input to slow target network

        with tf.variable_scope('q_network'):
            self.q_values = self.build_model(self.observation_input)

        with tf.variable_scope('target_network'):
            # feed the dedicated placeholder so the slow target network
            # evaluates its own input batch
            self.target_q_values = self.build_model(self.target_input, False)

        self.q_network_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='q_network')
        self.q_target_network_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='target_network')

        # update values for slowly-changing target network to match current critic network
        update_slow_target_ops = []
        for i, slow_target_var in enumerate(self.q_target_network_vars):
            update_slow_target_op = slow_target_var.assign(
                self.q_network_vars[i])
            update_slow_target_ops.append(update_slow_target_op)

        self.update_slow_target_op = tf.group(*update_slow_target_ops,
                                              name='update_slow_target')

        # define your update operations here...
        self.saver = tf.train.Saver(tf.trainable_variables())
        self.target = tf.placeholder(tf.float32, shape=[None])

        self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
        # Selecting the Q-value of the chosen action, adapted from
        # https://github.com/dennybritz/reinforcement-learning/tree/master/DQN
        # Use the dynamic batch size so feeds smaller than self.batch_size work.
        gather_indices = tf.range(tf.shape(self.q_values)[0]) * tf.shape(
            self.q_values)[1] + self.actions
        self.action_predictions = tf.gather(tf.reshape(self.q_values, [-1]),
                                            gather_indices)
        self.loss = tf.losses.huber_loss(self.target, self.action_predictions)

        # Add an L2 regularization term for the weights (biases excluded)
        for var in self.q_network_vars:
            if 'bias' not in var.name:
                self.loss += self.l2_reg * 0.5 * tf.nn.l2_loss(var)
        self.minimizer = tf.train.AdamOptimizer(learning_rate=1e-6).minimize(
            self.loss)
        self.sess.run(tf.global_variables_initializer())
        self.writer = tf.summary.FileWriter(LOGDIR)
        self.writer.add_graph(self.sess.graph)
        self.count = 0

        # Summaries for TensorBoard
        tf.summary.scalar("loss", self.loss)
        tf.summary.histogram("q_values_hist", self.q_values)
        tf.summary.scalar("max_q_value", tf.reduce_max(self.q_values))
        self.summ = tf.summary.merge_all()
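
The flattened-gather trick used for action_predictions above relies on row-major layout: entry (i, a) of a [batch, n_actions] matrix sits at flat index i * n_actions + a. A quick NumPy check of that identity (toy values, not part of the class):

import numpy as np

batch, n_actions = 4, 3
q = np.arange(batch * n_actions, dtype=np.float32).reshape(batch, n_actions)
actions = np.array([2, 0, 1, 2])

flat_idx = np.arange(batch) * n_actions + actions
# Gathering from the flattened matrix picks exactly q[i, actions[i]]
assert np.array_equal(q.reshape(-1)[flat_idx], q[np.arange(batch), actions])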
    def _build_graph(self):
        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')
        self.input_ids = tf.placeholder(shape=[None, None],
                                        dtype=tf.int32,
                                        name='input_ids')
        self.input_mask = tf.placeholder(shape=[None, None],
                                         dtype=tf.int32,
                                         name="input_mask")
        self.segment_ids = tf.placeholder(shape=[None, None],
                                          dtype=tf.int32,
                                          name="segment_ids")
        self.y = tf.placeholder(tf.int32, [None])
        self.bert_embedding = BertEmbedding(self.bert_dir)
        _, output_layer = self.bert_embedding(input_ids=self.input_ids,
                                              input_mask=self.input_mask,
                                              segment_ids=self.segment_ids,
                                              is_training=self.training,
                                              return_pool_output=True,
                                              use_fp16=self.use_fp16)

        dropout = Dropout(0.9)
        output_layer = dropout(output_layer, self.training)

        # Add CNN layers with multiple kernel widths and max-over-time pooling
        pooled = []
        for idx, kernel_size in enumerate(self.filter_sizes1):
            con1d = tf.layers.conv1d(output_layer,
                                     self.filter_nums1[idx],
                                     kernel_size,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='conv1d-%d' % (idx))
            pooled_conv = tf.reduce_max(con1d, axis=1)
            pooled.append(pooled_conv)
        merge = tf.concat(pooled, axis=1)
        merge = dropout(merge, self.training)
        merge = tf.layers.dense(merge,
                                128,
                                activation=tf.nn.tanh,
                                name='dense1')
        merge = dropout(merge, self.training)
        logits = tf.layers.dense(merge,
                                 self.num_class,
                                 activation=None,
                                 use_bias=False)
        probabilities = tf.nn.softmax(logits, axis=-1, name="probs")
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(self.y,
                                    depth=self.num_class,
                                    dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        self.loss = tf.reduce_mean(per_example_loss)
        self.probs = probabilities

        self.input_placeholder_dict = OrderedDict({
            "input_ids": self.input_ids,
            "segment_ids": self.segment_ids,
            "labels": self.y,
            "input_mask": self.input_mask,
            "training": self.training
        })

        self.output_variable_dict = OrderedDict({
            "predict":
            tf.argmax(self.probs, axis=1),
            "probabilities":
            probabilities
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
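
The metric blocks above work because tf.metrics keeps its running total and count in LOCAL_VARIABLES, so re-initialising the variables of one scope resets just that scope's metrics. A self-contained TF1 sketch of the same pattern (the fed loss values are made up for illustration):

import tensorflow as tf

loss = tf.placeholder(tf.float32, shape=[], name='loss')
with tf.variable_scope('train_metrics'):
    mean_loss, update_op = tf.metrics.mean(loss)

metric_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                scope='train_metrics')
reset_op = tf.variables_initializer(metric_vars)

with tf.Session() as sess:
    sess.run(reset_op)
    for value in (1.0, 3.0):
        sess.run(update_op, feed_dict={loss: value})
    print(sess.run(mean_loss))  # 2.0
    sess.run(reset_op)          # running mean starts from scratch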
    def get_tfrecord_path(self, mode):
        return os.path.join(self.data_path, mode + ".tfr")


if __name__ == '__main__':
    pars_wsj = ParseWSJ('../../data/wsj')
    pars_wsj.build_tfrecords("train")
    pars_wsj.build_tfrecords("dev")
    pars_wsj.build_tfrecords("test")

    batch_size = 10
    dataset = tf.data.TFRecordDataset(pars_wsj.get_tfrecord_path(mode="train"))
    dataset = dataset.map(pars_wsj.parse_examples)
    dataset = dataset.padded_batch(batch_size,
                                   padded_shapes=pars_wsj.get_padded_shapes())
    iterator = dataset.make_initializable_iterator()

    example = iterator.get_next()
    inputs, targets, inputs_length, targets_length = example

    global_step = tf.train.get_or_create_global_step()
    scaffold = tf.train.Scaffold(local_init_op=tf.group(
        tf.local_variables_initializer(), iterator.initializer))
    with tf.train.MonitoredTrainingSession(checkpoint_dir='logs/tests',
                                           scaffold=scaffold) as sess:
        inp, targ, targ_len = \
          sess.run([inputs, targets, targets_length])
        print(pars_wsj.decode(inp[0], pars_wsj.input_id2word))
        print(pars_wsj.decode(targ[0], pars_wsj.target_id2word))
    print(targ_len)
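
padded_batch pads every example in a batch to the longest element along each None dimension, which is what lets the variable-length sequences above be batched together. A minimal TF1 sketch with toy sequences (not the ParseWSJ pipeline):

import tensorflow as tf

def gen():
    for seq in ([1, 2], [3, 4, 5], [6]):
        yield seq

dataset = tf.data.Dataset.from_generator(gen, output_types=tf.int32,
                                         output_shapes=[None])
dataset = dataset.padded_batch(3, padded_shapes=[None])  # zero-padded
batch = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    print(sess.run(batch))  # [[1 2 0] [3 4 5] [6 0 0]]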
def build_train_imitation(make_obs_ph,
                          q_func,
                          num_actions,
                          optimizer,
                          grad_norm_clipping=None,
                          gamma=1.0,
                          double_q=False,
                          scope="deepq",
                          reuse=None,
                          param_noise=False,
                          param_noise_filter_func=None):
    """Creates the train function:

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that takes a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions
    optimizer: tf.train.Optimizer
        optimizer to use for the Q-learning objective.
    grad_norm_clipping: float or None
        clip gradient norms to this value. If None no clipping is performed.
    gamma: float
        discount rate.
    double_q: bool
        if True, use Double Q-Learning (https://arxiv.org/abs/1509.06461).
        In general it is a good idea to keep it enabled.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the scope must be given.
    param_noise: bool
        whether or not to use parameter space noise (https://arxiv.org/abs/1706.01905)
    param_noise_filter_func: tf.Variable -> bool
        function that decides whether or not a variable should be perturbed. Only applicable
        if param_noise is True. If set to None, default_param_noise_filter is used by default.

    Returns
    -------
    act: (tf.Variable, bool, float) -> tf.Variable
        function to select an action given an observation.
        See the top of the file for details.
    train: (object, np.array, np.array, object, np.array, np.array) -> np.array
        optimize the error in Bellman's equation.
        See the top of the file for details.
    update_target: () -> ()
        copy the parameters from the optimized Q function to the target Q function.
        See the top of the file for details.
    debug: {str: function}
        a bunch of functions to print debug data like q_values.
    """
    if param_noise:
        act_f = build_act_with_param_noise(
            make_obs_ph,
            q_func,
            num_actions,
            scope=scope,
            reuse=reuse,
            param_noise_filter_func=param_noise_filter_func)
    else:
        act_f = build_act_imitation(make_obs_ph,
                                    q_func,
                                    num_actions,
                                    scope=scope,
                                    reuse=reuse)

    with tf.variable_scope(scope, reuse=reuse):
        # set up placeholders
        obs_t_input = make_obs_ph("obs_t")
        act_t_ph = tf.placeholder(tf.int32, [None], name="action")
        rew_t_ph = tf.placeholder(tf.float32, [None], name="reward")
        obs_tp1_input = make_obs_ph("obs_tp1")
        done_mask_ph = tf.placeholder(tf.float32, [None], name="done")
        importance_weights_ph = tf.placeholder(tf.float32, [None],
                                               name="weight")

        # q network evaluation
        q_t = q_func(obs_t_input.get(),
                     num_actions,
                     scope="q_func",
                     reuse=True)  # reuse parameters from act
        q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope=tf.get_variable_scope().name +
                                        "/q_func")

        # target q network evaluation
        q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func")
        target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # q scores for actions which we know were selected in the given state.
        q_t_selected = tf.reduce_sum(q_t * tf.one_hot(act_t_ph, num_actions),
                                     1)  # Q(s,a;θi)

        # compute estimate of best possible value starting from state at t + 1
        if double_q:
            q_tp1_using_online_net = q_func(obs_tp1_input.get(),
                                            num_actions,
                                            scope="q_func",
                                            reuse=True)
            q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1)
            q_tp1_best = tf.reduce_sum(
                q_tp1 * tf.one_hot(q_tp1_best_using_online_net, num_actions),
                1)
        else:
            q_tp1_best = tf.reduce_max(q_tp1, 1)
        q_tp1_best_masked = (1.0 -
                             done_mask_ph) * q_tp1_best  # maxQ(s',a';θi-)

        # compute RHS of bellman equation
        q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked

        # compute the error (potentially clipped)
        td_error = q_t_selected - tf.stop_gradient(q_t_selected_target)
        errors = U.huber_loss(td_error)
        weighted_error = tf.reduce_mean(importance_weights_ph * errors)

        # compute optimization op (potentially with gradient clipping)
        if grad_norm_clipping is not None:
            gradients = optimizer.compute_gradients(weighted_error,
                                                    var_list=q_func_vars)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad,
                                                    grad_norm_clipping), var)
            optimize_expr = optimizer.apply_gradients(gradients)
        else:
            optimize_expr = optimizer.minimize(weighted_error,
                                               var_list=q_func_vars)

# -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-! OBSERVER !-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-

        # TED: set up placeholders
        ment_obs_t_input = make_obs_ph("ment_obs_t")
        ment_act_t_ph = tf.placeholder(tf.int32, [None], name="ment_action")
        ment_obs_tp1_input = make_obs_ph("ment_obs_tp1")
        old_error_ph = tf.placeholder(tf.float32,
                                      shape=[None],
                                      name="old_error")
        old_imp_weights_ph = tf.placeholder(tf.float32, [None],
                                            name="old_imp_weights")

        # TED's q network evaluation
        aug_q_t = q_func(obs_t_input.get(),
                         num_actions,
                         scope="q_func",
                         reuse=True)  # reuse parameters from act
        aug_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/q_func")

        # TED's target q network evaluation
        aug_q_tp1 = q_func(obs_tp1_input.get(),
                           num_actions,
                           scope="target_q_func",
                           reuse=True)
        aug_target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # TED's q scores for actions which we know were selected in the given state.
        aug_q_t_selected = tf.reduce_sum(
            aug_q_t * tf.one_hot(act_t_ph, num_actions), 1)  # Q(s,a;θi)

        aug_q_tp1_selected = tf.reduce_sum(
            q_tp1 * tf.one_hot(ment_act_t_ph, num_actions), 1)  # Q(s',am;θi)
        aug_q_tp1_selected_masked = (1.0 - done_mask_ph) * aug_q_tp1_selected

        # TED: compute an estimate of the best possible value starting from the state at t + 1
        if double_q:
            aug_q_tp1_using_online_net = q_func(obs_tp1_input.get(),
                                                num_actions,
                                                scope="q_func",
                                                reuse=True)
            aug_q_tp1_best_using_online_net = tf.argmax(
                aug_q_tp1_using_online_net, 1)
            aug_q_tp1_best = tf.reduce_sum(
                aug_q_tp1 *
                tf.one_hot(aug_q_tp1_best_using_online_net, num_actions), 1)
        else:
            aug_q_tp1_best = tf.reduce_max(aug_q_tp1, 1)
        aug_q_tp1_best_masked = (
            1.0 - done_mask_ph) * aug_q_tp1_best  # maxQ(s',a';θi-)

        # TED: compute the RHS of the Bellman equation
        aug_q_t_selected_target = rew_t_ph + gamma * tf.maximum(
            aug_q_tp1_best_masked, aug_q_tp1_selected_masked)
        # aug_q_t_selected_target = rew_t_ph + gamma * aug_q_tp1_best_masked

        # TED: compute the error (potentially clipped)
        aug_td_error = aug_q_t_selected - tf.stop_gradient(
            aug_q_t_selected_target)
        aug_errors = U.huber_loss(aug_td_error)
        aug_weighted_error = tf.reduce_mean(importance_weights_ph * aug_errors)
        # aug_weighted_error = tf.Print(aug_weighted_error, [tf.shape(importance_weights_ph)], "AGENT WEIGHTED ERROR: ")

        # TED: compute the optimization op (potentially with gradient clipping)
        if grad_norm_clipping is not None:
            gradients = optimizer.compute_gradients(aug_weighted_error,
                                                    var_list=aug_q_func_vars)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad,
                                                    grad_norm_clipping), var)
            aug_optimize_expr = optimizer.apply_gradients(gradients)
        else:
            aug_optimize_expr = optimizer.minimize(aug_weighted_error,
                                                   var_list=aug_q_func_vars)

# -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-! OBSERVER !-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-

# -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- MENTOR -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-

        # TED: mentor's q network evaluation
        ment_q_t = q_func(ment_obs_t_input.get(),
                          num_actions,
                          scope="q_func",
                          reuse=True)  # reuse parameters from act
        ment_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/q_func")

        # TED's mentor's target q network evaluation
        ment_q_tp1 = q_func(ment_obs_tp1_input.get(),
                            num_actions,
                            scope="target_q_func",
                            reuse=True)
        ment_target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # TED's mentor's q scores for action am which we know was selected in the given state.
        ment_q_t_selected = tf.reduce_sum(
            ment_q_t * tf.one_hot(ment_act_t_ph, num_actions),
            1)  # Q(sm,am;θi)

        ment_q_tp1_selected = tf.reduce_sum(
            ment_q_tp1 * tf.one_hot(ment_act_t_ph, num_actions),
            1)  # Q(sm',am;θi-)
        ment_q_tp1_selected_masked = (1.0 - done_mask_ph) * ment_q_tp1_selected

        # TED: compute an estimate of the best possible value starting from the state at t + 1
        if double_q:
            ment_q_tp1_using_online_net = q_func(ment_obs_tp1_input.get(),
                                                 num_actions,
                                                 scope="q_func",
                                                 reuse=True)
            ment_q_tp1_best_using_online_net = tf.argmax(
                ment_q_tp1_using_online_net, 1)
            ment_q_tp1_best = tf.reduce_sum(
                ment_q_tp1 *
                tf.one_hot(ment_q_tp1_best_using_online_net, num_actions), 1)
        else:
            ment_q_tp1_best = tf.reduce_max(ment_q_tp1, 1)
        ment_q_tp1_best_masked = (
            1.0 - done_mask_ph) * ment_q_tp1_best  # maxQ(sm',a';θi-)

        # TED: compute the RHS of the Bellman equation
        ment_q_t_selected_target = rew_t_ph + gamma * tf.maximum(
            ment_q_tp1_best_masked, ment_q_tp1_selected_masked)

        # TED: compute the error (potentially clipped)
        ment_td_error = ment_q_t_selected - tf.stop_gradient(
            ment_q_t_selected_target)
        ment_errors = U.huber_loss(ment_td_error)
        ment_weighted_error = tf.reduce_mean(importance_weights_ph *
                                             ment_errors)
        # ment_weighted_error = tf.Print(ment_weighted_error, [tf.shape(importance_weights_ph)], "MENTOR WEIGHTED ERROR: ")

        # TED: compute the optimization op (potentially with gradient clipping)
        if grad_norm_clipping is not None:
            gradients = optimizer.compute_gradients(ment_weighted_error,
                                                    var_list=ment_q_func_vars)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad,
                                                    grad_norm_clipping), var)
            ment_optimize_expr = optimizer.apply_gradients(gradients)
        else:
            ment_optimize_expr = optimizer.minimize(ment_weighted_error,
                                                    var_list=ment_q_func_vars)


# -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- MENTOR -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-

        def temp_func1():
            return aug_td_error, aug_optimize_expr

        def temp_func2():
            return ment_td_error, ment_optimize_expr

        old_errors = U.huber_loss(old_error_ph)
        old_weighted_error = tf.reduce_mean(old_imp_weights_ph * old_errors)

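        # tf.cond below picks the branch whose new weighted error deviates
        # less from the previous weighted error: if the mentor loss moved
        # further from old_weighted_error, train on the augmented (agent)
        # loss, otherwise on the mentor loss.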
        final_td_error, final_optimize_expr = tf.cond(
            tf.greater((ment_weighted_error - old_weighted_error)**2,
                       (aug_weighted_error - old_weighted_error)**2),
            temp_func1, temp_func2)

        # update_target_fn will be called periodically to copy Q network to target Q network
        update_target_expr = []
        for var, var_target in zip(
                sorted(q_func_vars, key=lambda v: v.name),
                sorted(target_q_func_vars, key=lambda v: v.name)):
            update_target_expr.append(var_target.assign(var))
        update_target_expr = tf.group(*update_target_expr)

        # Create callable functions
        train = U.function(inputs=[
            obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph,
            importance_weights_ph
        ],
                           outputs=td_error,
                           updates=[optimize_expr])
        update_target = U.function([], [], updates=[update_target_expr])

        q_values = U.function([obs_t_input], q_t)

        # TED: create callable functions
        trainAugmented = U.function(inputs=[
            obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph,
            importance_weights_ph, ment_obs_t_input, ment_obs_tp1_input,
            ment_act_t_ph, old_error_ph, old_imp_weights_ph
        ],
                                    outputs=final_td_error,
                                    updates=[final_optimize_expr])

        return act_f, train, trainAugmented, update_target, {
            'q_values': q_values
        }
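
The double_q branches above decouple action selection (online network) from action evaluation (target network), which removes the maximisation bias of vanilla DQN. A minimal NumPy sketch of the two targets (toy Q-values, not outputs of the function):

import numpy as np

q_tp1_online = np.array([[1.0, 5.0], [0.0, 2.0]])  # online net at s'
q_tp1_target = np.array([[0.5, 3.0], [4.0, 1.0]])  # target net at s'

# Vanilla DQN: max over the target net itself.
vanilla = q_tp1_target.max(axis=1)                 # [3.0, 4.0]

# Double DQN: argmax from the online net, value from the target net.
best = q_tp1_online.argmax(axis=1)                 # [1, 1]
double = q_tp1_target[np.arange(2), best]          # [3.0, 1.0]

rew, gamma = np.array([1.0, 0.0]), 0.99
done = np.array([0.0, 1.0])
target = rew + gamma * (1.0 - done) * double       # Bellman RHS, as above
print(vanilla, double, target)                     # the two differ on row 2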