def assign_w2v_pretrained_vectors(session, word2vec_model, embedding_key, vocab_path, vocab_size, id_to_check):
    embedding_variable = [v for v in tf.trainable_variables() if embedding_key in v.name]
    if len(embedding_variable) != 1:
        print("Word vector variable not found or too many. key: " + embedding_key)
        print("Existing embedding trainable variables:")
        print([v.name for v in tf.trainable_variables() if "embedding" in v.name])
        sys.exit(1)

    embedding_variable = embedding_variable[0]
    vectors = embedding_variable.eval()

    with gfile.GFile(vocab_path, mode="r") as vocab_file:
        counter = 0
        while counter < vocab_size:
            vocab_w = vocab_file.readline().replace("\n", "")
            # For each word in the vocabulary, check whether a w2v vector
            # exists and inject it; otherwise keep the random initialisation.
            if vocab_w and vocab_w in word2vec_model:
                w2w_word_vector = word2vec_model.get_vector(vocab_w)
                vectors[counter] = w2w_word_vector
            if counter == id_to_check:
                print(vectors[counter])
            counter += 1
    print("Reinitialising embeddings with pretrained")
    session.run(tf.assign(embedding_variable, vectors))
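A minimal usage sketch, not part of the original: it assumes a gensim KeyedVectors model (which supports both `word in model` and `model.get_vector(word)`), with hypothetical paths and sizes.

from gensim.models import KeyedVectors

w2v = KeyedVectors.load_word2vec_format("vectors.bin", binary=True)  # assumed path
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    assign_w2v_pretrained_vectors(sess, w2v, embedding_key="embedding",
                                  vocab_path="vocab.txt", vocab_size=50000,
                                  id_to_check=42)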
Example no. 2
File: rnn.py Project: wpm/tfrnnlm
    def __init__(self, max_gradient, batch_size, time_steps, vocabulary_size, hidden_units, layers):
        self.max_gradient = max_gradient
        self.layers = layers
        # Add vocabulary slots for out-of-vocabulary (index 1) and padding (index 0) tokens.
        vocabulary_size += 2

        with tf.name_scope("Parameters"):
            self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")
            self.keep_probability = tf.placeholder(tf.float32, name="keep_probability")

        with tf.name_scope("Input"):
            self.input = tf.placeholder(tf.int64, shape=(batch_size, time_steps), name="input")
            self.targets = tf.placeholder(tf.int64, shape=(batch_size, time_steps), name="targets")
            self.init = tf.placeholder(tf.float32, shape=(), name="init")

        with tf.name_scope("Embedding"):
            self.embedding = tf.Variable(tf.random_uniform((vocabulary_size, hidden_units), -self.init, self.init),
                                         dtype=tf.float32,
                                         name="embedding")
            self.embedded_input = tf.nn.embedding_lookup(self.embedding, self.input, name="embedded_input")

        with tf.name_scope("RNN"):
            cell = tf.nn.rnn_cell.LSTMCell(hidden_units)
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_probability)
            rnn_layers = tf.nn.rnn_cell.MultiRNNCell([cell] * layers)
            self.reset_state = rnn_layers.zero_state(batch_size, dtype=tf.float32)
            self.state = tf.placeholder(tf.float32, self.reset_state.get_shape(), "state")
            self.outputs, self.next_state = tf.nn.dynamic_rnn(rnn_layers, self.embedded_input, time_major=True,
                                                              initial_state=self.state)

        with tf.name_scope("Cost"):
            # Concatenate all the batches into a single row.
            self.flattened_outputs = tf.reshape(tf.concat(1, self.outputs), (-1, hidden_units),
                                                name="flattened_outputs")
            # Project the outputs onto the vocabulary.
            self.w = tf.get_variable("w", (hidden_units, vocabulary_size))
            self.b = tf.get_variable("b", vocabulary_size)
            self.predicted = tf.matmul(self.flattened_outputs, self.w) + self.b
            # Compare predictions to labels.
            self.loss = tf.nn.seq2seq.sequence_loss_by_example([self.predicted], [tf.concat(-1, self.targets)],
                                                               [tf.ones(batch_size * time_steps)])
            self.cost = tf.div(tf.reduce_sum(self.loss), batch_size, name="cost")

        with tf.name_scope("Train"):
            self.validation_perplexity = tf.Variable(dtype=tf.float32, initial_value=float("inf"), trainable=False,
                                                     name="validation_perplexity")
            tf.scalar_summary(self.validation_perplexity.op.name, self.validation_perplexity)
            self.training_epoch_perplexity = tf.Variable(dtype=tf.float32, initial_value=float("inf"), trainable=False,
                                                         name="training_epoch_perplexity")
            tf.scalar_summary(self.training_epoch_perplexity.op.name, self.training_epoch_perplexity)
            self.iteration = tf.Variable(0, dtype=tf.int64, name="iteration", trainable=False)
            self.gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tf.trainable_variables()),
                                                       max_gradient, name="clip_gradients")
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
            self.train_step = optimizer.apply_gradients(zip(self.gradients, tf.trainable_variables()),
                                                        name="train_step",
                                                        global_step=self.iteration)

        self.initialize = tf.initialize_all_variables()
        self.summary = tf.merge_all_summaries()
    def train(self, total_loss):
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        losses = tf.get_collection('losses')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        for l in losses + [total_loss]:
            tf.scalar_summary(l.op.name + ' (raw)', l)

        # Apply gradients, and add histograms
        with tf.control_dependencies([loss_averages_op]):
            opt = tf.train.AdamOptimizer()
            grads = opt.compute_gradients(total_loss)
        apply_gradient_op = opt.apply_gradients(grads)
        for var in tf.trainable_variables():
            tf.histogram_summary(var.op.name, var)
        for grad, var in grads:
            if grad is not None:
                tf.histogram_summary(var.op.name + '/gradients', grad)

        # Track the moving averages of all trainable variables
        variable_averages = tf.train.ExponentialMovingAverage(Recognizer.MOVING_AVERAGE_DECAY)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')
        return train_op
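A small hedged addition: the same loss_averages object can also emit the smoothed counterpart of each raw loss, which is the usual reason for applying it before the optimizer runs. Inside train() this could look like:

        # Read back the shadow (averaged) value of each loss for TensorBoard,
        # next to the '(raw)' summaries created above.
        for l in losses + [total_loss]:
            tf.scalar_summary(l.op.name + ' (avg)', loss_averages.average(l))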
Example no. 4
  def testFunctionalDenseTwiceReuse(self):
    inputs = tf.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2, name='my_dense')
    vars1 = tf.trainable_variables()
    core_layers.dense(inputs, 2, name='my_dense', reuse=True)
    vars2 = tf.trainable_variables()
    self.assertEqual(vars1, vars2)
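Why the assertion holds: with reuse=True the second dense call binds to the variables the first call created, so no new trainables appear. A hedged sketch, with variable names assumed from tf.layers' conventions:

  # Fetch the shared kernel directly by name under a reusing scope.
  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    kernel = tf.get_variable('my_dense/kernel')  # the Variable both calls share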
Example no. 5
def train(total_loss, global_step):
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary('learning_rate', lr)

    loss_averages_op = _add_loss_summaries(total_loss)

    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)

    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
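Since the staircase schedule is the only moving part here, a plain-Python recap of what exponential_decay computes (illustrative numbers, not from the source):

# lr(step) = INITIAL_LEARNING_RATE * DECAY_FACTOR ** floor(step / decay_steps)
def staircase_lr(initial_lr, decay_factor, decay_steps, step):
    return initial_lr * decay_factor ** (step // decay_steps)

assert staircase_lr(0.1, 0.5, 1000, 999) == 0.1    # before the first drop
assert staircase_lr(0.1, 0.5, 1000, 1000) == 0.05  # halved at step 1000
assert staircase_lr(0.1, 0.5, 1000, 2500) == 0.025 # halved again at step 2000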
  def _CheckDecay(self, ema, actual_decay, dim):
    tens = _Repeat(10.0, dim)
    thirties = _Repeat(30.0, dim)
    var0 = tf.Variable(tens, name="v0")
    var1 = tf.Variable(thirties, name="v1")
    tf.initialize_all_variables().run()
    # Note that tensor2 is not a Variable but just a plain Tensor resulting
    # from the sum operation.
    tensor2 = var0 + var1
    update = ema.apply([var0, var1, tensor2])
    avg0 = ema.average(var0)
    avg1 = ema.average(var1)
    avg2 = ema.average(tensor2)

    self.assertItemsEqual([var0, var1], tf.moving_average_variables())

    self.assertFalse(avg0 in tf.trainable_variables())
    self.assertFalse(avg1 in tf.trainable_variables())
    self.assertFalse(avg2 in tf.trainable_variables())
    tf.initialize_all_variables().run()

    self.assertEqual("v0/ExponentialMovingAverage:0", avg0.name)
    self.assertEqual("v1/ExponentialMovingAverage:0", avg1.name)
    self.assertEqual("add/ExponentialMovingAverage:0", avg2.name)

    # Check initial values.
    self.assertAllClose(tens, var0.eval())
    self.assertAllClose(thirties, var1.eval())
    self.assertAllClose(_Repeat(10.0 + 30.0, dim), tensor2.eval())

    # Check that averages are initialized correctly.
    self.assertAllClose(tens, avg0.eval())
    self.assertAllClose(thirties, avg1.eval())
    # Note that averages of Tensors initialize to zeros_like since no value
    # of the Tensor is known because the Op has not been run (yet).
    self.assertAllClose(_Repeat(0.0, dim), avg2.eval())

    # Update the averages and check.
    update.run()
    dk = actual_decay

    expected = _Repeat(10.0 * dk + 10.0 * (1 - dk), dim)
    self.assertAllClose(expected, avg0.eval())
    expected = _Repeat(30.0 * dk + 30.0 * (1 - dk), dim)
    self.assertAllClose(expected, avg1.eval())
    expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk), dim)
    self.assertAllClose(expected, avg2.eval())

    # Again, update the averages and check.
    update.run()
    expected = _Repeat((10.0 * dk + 10.0 * (1 - dk)) * dk + 10.0 * (1 - dk),
                       dim)
    self.assertAllClose(expected, avg0.eval())
    expected = _Repeat((30.0 * dk + 30.0 * (1 - dk)) * dk + 30.0 * (1 - dk),
                       dim)
    self.assertAllClose(expected, avg1.eval())
    expected = _Repeat(((0.0 * dk + (10.0 + 30.0) * (1 - dk)) * dk +
                        (10.0 + 30.0) * (1 - dk)),
                       dim)
    self.assertAllClose(expected, avg2.eval())
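Every expected value in this test follows the single update rule avg <- decay * avg + (1 - decay) * value; a plain-Python recap for the Tensor average (decay 0.9 chosen for illustration):

def ema_step(avg, value, decay):
    return avg * decay + value * (1 - decay)

avg2 = 0.0                        # averages of plain Tensors start at zero
avg2 = ema_step(avg2, 40.0, 0.9)  # 0 * 0.9 + 40 * 0.1 = 4.0
avg2 = ema_step(avg2, 40.0, 0.9)  # 4 * 0.9 + 40 * 0.1 = 7.6
assert abs(avg2 - 7.6) < 1e-9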
Example no. 7
  def testCompatibleNames(self):
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()):
      cell = tf.nn.rnn_cell.LSTMCell(10)
      pcell = tf.nn.rnn_cell.LSTMCell(10, use_peepholes=True)
      inputs = [tf.zeros([4, 5])] * 6
      tf.nn.rnn(cell, inputs, dtype=tf.float32, scope="basic")
      tf.nn.rnn(pcell, inputs, dtype=tf.float32, scope="peephole")
      basic_names = {v.name: v.get_shape() for v in tf.trainable_variables()}

    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()):
      cell = tf.contrib.rnn.LSTMBlockCell(10, use_compatible_names=True)
      pcell = tf.contrib.rnn.LSTMBlockCell(
          10, use_peephole=True, use_compatible_names=True)
      inputs = [tf.zeros([4, 5])] * 6
      tf.nn.rnn(cell, inputs, dtype=tf.float32, scope="basic")
      tf.nn.rnn(pcell, inputs, dtype=tf.float32, scope="peephole")
      block_names = {v.name: v.get_shape() for v in tf.trainable_variables()}

    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()):
      cell = tf.contrib.rnn.LSTMBlockFusedCell(10)
      pcell = tf.contrib.rnn.LSTMBlockFusedCell(10, use_peephole=True)
      inputs = [tf.zeros([4, 5])] * 6
      cell(inputs, dtype=tf.float32, scope="basic/LSTMCell")
      pcell(inputs, dtype=tf.float32, scope="peephole/LSTMCell")
      fused_names = {v.name: v.get_shape() for v in tf.trainable_variables()}

    self.assertEqual(basic_names, block_names)
    self.assertEqual(basic_names, fused_names)
def build_model(x, y_, n_workers, is_chief):
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, 60000 / BATCH_SIZE, LEARNING_RATE_DECAY)
   
    # Use tf.train.SyncReplicasOptimizer to perform synchronous updates.
    opt = tf.train.SyncReplicasOptimizer(
        tf.train.GradientDescentOptimizer(learning_rate),
        replicas_to_aggregate=n_workers,
        total_num_replicas=n_workers)

    train_op = opt.minimize(loss, global_step=global_step)     
    if is_chief:
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([variables_averages_op, train_op]):
            train_op = tf.no_op()

    return global_step, loss, train_op, opt
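A hedged usage note, not from the source: SyncReplicasOptimizer only aggregates gradients when its hook is attached to the training session. Something like the following, where `server` is a hypothetical tf.train.Server:

global_step, loss, train_op, opt = build_model(x, y_, n_workers, is_chief)
sync_hook = opt.make_session_run_hook(is_chief)
with tf.train.MonitoredTrainingSession(master=server.target,
                                       is_chief=is_chief,
                                       hooks=[sync_hook]) as sess:
    while not sess.should_stop():
        sess.run(train_op)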
Example no. 9
def create_critic_train_op(hparams, critic_loss, global_step):
  """Create Discriminator train op."""
  with tf.name_scope('train_critic'):
    critic_optimizer = tf.train.AdamOptimizer(hparams.critic_learning_rate)
    output_vars = [
        v for v in tf.trainable_variables() if v.op.name.startswith('critic')
    ]

    if FLAGS.critic_update_dis_vars:
      if FLAGS.discriminator_model == 'bidirectional_vd':
        critic_vars = [
            v for v in tf.trainable_variables()
            if v.op.name.startswith('dis/rnn')
        ]
      elif FLAGS.discriminator_model == 'seq2seq_vd':
        critic_vars = [
            v for v in tf.trainable_variables()
            if v.op.name.startswith('dis/decoder/rnn/multi_rnn_cell')
        ]
      critic_vars.extend(output_vars)
    else:
      critic_vars = output_vars
    print('\nOptimizing Critic vars:')
    for v in critic_vars:
      print(v)
    critic_grads = tf.gradients(critic_loss, critic_vars)
    critic_grads_clipped, _ = tf.clip_by_global_norm(critic_grads,
                                                     FLAGS.grad_clipping)
    critic_train_op = critic_optimizer.apply_gradients(
        zip(critic_grads_clipped, critic_vars), global_step=global_step)
    return critic_train_op, critic_grads_clipped, critic_vars
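For reference, a plain-NumPy sketch (illustrative only) of what clip_by_global_norm does above: when the joint norm of all gradients exceeds the threshold, each one is rescaled by clip_norm / global_norm, preserving their relative directions.

import numpy as np

def clip_by_global_norm_np(grads, clip_norm):
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if global_norm > clip_norm:
        grads = [g * (clip_norm / global_norm) for g in grads]
    return grads, global_norm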
Example no. 10
def train(total_loss, global_step):
    total_sample = 274
    num_batches_per_epoch = total_sample / 1  # 274 samples, batch size 1
    # Fixed learning rate.
    lr = INITIAL_LEARNING_RATE
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
      opt = tf.train.AdamOptimizer(lr)
      grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
      train_op = tf.no_op(name='train')

    return train_op
Example no. 11
  def testMap_Scoped(self):
    with self.test_session() as sess:

      def double_scoped(x):
        """2x with a dummy 2 that is scoped."""
        with tf.variable_scope("body"):
          # Dummy variable, just to check that scoping works as intended.
          two = tf.get_variable("two", [], dtype=tf.int32,
                                initializer=tf.constant_initializer(2))
          return tf.mul(x, two)

      with tf.variable_scope("root") as varscope:
        elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")
        doubles = np.array([2*x for x in [1, 2, 3, 4, 5, 6]])

        r = tf.map_fn(double_scoped, elems)
        # Check that we have the one variable we asked for here.
        self.assertEqual(len(tf.trainable_variables()), 1)
        self.assertEqual(tf.trainable_variables()[0].name, "root/body/two:0")
        sess.run([tf.initialize_all_variables()])
        self.assertAllEqual(doubles, r.eval())

        # Now let's reuse our single variable.
        varscope.reuse_variables()
        r = tf.map_fn(double_scoped, elems)
        self.assertEqual(len(tf.trainable_variables()), 1)
        self.assertAllEqual(doubles, r.eval())
Example no. 12
def dis_decoder_seq2seq(hparams):
  assert FLAGS.discriminator_model == 'seq2seq_vd'
  assert hparams.dis_num_layers == 2

  if not FLAGS.dis_share_embedding:
    decoder_embedding = [
        v for v in tf.trainable_variables()
        if v.op.name == 'dis/decoder/rnn/embedding'
    ][0]
  decoder_lstm_w_0 = [
      v for v in tf.trainable_variables()
      if v.op.name ==
      'dis/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/weights'
  ][0]
  decoder_lstm_b_0 = [
      v for v in tf.trainable_variables()
      if v.op.name ==
      'dis/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/biases'
  ][0]
  decoder_lstm_w_1 = [
      v for v in tf.trainable_variables()
      if v.op.name ==
      'dis/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/weights'
  ][0]
  decoder_lstm_b_1 = [
      v for v in tf.trainable_variables()
      if v.op.name ==
      'dis/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/biases'
  ][0]

  if FLAGS.data_set == 'ptb':
    model_str = 'Model'
  else:
    model_str = 'model'

  if not FLAGS.dis_share_embedding:
    variable_mapping = {
        str(model_str) + '/embedding':
            decoder_embedding,
        str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/weights':
            decoder_lstm_w_0,
        str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/biases':
            decoder_lstm_b_0,
        str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/weights':
            decoder_lstm_w_1,
        str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/biases':
            decoder_lstm_b_1
    }
  else:
    variable_mapping = {
        str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/weights':
            decoder_lstm_w_0,
        str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/biases':
            decoder_lstm_b_0,
        str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/weights':
            decoder_lstm_w_1,
        str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/biases':
            decoder_lstm_b_1,
    }
  return variable_mapping
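A hedged sketch of how such a name-to-variable mapping is typically consumed (checkpoint path hypothetical): a Saver keyed by the generator-side checkpoint names restores those weights into the discriminator's decoder variables.

variable_mapping = dis_decoder_seq2seq(hparams)
saver = tf.train.Saver(var_list=variable_mapping)
with tf.Session() as sess:
  saver.restore(sess, '/tmp/generator_model.ckpt')  # hypothetical path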
	def __init__(self, num_actions, num_states, num_trainable_vars):		
		self._num_actions = num_actions
		self._num_states = num_states
	
		# Input (not the cell state)
		self.state = tf.placeholder(tf.float32, [1,num_states])

		# Weights for policy output layer
		self.W_fc1 = self.init_torch_matrix([rnn_size, num_actions])
		self.b_fc1 = self.init_torch_vector([num_actions], rnn_size)
		
		# Weights for value output layer
		self.W_fc2 = self.init_torch_matrix([rnn_size, 1])
		self.b_fc2 = self.init_torch_vector([1], rnn_size)	
		
		rnn_cell = tf.nn.rnn_cell.BasicRNNCell(rnn_size, activation=tf.identity) ### Use LSTM
		### Dropout?
		self.cell = tf.nn.rnn_cell.MultiRNNCell([rnn_cell] * num_rnn_layers)

		self.rnn_state = self.cell.zero_state(1, tf.float32)
		output, rnn_state_out = self.cell(self.state, self.rnn_state)
		
		self.rnn_state_out = rnn_state_out
	
		# policy (output)
		self.pi = tf.nn.softmax(tf.matmul(output, self.W_fc1) + self.b_fc1)

		# value - linear output layer
		self.v = tf.matmul(output, self.W_fc2) + self.b_fc2
		
		if num_trainable_vars[0] is None:
			num_trainable_vars[0] = len(tf.trainable_variables())
		
		self.trainable_vars = tf.trainable_variables()[-num_trainable_vars[0]:]
def evaluate():
    with tf.Graph().as_default():
        # Load the test data.
        images, labels = data_inputs.inputs('data/train_kirin_norm_32.tfrecords')
        logits = model.inference(images)

        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        
        variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
        variables_to_restore = {}
        for v in tf.all_variables():
            if v in tf.trainable_variables():
                restore_name = variable_averages.average_name(v)
            else:
                restore_name = v.op.name
            variables_to_restore[restore_name] = v
        saver = tf.train.Saver(variables_to_restore)
        summary_op = tf.merge_all_summaries()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
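A hedged aside: ExponentialMovingAverage can build the same dictionary in one call; for trainable variables this is equivalent to the loop above.

variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)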
    def update_parameters(self, loss):
        if self.regularization_constant != 0:
            l2_norm = tf.reduce_sum([tf.sqrt(tf.reduce_sum(tf.square(param))) for param in tf.trainable_variables()])
            loss = loss + self.regularization_constant*l2_norm

        optimizer = self.get_optimizer(self.learning_rate_var, self.beta1_decay_var)
        grads = optimizer.compute_gradients(loss)
        clipped = [(tf.clip_by_value(g, -self.grad_clip, self.grad_clip), v_) for g, v_ in grads]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            step = optimizer.apply_gradients(clipped, global_step=self.global_step)

        if self.enable_parameter_averaging:
            maintain_averages_op = self.ema.apply(tf.trainable_variables())
            with tf.control_dependencies([step]):
                self.step = tf.group(maintain_averages_op)
        else:
            self.step = step

        logging.info('all parameters:')
        logging.info(pp.pformat([(var.name, shape(var)) for var in tf.global_variables()]))

        logging.info('trainable parameters:')
        logging.info(pp.pformat([(var.name, shape(var)) for var in tf.trainable_variables()]))

        logging.info('trainable parameter count:')
        logging.info(str(np.sum(np.prod(shape(var)) for var in tf.trainable_variables())))
Esempio n. 16
0
def optim(loss, **kwargs):
    r"""Applies gradients to variables.

    Args:
        loss: A 0-D `Tensor` containing the value to minimize.
        kwargs:
          optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', or 'sgd'.
          lr: A Python Scalar (optional). Learning rate. Default is .001.
          beta1: A Python Scalar (optional). Default is .9.
          beta2: A Python Scalar (optional). Default is .99.
          category: A string or string list. Specifies the variables that should be trained (optional).
            A trainable variable is updated only if its name starts with `category`.
            Default is '', which means all trainable variables are updated.
    """
    opt = Opt(kwargs)

    # default training options
    opt += Opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='')

    # select optimizer
    # if opt.optim == 'MaxProp':
        # optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    # elif opt.optim == 'AdaMax':
        # optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    if opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend([t for t in tf.trainable_variables() if t.name.startswith(cat)])
    else:
        var_list = [t for t in tf.trainable_variables() if t.name.startswith(opt.category)]

    # calc gradient
    gradient = optim.compute_gradients(loss, var_list=var_list)

    # add summary
    for g, v in gradient:
        # exclude batch normal statics
        if 'mean' not in v.name and 'variance' not in v.name \
                and 'beta' not in v.name and 'gamma' not in v.name:
                prefix = ''
                # summary name
                name = prefix + ''.join(v.name.split(':')[:-1])
                # summary statistics
                # noinspection PyBroadException
                try:
                    tf.summary.scalar(name + '/grad', tf.global_norm([g]))
                    tf.summary.histogram(name + '/grad-h', g)
                except:
                    pass
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # gradient update op
    return optim.apply_gradients(gradient, global_step=global_step), global_step
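A hypothetical invocation (loss and scope names assumed): train only the variables under a 'generator' prefix with Adam, leaving everything else frozen.

train_op, global_step = optim(my_loss, optim='Adam', lr=1e-3, category='generator')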
Example no. 17
    def __init__(self, actions, name=NAME, learning_rate=1e-4,  x_dim=210, y_dim=160, eps_start=1.0, eps_decay=0.0000001, eps_end=0.1, num_channels=3, should_train=True, from_checkpoint=None, player_id=1):
        Agent.__init__(self, name=name, actions=[])
        self.learning_rate = learning_rate
        self.x_dim, self.y_dim = x_dim, y_dim
        self.actions, self.num_actions = actions, len(actions)
        self.hidden_layers = [32, 32]
        self.num_channels = num_channels
        self.eps_start, self.epsilon_decay, self.epsilon_end = eps_start, eps_decay, eps_end
        self.should_train = should_train
        self.reset()

        # Parameters for updating target network.
        tau = 0.001

        # TODO: Update to support player_id > 2.
        # NOTE: This is a bit of a hack to update the variables in the target
        # network. It can be fixed by using scope and Tensorflow 1.4 which takes
        # a scope argument in tf.trainable_variables().
        if player_id == 2:
            vs = tf.trainable_variables()
            self.target_ops = update_target_graph(vs[len(vs)//2:], tau)
        else:
            self.target_ops = update_target_graph(tf.trainable_variables(), tau)

        # Load model from a checkpoint
        if from_checkpoint is not None:
            self.saver.restore(self.sess, from_checkpoint)
            print('Restored model from checkpoint: {}'.format(from_checkpoint))
Example no. 18
    def __init__(self,
                 sess,
                 state_dim,
                 action_dim,
                 learning_rate,
                 tau,
                 gamma,
                 name=None):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.learning_rate = learning_rate
        self.tau = tau
        self.gamma = gamma

        # build networks
        net_name = 'critic' if name is None else name
        with tf.variable_scope(net_name):
            (self.obs,
             self.action,
             self.q_value) = self.create_critic_network()
        self.params = tf.trainable_variables(scope=net_name)
        with tf.variable_scope(net_name + '_target'):
            (self.target_obs,
             self.target_action,
             self.target_q_value) = self.create_critic_network()
        self.target_params = tf.trainable_variables(scope=net_name + '_target')

        # build ops
        (self.update_target_op,
         self.y_ph,
         self.train_op,
         self.action_grad) = self.create_critic_ops()
Example no. 19
  def testCustomGetter(self):
    custom_getter = snt.custom_getters.Context(snt.custom_getters.stop_gradient)
    module = snt.nets.ConvNet2D(output_channels=self.output_channels,
                                kernel_shapes=self.kernel_shapes,
                                rates=self.rates,
                                strides=self.strides,
                                paddings=self.paddings,
                                custom_getter=custom_getter)

    input_shape = [10, 100, 100, 3]
    input_to_net = tf.random_normal(dtype=tf.float32, shape=input_shape)

    if tf.executing_eagerly():
      with tf.GradientTape() as tape0:
        out0 = module(input_to_net)
      with tf.GradientTape() as tape1:
        with custom_getter:
          out1 = module(input_to_net)
      all_vars = tf.trainable_variables()
      out0_grads = tape0.gradient(out0, all_vars)
      out1_grads = tape1.gradient(out1, all_vars)

    else:
      out0 = module(input_to_net)
      with custom_getter:
        out1 = module(input_to_net)
      all_vars = tf.trainable_variables()
      out0_grads = tf.gradients(out0, all_vars)
      out1_grads = tf.gradients(out1, all_vars)

    for grad in out0_grads:
      self.assertNotEqual(None, grad)
    self.assertEqual([None] * len(out1_grads), out1_grads)
Example no. 20
    def __init__(self,
                 sess,
                 state_dim,
                 action_dim,
                 action_high,
                 action_low,
                 learning_rate,
                 grad_norm_clip,
                 tau,
                 batch_size,
                 name=None):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.a_high = action_high
        self.a_low = action_low
        self.learning_rate = learning_rate
        self.grad_norm_clip = grad_norm_clip
        self.tau = tau
        self.batch_size = batch_size

        # create networks
        net_name = 'actor' if name is None else name
        with tf.variable_scope(net_name):
            self.obs, self.action = self.create_actor_network()
        self.params = tf.trainable_variables(scope=net_name)
        with tf.variable_scope(net_name + '_target'):
            self.target_obs, self.target_action = self.create_actor_network()
        self.target_params = tf.trainable_variables(scope=net_name + '_target')

        # create ops
        (self.update_target_op,
         self.action_gradient,
         self.train_op) = self.create_actor_ops()
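create_actor_ops() is not shown; a minimal sketch of the standard DDPG soft target update it presumably builds from self.params and self.target_params (a tau-weighted blend, written here as a standalone helper):

def soft_update_op(params, target_params, tau):
    # target <- tau * online + (1 - tau) * target, variable by variable
    return tf.group(*[
        tp.assign(tau * p + (1.0 - tau) * tp)
        for p, tp in zip(params, target_params)
    ])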
def grads_and_loss():
  """Creates loss tensor for resnet model."""
  images = tf.ones([BATCH_SIZE, HEIGHT, WIDTH, DEPTH])/1000
  labels = tf.ones(shape=[BATCH_SIZE, NUM_CLASSES])/1000
                                              
  #  images = tf.random_uniform((BATCH_SIZE, HEIGHT, WIDTH, DEPTH), seed=1)
  #  labels = tf.random_uniform((BATCH_SIZE, NUM_CLASSES), seed=1)
  if USE_TINY:
    network = resnet_model.tiny_resnet_v2(resnet_size=RESNET_SIZE, num_classes=NUM_CLASSES)
  else:
    network = resnet_model.resnet_v2(resnet_size=RESNET_SIZE,
                                     num_classes=NUM_CLASSES)

    
  inputs = tf.reshape(images, [BATCH_SIZE, HEIGHT, WIDTH, DEPTH])
  logits = network(inputs,True)
  cross_entropy = tf.losses.softmax_cross_entropy(logits=logits,
                                                  onehot_labels=labels)

  loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
    [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
  
  global_step = tf.train.get_or_create_global_step()
  optimizer = tf.train.MomentumOptimizer(
    learning_rate=_INITIAL_LEARNING_RATE,
    momentum=_MOMENTUM)

  # Batch norm requires update_ops to be added as a train_op dependency.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    grads = tf.gradients(loss, tf.trainable_variables())
    # TODO: move to train_op
    # train_op = optimizer.minimize(loss, global_step)
  return grads, loss
Example no. 22
    def build_gen_graph(self):
        # forward pass through generator
        # returns a (batch_size, sequence_length, input_dim) for generated
        self.generated, self.timestep_probs, self.predicted_rewards = self.generate()

        # get the predictions from the discriminator
        # returns a (batch_size, 1) output
        self.gen_scores = self.discriminate(self.generated, reuse=False)

        # formulate the policy gradient loss
        self.gen_train_loss_out, self.baseline_loss = self.gen_train_loss(self.gen_scores,
             self.predicted_rewards)

        # get generative parameters and baseline params
        self.g_params = [p for p in tf.trainable_variables() if 'g' in p.name and 'b' not in p.name]
        self.b_params = [p for p in tf.trainable_variables() if 'b' in p.name]

        # create the gen train op
        self.gen_optimize_rewards(self.gen_train_loss_out)

        # create the baseline train op
        if self.opts.with_baseline:
            self.optimize_baseline(self.baseline_loss)

        # initialize all variable and prep to save model
        tf.initialize_all_variables().run()
Example no. 23
  def run_model(self, train_config, eval_config):
    with tf.Graph().as_default() as g:
      train_model = base_model(params=train_config, mode="train", hvd=None)
      train_model.compile()
      eval_model = base_model(params=eval_config, mode="eval", hvd=None)
      eval_model.compile(force_var_reuse=True)

      train(train_model, eval_model)
      saver = tf.train.Saver()
      checkpoint = tf.train.latest_checkpoint(train_model.params['logdir'])
      with self.test_session(g, use_gpu=True) as sess:
        saver.restore(sess, checkpoint)
        sess.run([train_model.get_data_layer(i).iterator.initializer
                  for i in range(train_model.num_gpus)])
        sess.run([eval_model.get_data_layer(i).iterator.initializer
                  for i in range(eval_model.num_gpus)])

        weights = sess.run(tf.trainable_variables())
        loss = sess.run(train_model.loss)
        eval_losses = sess.run(eval_model.eval_losses)
        eval_loss = np.mean(eval_losses)
        weights_new = sess.run(tf.trainable_variables())

        # check that the weights have not changed from just computing the loss
        for w, w_new in zip(weights, weights_new):
          npt.assert_allclose(w, w_new)
      eval_dict = evaluate(eval_model, checkpoint)
    return loss, eval_loss, eval_dict
Example no. 24
def train(total_loss, global_step):
  """Train CIFAR-10 model.

  Create an optimizer and apply to all trainable variables. Add moving
  average for all trainable variables.

  Args:
    total_loss: Total loss from loss().
    global_step: Integer Variable counting the number of training steps
      processed.
  Returns:
    train_op: op for training.
  """
  # Variables that affect learning rate.
  num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
  decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

  # Decay the learning rate exponentially based on the number of steps.
  lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                  global_step,
                                  decay_steps,
                                  LEARNING_RATE_DECAY_FACTOR,
                                  staircase=True)
  tf.scalar_summary('learning_rate', lr)

  # Generate moving averages of all losses and associated summaries.
  loss_averages_op = _add_loss_summaries(total_loss)

  # Compute gradients.
  with tf.control_dependencies([loss_averages_op]):
    # opt = tf.train.GradientDescentOptimizer(lr)
    opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-08,
                                       use_locking=False,
                                       name='Adam')#.minimize(loss,global_step=batch)
    grads = opt.compute_gradients(total_loss)

  # Apply gradients.
  apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

  # Add histograms for trainable variables.
  for var in tf.trainable_variables():
    tf.histogram_summary(var.op.name, var)

  # Add histograms for gradients.
  for grad, var in grads:
    if grad is not None:
      tf.histogram_summary(var.op.name + '/gradients', grad)

  # Track the moving averages of all trainable variables.
  variable_averages = tf.train.ExponentialMovingAverage(
      MOVING_AVERAGE_DECAY, global_step)
  variables_averages_op = variable_averages.apply(tf.trainable_variables())

  with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
    train_op = tf.no_op(name='train')

  return train_op
Example no. 25
File: ddpg.py Project: ataitler/DQN
    def __init__(self, sess, state_dim, action_dim, learning_rate, tau, num_actor_vars):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.learning_rate = learning_rate
        self.tau = tau

        # Create the critic network
        self.inputs, self.action, self.out = self.create_critic_network()

        self.network_params = tf.trainable_variables()[num_actor_vars:]

        # Target Network
        self.target_inputs, self.target_action, self.target_out = self.create_critic_network()
        
        self.target_network_params = tf.trainable_variables()[(len(self.network_params) + num_actor_vars):]

        # Op for periodically updating target network with online network weights with regularization
        self.update_target_network_params = \
            [self.target_network_params[i].assign(tf.mul(self.network_params[i], self.tau) + tf.mul(self.target_network_params[i], 1. - self.tau))
                for i in range(len(self.target_network_params))]
    
        # Network target (y_i)
        self.predicted_q_value = tf.placeholder(tf.float32, [None, 1])

        # Define loss and optimization Op
        self.loss = tflearn.mean_square(self.predicted_q_value, self.out)
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)

        # Get the gradient of the net w.r.t. the action
        self.action_grads = tf.gradients(self.out, self.action)
Example no. 26
File: ddpg.py Project: ataitler/DQN
    def __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau

        # Actor Network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()

        self.network_params = tf.trainable_variables()

        # Target Network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        
        self.target_network_params = tf.trainable_variables()[len(self.network_params):]

        # Op for periodically updating target network with online network weights
        self.update_target_network_params = \
            [self.target_network_params[i].assign(tf.mul(self.network_params[i], self.tau) + \
                tf.mul(self.target_network_params[i], 1. - self.tau))
                for i in range(len(self.target_network_params))]

        # This gradient will be provided by the critic network
        self.action_gradient = tf.placeholder(tf.float32, [None, self.a_dim])
        
        # Combine the gradients here 
        self.actor_gradients = tf.gradients(self.scaled_out, self.network_params, -self.action_gradient)

        # Optimization Op
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).\
            apply_gradients(zip(self.actor_gradients, self.network_params))

        self.num_trainable_vars = len(self.network_params) + len(self.target_network_params)
Example no. 27
File: dqn.py Project: danfeiX/drl
def train(lr, total_loss, global_step):
    # Variables that affect learning rate.

    # Compute gradients.
    #with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # Add histograms for gradients.
    for i, (grad, var) in enumerate(grads):
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)
            grads[i] = (tf.clip_by_norm(grad, 5), var)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
 def __init__(self, learning_rate, cost, feed, sess, m, comm, size, rank):
     self.Y = []
     self.S = []
     self.YS = []
     self.cost = cost
     self.sess = sess
     self.NumIter = 0
     self.m = m
     self.counter = 0
     self.gradientEval = 0
     self.functionEval = 0
     self.last_func = 0
     self.innerEval = 0
     self.HessianEval = 0
     self.last_z1 = 0.01
     self.memorySize = 0
     self.rank = rank
     self.comm = comm
     self.size = size
     v = []
     self.assign_placeholders = []
     assign_op = []
     # Snapshot every trainable variable and build a placeholder-driven
     # assign op so weights can be pushed back into the graph later.
     for t in tf.trainable_variables():
         v.append(sess.run(t))
         self.assign_placeholders.append(
             tf.placeholder(shape=v[-1].shape, dtype="float32"))
         assign_op.append(t.assign(self.assign_placeholders[-1]))
     self.assign = tf.group(*assign_op)
     self.var = np.array(v)
     # self.var = np.load('var.npy')
     np.save('var.npy', self.var)
     comm.scatter(['Init' for i in range(size)], root=rank)
     self.gradient = tf.gradients(cost, tf.trainable_variables(),
                                  gate_gradients=True)
     self.learningRate = learning_rate
     self.old_grad = None
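A sketch of how the grouped assign op is meant to be driven, written as an extra method on this class; `new_values` is a hypothetical list of arrays, one per trainable variable in graph order.

 def set_weights(self, new_values):
     # Feed one array into each placeholder, then run the grouped assign.
     feed = dict(zip(self.assign_placeholders, new_values))
     self.sess.run(self.assign, feed_dict=feed)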
def train(total_loss, global_step):
    """Train the model.

    Creates an optimizer and applies it to all trainable variables.

    Args:
        total_loss: total cost from loss().
        global_step: Variable counting the number of training steps processed.
    Returns:
        train_op: op for training.
    """

    # Decay the learning rate exponentially based on the number of training steps.
    lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                    global_step,
                                    FLAGS.decay_steps,
                                    FLAGS.decay_rate,
                                    staircase=True)


    # Choose the optimizer to use.
    if FLAGS.optimezer == "GradientDescentOptimizer":
        opt = tf.train.GradientDescentOptimizer(lr)
    elif FLAGS.optimezer == "AdamOptimizer":
        opt = tf.train.AdamOptimizer(lr)
    elif FLAGS.optimezer == "AdadeltaOptimizer":
        opt = tf.train.AdadeltaOptimizer(lr)
    elif FLAGS.optimezer == "RMSPropOptimizer":
        opt = tf.train.RMSPropOptimizer(lr)
    elif FLAGS.optimezer == "ProximalGradientDescentOptimizer":
        opt = tf.train.ProximalGradientDescentOptimizer(lr)


    # Add summaries.
    tf.summary.scalar('learning_rate', lr)
    loss_averages_op = _add_loss_summaries(total_loss)

    with tf.control_dependencies([loss_averages_op]):
        # Compute the gradients.
        grads = opt.compute_gradients(total_loss)

    # Apply the gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add a histogram for each trainable variable.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    # Add a histogram for each gradient.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # Keep the moving average of the variables; useful for improving the optimizer's efficiency.
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    maintain_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op]):
        train_op = tf.group(maintain_averages_op)

    return train_op
def captcha_train(total_loss, global_step):
    """
    Train captcha model.

    Create an optimizer and apply to all trainable variables. Add moving
    average for all trainable variables.

    Args:
        total_loss: Total loss from loss().
        global_step: Integer Variable counting the number of training steps
          processed.
    Returns:
        train_op: op for training.
    """
    # Variables that affect learning rate.
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE,
        global_step,
        decay_steps,
        LEARNING_RATE_DECAY_FACTOR,
        staircase=True
    )
    tf.scalar_summary('learning_rate', lr)

    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer(lr)
        grads = opt.compute_gradients(total_loss)


    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    # Include batch-norm moving statistics (non-trainable *_mean / *_variance
    # variables) alongside the trainable weights.
    variables_to_average = list(set(
        tf.trainable_variables() +
        [v for v in tf.all_variables() if "_mean" in v.name or "_variance" in v.name]))
    variables_averages_op = variable_averages.apply(variables_to_average)

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings, softmax)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions, weights=is_real_example)
        loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }

      eval_metrics = (metric_fn,
                      [per_example_loss, label_ids, logits, is_real_example])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    return output_spec
Example no. 32
    def __init__(self,
                 source_vocab_size,
                 target_vocab_size,
                 buckets,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 keep_prob=1,
                 use_lstm=False,
                 num_samples=512,
                 forward_only=False):
        """Create the model.

    Args:
      source_vocab_size: size of the source vocabulary.
      target_vocab_size: size of the target vocabulary.
      buckets: a list of pairs (I, O), where I specifies maximum input length
        that will be processed in that bucket, and O specifies maximum output
        length. Training instances that have inputs longer than I or outputs
        longer than O will be pushed to the next bucket and padded accordingly.
        We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
      size: number of units in each layer of the model.
      num_layers: number of layers in the model.
      max_gradient_norm: gradients will be clipped to maximally this norm.
      batch_size: the size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: learning rate to start with.
      learning_rate_decay_factor: decay learning rate by this much when needed.
      use_lstm: if true, we use LSTM cells instead of GRU cells.
      num_samples: number of samples for sampled softmax.
      forward_only: if set, we do not construct the backward pass in the model.
    """
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.target_vocab_size:
            w = tf.get_variable("proj_w", [size, self.target_vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.target_vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples,
                                                  self.target_vocab_size)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.nn.rnn_cell.GRUCell(size)
        if use_lstm:
            single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
        cell = single_cell
        if keep_prob < 1:
            cell = tf.nn.rnn_cell.DropoutWrapper(cell,
                                                 output_keep_prob=keep_prob)
        if num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs,
                      decoder_inputs,
                      do_decode,
                      attention=False):
            if attention:
                return tf.nn.seq2seq.embedding_attention_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    num_encoder_symbols=source_vocab_size,
                    num_decoder_symbols=target_vocab_size,
                    embedding_size=size,
                    output_projection=output_projection,
                    feed_previous=do_decode)
            return tf.nn.seq2seq.embedding_rnn_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=size,
                output_projection=output_projection,
                feed_previous=do_decode)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        for i in xrange(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))

        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        # Training outputs and losses.
        if forward_only:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                for b in xrange(len(buckets)):
                    self.outputs[b] = [
                        tf.matmul(output, output_projection[0]) +
                        output_projection[1] for output in self.outputs[b]
                    ]
        else:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for b in xrange(len(buckets)):
                gradients = tf.gradients(self.losses[b], params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
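
For context, the output_projection and softmax_loss_function consumed above are typically built around a sampled-softmax loss. A minimal sketch, assuming a TF 1.x-style API; size, target_vocab_size, and num_samples are illustrative values, not taken from this snippet:

import tensorflow as tf

size, target_vocab_size, num_samples = 512, 40000, 512
w = tf.get_variable("proj_w", [size, target_vocab_size])
w_t = tf.transpose(w)  # sampled_softmax_loss expects [num_classes, dim] weights
b = tf.get_variable("proj_b", [target_vocab_size])
output_projection = (w, b)

def sampled_loss(labels, logits):
    # Sample a subset of the vocabulary instead of computing the full softmax.
    labels = tf.reshape(labels, [-1, 1])
    return tf.nn.sampled_softmax_loss(
        weights=w_t, biases=b, labels=labels, inputs=logits,
        num_sampled=num_samples, num_classes=target_vocab_size)

softmax_loss_function = sampled_loss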
Example n. 33
def test_suite(
        out_tensor,
        train_op,
        sess_conf=None,
        output_range=None,
        scope="",
        var_list=None,
        feed_dict=None,
        init_op=None,
        test_all_inputs_dependent=True,
        test_other_vars_dont_change=True,
        test_output_range=True,
        test_nan_vals=True,
        test_inf_vals=True):
    """Full set of common tests to run for most ML programs.
    Args:
      out_tensor: Output tensor of your model.
      train_op: Op you call to train the model.
      sess_conf: Session configuration to use.
      output_range: Optional. The range you expect your output to have.
          If None, then we test if the output has both positive and negative
          values.
      scope: Scope of the variables that are to be trained by the train_op.
          Default is "". Cannot be used with var_list.
      var_list: List of variables that will be changed by train_op. Default is
          None. Cannot be used with scope.
      feed_dict: Feed dict to pass whenever out_tensor or train_op is
          called. Default is None.
      init_op: The operation to call to initialize the network. If set to None,
          we call tf.global_variables_initializer()
      test_all_inputs_dependent: Make sure that train_op depends on all values
          passed in with feed_dict. We require that all inputs to the network
          are fed through "tf.placeholder". Defaults to True.
      test_other_vars_dont_change: Whether to check that the other variables in
          the graph don't change when we call train_op. Defaults to True.
      test_output_range: Whether to run the output range check. Defaults to True.
      test_nan_vals: Whether to check that out_tensor and train_op never
          produce NaN values. Defaults to True.
      test_inf_vals: Whether to check that out_tensor and train_op never
          produce Inf values. Defaults to True.
    Raises:
      VariablesChangeException: If a variable does/does not change
          when it should not/should have.
      RangeException: If the output range does not conform to what was
          expected.
      DependencyException: If the train_op can be called successfully
          without passing in values for the placeholders in feed_dict.
      tf.errors.InvalidArgumentError: If you are missing a variable that
          train_op depends on in feed_dict.
    """

    # Grab the necessary variables.
    if var_list is None:
        variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
    else:
        variables = var_list
    other_vars = list(set(tf.trainable_variables()) - set(variables))

    # Run default variable changes test.
    assert_vars_change(train_op, sess_conf=sess_conf,
                       scope=scope, var_list=var_list, feed_dict=feed_dict)
    # Run the 'other variables' test.
    if test_other_vars_dont_change:
        assert_vars_same(train_op, sess_conf=sess_conf,
                         var_list=other_vars, feed_dict=feed_dict)

    # Run the range tests
    if test_output_range:
        if output_range is None:
            assert_any_greater_than(out_tensor, 0, sess_conf=sess_conf,
                                    feed_dict=feed_dict, init_op=init_op)
            assert_any_less_than(out_tensor, 0, sess_conf=sess_conf,
                                 feed_dict=feed_dict, init_op=init_op)
        else:
            assert_all_greater_than(out_tensor, output_range[0],
                                    sess_conf=sess_conf,
                                    feed_dict=feed_dict, init_op=init_op)
            assert_all_less_than(out_tensor, output_range[1],
                                 sess_conf=sess_conf, feed_dict=feed_dict,
                                 init_op=init_op)

    # Run the dependency tests.
    if test_all_inputs_dependent:
        assert_input_dependency(train_op, feed_dict, sess_conf, init_op)
    if test_nan_vals:
        assert_never_nan(out_tensor, feed_dict, sess_conf, init_op)
        assert_never_nan(train_op, feed_dict, sess_conf, init_op)
    if test_inf_vals:
        assert_never_inf(out_tensor, feed_dict, sess_conf, init_op)
        assert_never_inf(train_op, feed_dict, sess_conf, init_op)
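
A minimal usage sketch for test_suite, assuming a toy linear model; X, y, the dense layer, and the hyperparameters are illustrative, not part of the suite:

import numpy as np
import tensorflow as tf

X = tf.placeholder(tf.float32, shape=(None, 4), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
out = tf.layers.dense(X, 1)  # model output
loss = tf.reduce_mean(tf.square(out - y))
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# Runs the variable-change, output-range, dependency, NaN, and Inf checks in one call.
test_suite(out, train_op,
           feed_dict={X: np.random.randn(8, 4), y: np.random.randn(8, 1)})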
Example n. 34
    def buildNetwork(self):
        # The state input has a variable number of time steps, so training and testing can use different step counts.
        self.states = tf.placeholder(tf.float32,
                                     shape=[None, None, self.inputSize],
                                     name="states")
        self.actions_taken = tf.placeholder(tf.float32,
                                            shape=[None],
                                            name="actions_taken")
        self.critic_feedback = tf.placeholder(tf.float32,
                                              shape=[None],
                                              name="critic_feedback")
        self.critic_rewards = tf.placeholder(tf.float32,
                                             shape=[None],
                                             name="critic_rewards")

        # PolicyNetwork
        with tf.variable_scope("Policy"):

            # Three fully connected layers (hiddenSize units each) for feature extraction
            L0 = tf.contrib.layers.fully_connected(
                inputs=self.states,
                num_outputs=self.hiddenSize,  #hidden
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=1.0),
                biases_initializer=tf.zeros_initializer())
            L01 = tf.contrib.layers.fully_connected(
                inputs=L0,
                num_outputs=self.hiddenSize,  #hidden
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=1.0),
                biases_initializer=tf.zeros_initializer())
            L1 = tf.contrib.layers.fully_connected(
                inputs=L01,
                num_outputs=self.hiddenSize,  #hidden
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=1.0),
                biases_initializer=tf.zeros_initializer())

            # Construct an LSTM cell with neuronNum units
            lstmcell = tf.contrib.rnn.BasicLSTMCell(self.neuronNum,
                                                    forget_bias=1.0,
                                                    state_is_tuple=True)
            cell_drop = tf.contrib.rnn.DropoutWrapper(
                lstmcell, output_keep_prob=0.5)  # dropout to prevent overfitting
            #lstmcell = tf.contrib.rnn.BasicLSTMCell(self.neuronNum, forget_bias=1.0, state_is_tuple=True,activation=tf.nn.relu)
            cell = tf.contrib.rnn.MultiRNNCell([cell_drop for _ in range(2)],
                                               state_is_tuple=True)

            # The RNN tracks the state features at the current and next time steps
            #if self.trainable:
            outputnew, statenew = tf.nn.dynamic_rnn(cell, L1, dtype=tf.float32)

            outputs = outputnew[:, 1, :]
            print("outputs")
            print(outputs)
            print(outputnew)

            # last layer is a fully connected network + softmax
            softmax_w = tf.get_variable(
                "softmax_w", [self.neuronNum, 3],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1.0))
            softmax_b = tf.get_variable("softmax_b", [3], dtype=tf.float32)
            logits = tf.matmul(outputs, softmax_w) + softmax_b
            self.probs = tf.nn.softmax(logits, name="action")
            # fetch the maximum probability
            self.action0 = tf.reduce_max(self.probs, axis=1)
            # fetch the index of the maximum probability
            self.argAction = tf.argmax(self.probs, axis=1)

            # Loss and training op
            #self.policyloss =policyloss  = tf.log(self.action0)*(self.critic_rewards-self.critic_feedback)
            self.policyloss = policyloss = tf.log(
                self.action0) * self.critic_rewards
            loss = tf.negative(tf.reduce_mean(policyloss), name="loss")
            tf.summary.scalar('actor_loss', tf.abs(loss))
            self.actor_train = tf.train.AdamOptimizer(0.01).minimize(loss)

            self.atvars = tvars = tf.trainable_variables()
            #print(tvars)
            #self.gg=tf.gradients(loss, tvars)
            #self.agrads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),5)
            #print(self.agrads)
            #optimizer = tf.train.AdamOptimizer(0.001)
            #self.actor_train = optimizer.apply_gradients(zip(self.agrads, tvars))

        # Critic Network
        with tf.variable_scope("critic") as scopeB:

            self.critic_target = tf.placeholder(tf.float32,
                                                name="critic_target")

            # Construct one fully connected layer
            critic_L1 = tf.nn.relu(tf.matmul(self.states, self.w) + self.b)
            #critic_L1= tf.contrib.layers.fully_connected(
            #    inputs=self.states,
            #    num_outputs= self.hiddenSize, #hidden
            #    activation_fn= tf.nn.relu,
            #weights_initializer = tf.truncated_normal_initializer(stddev=1.0),
            #biases_initializer = tf.zeros_initializer()
            #    weights_initializer=self.w,
            #    biases_initializer=self.b
            #biases_initializer = tf.zeros_initializer()
            #)
            # Construct an LSTM cell with dropout
            lstmcell = tf.contrib.rnn.BasicLSTMCell(self.neuronNum,
                                                    forget_bias=1.0,
                                                    state_is_tuple=True)
            cell = tf.contrib.rnn.DropoutWrapper(lstmcell,
                                                 output_keep_prob=0.5)
            #lstmcell=tf.contrib.rnn.BasicLSTMCell(self.neuronNum, forget_bias=1.0, state_is_tuple=True,activation=tf.nn.relu)
            #cell_drop=tf.contrib.rnn.DropoutWrapper(lstmcell, output_keep_prob=0.5)
            #cell = tf.contrib.rnn.MultiRNNCell([cell_drop for _ in range(2)], state_is_tuple=True)

            state = cell.zero_state(self.stepNum, tf.float32)

            # a feature has a length of inputSize
            with tf.variable_scope("criticScope"):
                for i in range(self.inputSize):
                    cellinput = tf.reshape(critic_L1[:, i], [-1, 1])
                    (output, state) = cell(cellinput, state)
                    #outputs.append(tf.reshape(output,[-1]))
                    tf.get_variable_scope().reuse_variables()

            nowbatch = self.stepNum
            nowinput = []
            start = tf.constant(0, dtype=tf.float32, shape=[128], name="zeros")
            nowinput.append([start, critic_L1[0, :]])
            for i in range(0, self.stepNum - 1):
                nowinput.append([critic_L1[i, :], critic_L1[i + 1, :]])
            nowinput = tf.reshape(nowinput, [-1, 2, 128])
            outputnew, statenew = tf.nn.dynamic_rnn(cell,
                                                    nowinput,
                                                    dtype=tf.float32)

            output = outputnew[:, 1, :]

            #state = cell.zero_state(1, tf.float32)
            #ss_step= tf.unstack(critic_L1)
            #outputs=[]
            #with tf.variable_scope("criticScope"):
            #    for i in ss_step:
            #        ii=tf.reshape(i,[1,-1])
            #        (output, state) = cell(ii, state)
            #        outputs.append(tf.reshape(output,[-1]))
            #        tf.get_variable_scope().reuse_variables()
            #output=outputs

            #print("critic")
            #print(np.shape(outputs))

            #output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, 10])

            # weights = tf.Variable(tf.truncated_normal([28, 10],stddev=1.0 / math.sqrt(float(28))),name='weights')
            # biases = tf.Variable(tf.zeros([10]),name='biases')
            # logits = tf.matmul(cell_output, weights) + biases
            self.critic_value = tf.contrib.layers.fully_connected(
                inputs=output,
                num_outputs=1,  # scalar critic value
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=1.0),
                biases_initializer=tf.zeros_initializer())

            # Loss and training op
            self.critic_loss = critic_loss = tf.reduce_mean(
                tf.square(self.critic_target - self.critic_value), name="loss")
            tf.summary.scalar('critic_loss', self.critic_loss)
            self.critic_train = tf.train.AdamOptimizer(0.01).minimize(
                critic_loss)  #global_step
Example n. 35
print('exp_id', experiment_id)
if args.resume:
    print('Resuming training')

# create the model
model = tf.make_template('model', config.build_model)

# run once for data dependent initialization of parameters
x_init = tf.placeholder(tf.float32,
                        shape=(config.batch_size, ) + config.obs_shape)
y_init = tf.placeholder(tf.float32,
                        shape=(config.batch_size, ) + config.label_shape)
init_pass = model(x_init, y_init, init=True)[0]

all_params = tf.trainable_variables()
n_parameters = 0
for variable in all_params:
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    n_parameters += variable_parameters
print('Number of parameters', n_parameters)

# get loss gradients over multiple GPUs
xs = []
ys = []
grads = []
train_losses = []
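
The parameter-count loop above can be collapsed to one line; an equivalent sketch, assuming every variable shape is fully defined:

import numpy as np
n_parameters = sum(int(np.prod(v.get_shape().as_list())) for v in tf.trainable_variables())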
Example n. 36
def main(unused_argv):
    print("Setting up image reader...")
    data_reader = Segmentation_BatchDataset.seg_dataset_reader(
        FLAGS.data_dir, crop=FLAGS.crop, crop_size=FLAGS.crop_size)
    print("Images read")

    #Placeholders for FeedDict
    keep_probability_conv = tf.placeholder(tf.float32,
                                           name="keep_probability_conv")
    image = tf.placeholder(
        tf.float32,
        shape=[None, FLAGS.crop_size[0], FLAGS.crop_size[0], 1],
        name="image")
    annotation = tf.placeholder(
        tf.int32,
        shape=[None, FLAGS.crop_size[0], FLAGS.crop_size[0], 1],
        name="labels")

    # Apply FCN
    pred_annotation, logits = segment(image, keep_probability_conv, 1,
                                      FLAGS.nr_classes, "labels")

    # compute cross-entropy loss
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="loss_labels")))
    # set up adam-optimizer
    trainable_var = tf.trainable_variables()
    train_op = train(loss, trainable_var)

    # get TF session
    sess = tf.Session()

    # set up saver
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')
                   [1])  # get the step from the last checkpoint
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
    else:
        step = 0

    for itr in xrange(step, FLAGS.MAX_ITERATION):
        train_images, train_annotations = data_reader.next_batch(
            FLAGS.batch_size)
        feed_dict = {
            image: train_images,
            annotation: train_annotations,
            keep_probability_conv: 0.85
        }
        sess.run(train_op, feed_dict=feed_dict)

        print(itr)

        if itr % 10 == 0:
            train_loss = sess.run(loss, feed_dict=feed_dict)
            print("Step: %d, Train_loss: %g" % (itr, train_loss))

        if itr % 500 == 0 and itr != 0:
            valid_images, valid_annotations, valid_o_annotations = data_reader.get_test_records(
            )
            valid_loss = sess.run(loss,
                                  feed_dict={
                                      image: valid_images,
                                      annotation: valid_annotations,
                                      keep_probability_conv: 1.0
                                  })
            print("%s ---> Validation_loss: %g" %
                  (datetime.datetime.now(), valid_loss))
            saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)
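
The train() helper called above is not shown in this snippet; a plausible minimal version matching the "set up adam-optimizer" comment (the FLAGS.learning_rate flag is an assumption):

def train(loss_val, var_list):
    # Minimize the loss over the given variables with Adam.
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads = optimizer.compute_gradients(loss_val, var_list=var_list)
    return optimizer.apply_gradients(grads)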
Example n. 37
def main(_):
    try:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # Always allocate GPU memory on demand.
        sess = tf.Session(config=config)

        nnet_config = nnet.parse_config(args.nnet_config)
        nnet_config['is_training'] = False

        nnet_type = nnet_config.get('nnet_type')
        left_context = nnet_config.get('left_context')
        right_context = nnet_config.get('right_context')
        subsample = nnet_config.get('subsample')

        filename, tfrecord, input_dim = \
            nnet.dataset_from_tfrecords(
                tfrecords_scp=args.tfrecords_scp,
                left_context=left_context,
                right_context=right_context,
                subsample=subsample,
                shuffle=False,
            )

        if args.objective == 'ctc':
            if nnet_type == 'blstm' or nnet_type == 'cudnnlstm' or nnet_type == 'lstm':
                pipeline_initializer, pipeline = \
                    nnet.create_pipeline_sequence_batch(
                        dataset=tfrecord,
                        input_dim=input_dim,
                        batch_size=args.batch_size,
                        batch_threads=args.batch_threads,
                        num_epochs=1,
                    )
                graph = \
                    nnet.create_graph_for_validation_ctc(
                        pipeline=pipeline,
                        nnet_config=nnet_config,
                    )
            else:
                log = 'unsupported nnet_type: %s' % nnet_type
                tf.logging.fatal(log)
                sys.exit(1)
        else:
            log = 'unsupported objective: %s' % args.objective
            tf.logging.fatal(log)
            sys.exit(1)

        sess.run(pipeline_initializer)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        success = \
            nnet.validate(
                sess=sess,
                graph=graph,
                evaluate=args.evaluate,
                report_interval=args.report_interval
            )

        saver = tf.train.Saver(tf.trainable_variables())
        tf.logging.info('saving nnet to "%s"', args.nnet_out)
        saver.save(sess, args.nnet_out)

    except KeyboardInterrupt:
        log = 'interrupted by user'
        tf.logging.fatal(log)
        sys.exit(1)
Example n. 38
def load_model_3(sess3, X_3, tst_3, yhat_3, decoder2_3, encoder2_3, test_x,
                 test_y, test_len):
    """
        Loading the pre-trained model and parameters.
    """
    # global X_3, tst_3, yhat_3,decoder2_3,encoder2_3
    with sess3.as_default():
        with sess3.graph.as_default():
            print("------------------------------load_model_3")
            modelpath = r'AE1/model2/'
            saver = tf.train.import_meta_graph(modelpath + 'model.ckpt.meta')
            saver.restore(sess3, tf.train.latest_checkpoint(modelpath))
            graph = tf.get_default_graph()
            X_3 = graph.get_tensor_by_name("xs:0")
            tst_3 = graph.get_tensor_by_name("ys:0")
            yhat_3 = graph.get_tensor_by_name("cross_entropy:0")
            decoder2_3 = graph.get_tensor_by_name("decoder2:0")
            encoder2_3 = graph.get_tensor_by_name("encoder2:0")
            # decoder1_3=graph.get_tensor_by_name("decoder1:0")
            # encoder1_3=graph.get_tensor_by_name("encoder1:0")
            encoder_h1 = graph.get_tensor_by_name("encoder_h1:0")
            encoder_b1 = graph.get_tensor_by_name("encoder_b1:0")
            encoder_h2 = graph.get_tensor_by_name("encoder_h2:0")
            encoder_b2 = graph.get_tensor_by_name("encoder_b2:0")
            decoder_h1 = graph.get_tensor_by_name("decoder_h1:0")
            decoder_b1 = graph.get_tensor_by_name("decoder_b1:0")
            decoder_h2 = graph.get_tensor_by_name("decoder_h2:0")
            decoder_b2 = graph.get_tensor_by_name("decoder_b2:0")
            weights = tf.Variable(tf.random_normal([128, 10]),
                                  name="weights_out")
            bias = tf.Variable(tf.random_normal([10]), name="bias_out")
            result = tf.nn.softmax(tf.add(tf.matmul(encoder2_3, weights),
                                          bias))
            targ_list = [
                'decoder_h1', 'decoder_h2', 'decoder_b1', 'decoder_b2'
            ]
            var_list = list(tf.trainable_variables())
            trg = list(get_var_list(targ_list, var_list))
            cross_entropy = tf.reduce_mean(-tf.reduce_sum(
                tst_3 * tf.log(tf.clip_by_value(result, 1e-8, 1.0)),
                reduction_indices=[1]),
                                           name='cost')
            train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(
                cross_entropy)
            train_step_ = tf.train.GradientDescentOptimizer(1e-5).minimize(
                yhat_3, var_list=trg)
            saver_ = tf.train.Saver(max_to_keep=1)
            sess3.run(weights.initializer)
            sess3.run(bias.initializer)
            # result_=sess3.run(result,feed_dict={X_3:test_x})
            batch_size = 100
            update_1 = 200
            update_2 = 200
            show = 50
            update_size = 1
            index = 0
            for i in range(update_1):
                index, batch_x, batch_y = fine_update(test_x, test_y,
                                                      update_size, test_len,
                                                      index)
                _, loss_1 = sess3.run([train_step, cross_entropy],
                                      feed_dict={
                                          X_3: batch_x,
                                          tst_3: batch_y
                                      })
                if (i % show == 0):
                    index, batch_x, batch_y = fine_update(
                        test_x, test_y, update_size, test_len, index)
                    start_time = time.time()
                    _, loss_train = sess3.run([train_step, cross_entropy],
                                              feed_dict={
                                                  X_3: batch_x,
                                                  tst_3: batch_y
                                              })
                    duration = time.time() - start_time
                    print('train iter  {}    time is  {}'.format(i, duration))
                    print('parameters numbers is {}'.format(get_num_params()))
                    print('[Train] Step: %d, loss: %4.5f' % (i, loss_train))
            print("decoder_h1:", sess3.run(decoder_h1))
            print("encoder_h1:", sess3.run(encoder_h1))
            for i in range(update_2):
                index, batch_x, batch_y = fine_update(test_x, test_y,
                                                      update_size, test_len,
                                                      index)
                _, loss_1 = sess3.run([train_step_, yhat_3],
                                      feed_dict={
                                          X_3: batch_x,
                                          tst_3: batch_y
                                      })
                if (i % show == 0):
                    index, batch_x, batch_y = fine_update(
                        test_x, test_y, update_size, test_len, index)
                    start_time = time.time()
                    _, loss_fine = sess3.run([train_step_, yhat_3],
                                             feed_dict={
                                                 X_3: batch_x,
                                                 tst_3: batch_y
                                             })
                    duration = time.time() - start_time
                    print('fine iter  {}    time is  {}'.format(i, duration))
                    print('parameters numbers is {}'.format(get_num_params()))
                    print('[fine] Step: %d, loss: %4.5f' % (i, loss_fine))
            print("decoder_h1:", sess3.run(decoder_h1))
            print("encoder_h1:", sess3.run(encoder_h1))
            shutil.rmtree("AE1/model2/")
            saver_.save(sess3, "AE1/model2/model.ckpt")
            # result_=sess3.run(result,feed_dict={X_3:test_x})
            e1_value = encoder_h1.eval()
            e2_value = encoder_h2.eval()
            e3_value = encoder_b1.eval()
            e4_value = encoder_b2.eval()
            e5_value = decoder_h1.eval()
            e6_value = decoder_h2.eval()
            e7_value = decoder_b1.eval()
            e8_value = decoder_b2.eval()
            shutil.rmtree('AE1/retrain_logs1/')
            writer = tf.summary.FileWriter('AE1/retrain_logs1/', sess3.graph)
            print(
                '----------------------------------------------Successfully load the model_3------------------success!'
            )
    return X_3, tst_3, yhat_3, decoder2_3, e1_value, e2_value, e3_value, e4_value, e5_value, e6_value, e7_value, e8_value
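
get_var_list() used above is not defined in this snippet; a plausible sketch, under the assumption that it selects the variables whose names contain any of the target substrings:

def get_var_list(targ_list, var_list):
    # Yield the variables whose name matches one of the target substrings.
    for var in var_list:
        if any(t in var.name for t in targ_list):
            yield var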
Example n. 39
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    if print_feature:
      tf.logging.info("*** Features ***")
      for name in sorted(features.keys()):
        tf.logging.info(
            "  name = %s, shape = %s" % (name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    global_step = tf.train.get_or_create_global_step()
    ##### Classification objective
    # print("Right at the uda.model_fn!! before the label_ids was reshaped")
    label_ids = features["label_ids"]
    print("label_ids:{}, type:{}".format(label_ids, type(label_ids)))
    label_ids = tf.reshape(label_ids, [-1])

    if unsup_ratio > 0 and "ori_input_ids" in features:
      input_ids = tf.concat([
          features["input_ids"],
          features["ori_input_ids"],
          features["aug_input_ids"]], 0)
      input_mask = tf.concat([
          features["input_mask"],
          features["ori_input_mask"],
          features["aug_input_mask"]], 0)
      input_type_ids = tf.concat([
          features["input_type_ids"],
          features["ori_input_type_ids"],
          features["aug_input_type_ids"]], 0)
    else:
      input_ids = features["input_ids"]
      input_mask = features["input_mask"]
      input_type_ids = features["input_type_ids"]
    (sup_loss, unsup_loss, logits,
     per_example_loss, loss_mask,
     tsa_threshold,
     unsup_loss_mask, correct_label_probs,pooled_output) = create_model(
         bert_config=bert_config,
         is_training=is_training,
         input_ids=input_ids,
         input_mask=input_mask,
         input_type_ids=input_type_ids,
         labels=label_ids,
         num_labels=num_labels,
         use_one_hot_embeddings=use_one_hot_embeddings,
         tsa=tsa,
         unsup_ratio=unsup_ratio,
         global_step=global_step,
         num_train_steps=num_train_steps,
         )

    ##### Aggregate losses into total_loss
    metric_dict = {}

    # accuracy: fraction of correct predictions
    predictions = tf.argmax(logits, axis=-1, output_type=label_ids.dtype)
    is_correct = tf.to_float(tf.equal(predictions, label_ids))
    acc = tf.reduce_mean(is_correct)
    # add sup. metrics to dict
    metric_dict["sup/loss"] = sup_loss
    metric_dict["sup/accu"] = acc
    metric_dict["sup/correct_cat_probs"] = correct_label_probs
    metric_dict["sup/tsa_threshold"] = tsa_threshold

    metric_dict["sup/sup_trained_ratio"] = tf.reduce_mean(loss_mask)
    total_loss = sup_loss

    if unsup_ratio > 0 and uda_coeff > 0 and "ori_input_ids" in features:
      total_loss += uda_coeff * unsup_loss
      metric_dict["unsup/loss"] = unsup_loss

    if unsup_loss_mask is not None:
      metric_dict["unsup/high_prob_ratio"] = tf.reduce_mean(unsup_loss_mask)

    ##### Initialize variables with pre-trained models
    tvars = tf.trainable_variables()

    scaffold_fn = None
    if init_checkpoint:
      (assignment_map,
       initialized_variable_names) = get_assignment_map_from_checkpoint(
           tvars, init_checkpoint)
      if use_tpu:
        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    else:
      initialized_variable_names = {}

    # Log the trainable variables, marking those initialized from the BERT checkpoint.
    if print_structure:
      tf.logging.info("**** Trainable Variables ****")
      for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
          init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    ##### Construct TPU Estimator Spec based on the specific mode
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      ## Create optimizer for training
      train_op, curr_lr = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps,
          use_tpu, clip_norm, global_step)
      metric_dict["learning_rate"] = curr_lr

      ## Create host_call for training
      host_call = tpu_utils.construct_scalar_host_call(
          metric_dict=metric_dict,
          model_dir=params["model_dir"],
          prefix="training/",
          reduce_fn=tf.reduce_mean)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          host_call=host_call,
          scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:
      def clas_metric_fn(per_example_loss, label_ids, logits):
        ## classification loss & accuracy
        loss = tf.metrics.mean(per_example_loss)
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(label_ids, predictions)
        # print("acc:{}".format(accuracy))
        num_labels = len(label_list)
        y_true = label_ids
        y_pred = tf.argmax(logits, 1)
        recall_n = [0] * num_labels
        precision_n = [0] * num_labels
        update_op_rec_n = [[]] * num_labels
        update_op_pre_n = [[]] * num_labels
        for k in range(num_labels):
            recall_n[k] = tf.metrics.recall(
                labels=tf.equal(y_true, k),
                predictions=tf.equal(y_pred, k)
            )
            precision_n[k] = tf.metrics.precision(
                labels=tf.equal(y_true, k),
                predictions=tf.equal(y_pred, k)
            )
            # precision_n_max[k] = max(precision_n_max[k], precision_n[k])
            # print("recall_{}_max:\t{}".format(k, recall_n_max[k]))
            # print("precision_{}_max:\t{}".format(k, precision_n_max[k]))
        # recall_value = sum(recall_n) * 1.0 / num_labels
        # precision_value = sum(precision_n) * 1.0 / num_labels
        # update_op_rec = sum(update_op_rec_n) * 1.0 / num_labels
        # update_op_pre = sum(update_op_pre_n) * 1.0 / num_labels
        # recall = (recall_value, update_op_rec)
        # precision = (precision_value, update_op_pre)
        metric_map = {}
        for i in range(len(label_list)):
            metric_map['recall_{}'.format(label_list[i])] = recall_n[i]
        for i in range(len(label_list)):
            metric_map['precision_{}'.format(label_list[i])] = precision_n[i]
        metric_map["eval_classify_accuracy"] = accuracy
        metric_map["eval_loss"] = loss
        return metric_map
        # return {
        #     "eval_classify_accuracy": accuracy,
        #     "recall_0": recall_n[0],
        #     "recall_1": recall_n[1],
        #     "recall_2": recall_n[2],
        #     "recall_3": recall_n[3],
        #     "recall_4": recall_n[4],
        #     "eval_loss": loss,
        # }
      ## eval_metrics is a tuple of (metric_fn, tensors)
      eval_metrics = (clas_metric_fn, [per_example_loss, label_ids, logits])

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)

    else:
      print("i'm in the estimator predictions block!!!!")
      probabilities = tf.nn.softmax(logits, axis=-1)
      prediction_result = {
        'pooled_output': pooled_output,
        'probabilities': probabilities,
      }
      print("type: {}, value: {}".format(type(probabilities), probabilities))
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions=prediction_result,
          scaffold_fn=scaffold_fn)

    return output_spec
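
A hedged sketch of how a model_fn like the one above is typically wired into a TPUEstimator; run_config, the batch sizes, and the model_dir value are illustrative assumptions, not taken from this snippet:

import tensorflow as tf

# run_config is assumed to be a tf.contrib.tpu.RunConfig defined elsewhere.
estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=False,                       # the same model_fn also runs on CPU/GPU
    model_fn=model_fn,
    config=run_config,
    train_batch_size=32,
    eval_batch_size=8,
    params={"model_dir": "/tmp/model"},  # model_fn reads params["model_dir"] for host_call
)
estimator.train(input_fn=train_input_fn, max_steps=1000)  # train_input_fn is assumed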
Example n. 40
def main():
    for k, v in a._get_kwargs():
        print(k, "=", v)

    with open(os.path.join(a.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(a), sort_keys=True, indent=4))

    # no need to load options from options.json
    loader = Loader(a.batch_size)

    train_cnn = CNN(loader.height, loader.width, loader.depth)
    train_cnn.build_graph(False, True)

    val_cnn = CNN(loader.height, loader.width, loader.depth)
    val_cnn.build_graph(True, False)

    with tf.name_scope("parameter_count"):
        parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    saver = tf.train.Saver(max_to_keep=50)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print("parameter_count =", sess.run(parameter_count))

        if a.checkpoint is not None:
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            saver.restore(sess, checkpoint)

        if a.mode == 'test':
            draw(sess, val_cnn, os.path.join(a.output_dir, 'test.jpg'))
        else:
            # training
            start = time.time()
            for epoch in range(a.max_epochs):
                def should(freq):
                    return freq > 0 and ((epoch + 1) % freq == 0 or epoch == a.max_epochs - 1)

                fetches = {
                    "train": train_cnn.optimize,
                    "loss": train_cnn.loss
                }

                training_loss = 0
                validation_loss = float('nan')  # defined even on epochs that skip validation
                for _ in range(loader.ntrain):
                    X, y = loader.next_batch(0)
                    results = sess.run(fetches, {train_cnn.input: X, train_cnn.target: y})
                    training_loss += results['loss']
                training_loss /= loader.ntrain

                if should(a.validation_freq):
                    print('validating model')
                    validation_loss = 0
                    for _ in range(loader.nval):
                        X, y = loader.next_batch(1)
                        loss = sess.run(val_cnn.loss, {val_cnn.input: X, val_cnn.target: y})
                        validation_loss += loss
                    validation_loss /= loader.nval

                if should(a.summary_freq):
                    print("recording summary")
                    with open(os.path.join(a.output_dir, 'loss_record.txt'), "a") as loss_file:
                        loss_file.write("%s\t%s\t%s\n" % (epoch, training_loss, validation_loss))

                if should(a.progress_freq):
                    rate = (epoch + 1) / (time.time() - start)
                    remaining = (a.max_epochs - 1 - epoch) / rate
                    print("progress  epoch %d  remaining %dh" % (epoch, remaining / 3600))
                    print("training loss", training_loss)

                if should(a.save_freq):
                    print("saving model")
                    saver.save(sess, os.path.join(a.output_dir, "model"), global_step=epoch)
Example n. 41
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")
    # Generate the hidden-layer parameters.
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Generate the output-layer parameters.
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    # Compute the network's forward pass under the current parameters. The moving-average class is passed as None
    # here, so the function does not use moving averages of the parameters.
    y = inference(input_tensor=x,
                  avg_class=None,
                  weights1=weights1,
                  biases1=biases1,
                  weights2=weights2,
                  biases2=biases2)

    # Define a variable to store the number of training steps. It needs no moving average, so it is declared
    # non-trainable (trainable=False); in TensorFlow the training-step counter is usually non-trainable.
    global_step = tf.Variable(0, trainable=False)

    # Initialize the moving-average class with the decay rate and the training-step variable; supplying the step
    # variable speeds up the moving-average updates early in training.
    variable_average = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)

    # Apply the moving average to all variables that represent network parameters; auxiliary variables such as
    # global_step do not need it. tf.trainable_variables returns the elements of the graph collection
    # GraphKeys.TRAINABLE_VARIABLES, i.e. all variables created without trainable=False.
    variables_averages_op = variable_average.apply(tf.trainable_variables())

    # Compute the forward pass with the moving-averaged parameters. The moving average does not change the variables
    # themselves; it maintains shadow variables that record the averaged values, so average() must be called
    # explicitly whenever the averaged value is needed.
    average_y = inference(input_tensor=x,
                          avg_class=variable_average,
                          weights1=weights1,
                          biases1=biases1,
                          weights2=weights2,
                          biases2=biases2)

    # Use cross entropy as the loss that measures the gap between predictions and ground truth. TensorFlow's
    # sparse_softmax_cross_entropy_with_logits speeds up the computation when each example has exactly one correct
    # class; every MNIST image contains a single digit 0-9, so it applies here. Its first argument is the forward-pass
    # result without the softmax layer, and its second is the correct answer. Since the label is a length-10 one-hot
    # array while the function expects a class index, tf.argmax is used to obtain the class id.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))

    # Compute the mean cross entropy over all examples in the current batch.
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Compute the L2 regularization loss.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # Compute the model's regularization loss; usually only the network weights are regularized, not the biases.
    regularization = regularizer(weights1) + regularizer(weights2)
    # The total loss is the sum of the cross-entropy loss and the regularization loss.
    loss = cross_entropy_mean + regularization

    # Set up an exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,  # base learning rate; the effective rate decays from this value as training proceeds
        global_step,  # current training step
        mnist.train.num_examples / BATCH_SIZE,  # iterations needed for one pass over the training data
        LEARNING_RATE_DECAY  # decay rate of the learning rate
    )

    # Use tf.train.GradientDescentOptimizer to minimize the loss; note that this loss includes both the cross-entropy
    # loss and the L2 regularization loss.
    train_step = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).\
                          minimize(loss=loss, global_step=global_step)

    # Each pass over the data must both update the network parameters via backpropagation and update each parameter's
    # moving average. To run several operations in one step, TensorFlow provides tf.control_dependencies and tf.group;
    # the two commented lines below are equivalent to train_op = tf.group(train_step, variables_averages_op):
    # with tf.control_dependencies([train_step, variables_averages_op]):
    #     train_op = tf.no_op(name="train")
    train_op = tf.group(train_step, variables_averages_op)
    # Check whether the forward pass with the moving-average model is correct. tf.argmax(average_y, 1) computes the
    # predicted answer for each example; average_y is a batch_size * 10 matrix in which each row is one example's
    # forward-pass result. The second argument "1" means the argmax is taken within each row, yielding a
    # length-batch_size vector of predicted digits. tf.equal compares the two tensors elementwise, returning True
    # where they match and False otherwise.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

    # Cast the booleans to floats and take the mean; this mean is the model's accuracy on this batch of data.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Initialize the session and start the training process.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Prepare the validation data; during training it is used to roughly judge stopping conditions and training quality.
        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels
        }
        # Prepare the test data. In a real application it is unseen during training and serves only as the final measure of model quality.
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        # Train the network iteratively
        for i in range(TRAINING_STEPS):
            # Every 1000 rounds, report the result on the validation set
            if i % 1000 == 0:
                # Evaluate the moving-average model on the validation set. Because the MNIST dataset is small, all
                # validation data can be processed at once for convenience; for more complex networks or larger
                # validation sets, an overly large batch may make computation too slow or even run out of memory.
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print(
                    "After %d training step(s), validation accuracy using the average model is %g"
                    % (i, validate_acc))

            # Generate a batch of batch_size training examples and run one training step
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # After training, measure the network's final accuracy on the test set
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print(
            "After %d training step(s), test accuracy using the average model is %g"
            % (TRAINING_STEPS, test_acc))
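
A tiny standalone sketch of the shadow-variable behavior described in the comments above; the variable and the decay value are illustrative:

import tensorflow as tf

v = tf.Variable(0.0)
ema = tf.train.ExponentialMovingAverage(0.99)
maintain_op = ema.apply([v])  # creates a shadow variable initialized to v's value

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(v, 5.0))
    sess.run(maintain_op)
    # shadow = 0.99 * 0.0 + 0.01 * 5.0 = 0.05, while v itself stays 5.0
    print(sess.run([v, ema.average(v)]))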
Example n. 42
z_ass = z.assign(z_plhdr)

reconstructed = Generator(z)

feature_residual = tf.reduce_sum(tf.abs(
    tf.subtract(Discriminator(real, signature='feature_match'),
                Discriminator(reconstructed, signature='feature_match'))),
                                 axis=[1, 2, 3])

residual = tf.reduce_sum(tf.abs(tf.subtract(real, reconstructed)), axis=1)

anomaly_weight = 0.05
anomaly_score = (1 -
                 anomaly_weight) * residual + anomaly_weight * feature_residual

t_vars = tf.trainable_variables()
params = [var for var in t_vars if 'ano_z' in var.name]

optimizer = tf.train.AdamOptimizer(
    learning_rate=1e-1,  #1e-4,
    beta1=0.4,
    beta2=0.9).minimize(anomaly_score, var_list=params)

#hfile = lib.datautils.load(imarr_fn, dataset=None)
#fim = h5py.File(imarr_fn,"r")
if not os.path.isfile(result_fn):
    print(f"Making new result file at {result_fn}")
    fres = h5py.File(result_fn, "w")
    #fres.create_dataset('idxs', data=fim['idxs'])
    #fres.create_dataset('object_ids', data=fim['object_ids'])
    fres.create_dataset('idxs', (0, ),
Example n. 43
    def __init__(self, is_training, config, use_fp16=False):
        self._data_type = tf.float16 if use_fp16 else tf.float32

        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=1.0, state_is_tuple=True)
        if is_training and config.keep_prob < 1:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers, state_is_tuple=True)

        self._initial_state = cell.zero_state(batch_size, self._data_type)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [vocab_size, size], dtype=self._data_type)
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        inputs = [tf.squeeze(input_, [1])
                  for input_ in tf.split(1, num_steps, inputs)]
        outputs, state = tf.nn.rnn(cell, inputs, initial_state=self._initial_state)
        # outputs = []
        # state = self._initial_state
        # with tf.variable_scope("RNN"):
        #     for time_step in range(num_steps):
        #         if time_step > 0: tf.get_variable_scope().reuse_variables()
        #         (cell_output, state) = cell(inputs[:, time_step, :], state)
        #         outputs.append(cell_output)
        ##############################################################################

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        softmax_w = tf.get_variable(
            "softmax_w", [size, vocab_size], dtype=self._data_type)
        softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=self._data_type)
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=self._data_type)])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        self.probabilities = tf.nn.softmax(logits)
        self.logits = logits

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(
            tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
Example n. 44
def train(dataset):
    # Define the input and output placeholders
    x = tf.placeholder(tf.float32, [None, inference.INPUT_NODE],
                       name='x-input')
    y_ = tf.placeholder(tf.float32, [None, inference.OUTPUT_NODE],
                        name='y-input')

    # Network optimization
    # L2 regularizer
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZER_RATE)

    y = inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Moving-average model
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Sigmoid cross entropy
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y,
                                                            labels=y_)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Total loss: cross entropy plus the collected regularization losses
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    # Exponentially decaying learning rate
    DECAY_STEPS = int(STEPS * len(dataset['train_x']) / BATCH_SIZE)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,
                                               DECAY_STEPS,
                                               LEARNING_RATE_DECAY)

    # Gradient descent
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    # Accuracy
    prediction = tf.cast(tf.sigmoid(y) > 0.5, tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, y_), tf.float32))

    # Initialize the TensorFlow Saver for model persistence
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(STEPS):
            index = np.random.permutation(len(dataset['train_y_']))
            xs = dataset['train_x'].take(index)
            ys = dataset['train_y_'][index]
            for j in range(len(dataset['train_y_']) // 100 + 1):
                start = j * BATCH_SIZE
                end = start + BATCH_SIZE
                sess.run(train_op,
                         feed_dict={
                             x: xs[start:end],
                             y_: ys[start:end]
                         })
            if i % 1000 == 0:
                step, loss_value, accuracy_value = sess.run(
                    [global_step, loss, accuracy],
                    feed_dict={
                        x: xs[start:end],
                        y_: ys[start:end]
                    })
                print(
                    "After %d training steps, loss on training batch is %f, accuracy is %f"
                    % (step, loss_value, accuracy_value))
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)
Example n. 45
                input_tensor=input_tensor, name='discriminative_inference')
            fc_out_r, attention_mask_r, fc2_r = self.build(
                input_tensor=label_tensor,
                name='discriminative_inference',
                reuse=True)

            l_map = tf.losses.mean_squared_error(attention_map, attention_mask_o) + \
                    tf.losses.mean_squared_error(attention_mask_r, zeros_mask)

            entropy_loss = -tf.log(fc_out_r) - tf.log(
                -tf.subtract(fc_out_o, tf.constant(1.0, tf.float32)))
            entropy_loss = tf.reduce_mean(entropy_loss)

            loss = entropy_loss + 0.05 * l_map

            return fc_out_o, loss


if __name__ == '__main__':
    input_image = tf.placeholder(dtype=tf.float32, shape=[32, 896, 896, 3])
    attention_map = tf.placeholder(dtype=tf.float32, shape=[32, 896, 896, 1])
    label_image = tf.placeholder(dtype=tf.float32, shape=[32, 896, 896, 3])
    net = DiscriminativeNet(phase='train')
    loss = net.compute_loss(input_tensor=input_image,
                            label_tensor=label_image,
                            attention_map=attention_map,
                            name='test')

    for vv in tf.trainable_variables():
        print(vv.name)
Example n. 46
    def init_model(self):
        #if not os.path.exists(self.modelPath) or os.listdir(self.modelPath) == []:
        # Create the whole training graph
        self.realX = tf.placeholder(tf.float32, [None, self.imgDim, self.imgDim, 3], name="realX")
        self.realLabels = tf.placeholder(tf.float32, [None, self.numClass], name="realLabels")
        self.realLabelsOneHot = tf.placeholder(tf.float32, [None, self.imgDim, self.imgDim, self.numClass], name="realLabelsOneHot")
        self.fakeLabels = tf.placeholder(tf.float32, [None, self.numClass], name="fakeLabels")
        self.fakeLabelsOneHot = tf.placeholder(tf.float32, [None, self.imgDim, self.imgDim, self.numClass], name="fakeLabelsOneHot")
        self.alphagp = tf.placeholder(tf.float32, [], name="alphagp")


        # Initialize the generator and discriminator
        self.Gen = Generator()
        self.Dis = Discriminator()



        # -----------------------------------------------------------------------------------------
        # -----------------------------------Create D training pipeline----------------------------
        # -----------------------------------------------------------------------------------------

        # Create fake image
        self.fakeX = self.Gen.recForward(self.realX, self.fakeLabelsOneHot)
        YSrc_real, YCls_real = self.Dis.forward(self.realX)
        YSrc_fake, YCls_fake = self.Dis.forward(self.fakeX)

        YCls_real = tf.squeeze(YCls_real)  # remove singleton dimensions
        self.d_loss_real = - tf.reduce_mean(YSrc_real)
        self.d_loss_fake = tf.reduce_mean(YSrc_fake)
        self.d_loss_cls = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.realLabels,logits=YCls_real, name="d_loss_cls")) / self.batchSize




        #TOTAL LOSS
        self.d_loss = self.d_loss_real + self.d_loss_fake + self.lambdaCls * self.d_loss_cls #+ self.d_loss_gp
        vars = tf.trainable_variables()
        self.d_params = [v for v in vars if v.name.startswith('Discriminator/')]
        train_D = tf.train.AdamOptimizer(learning_rate=self.learningRateD, beta1=0.5, beta2=0.999)
        self.train_D_loss = train_D.minimize(self.d_loss, var_list=self.d_params)
        # gvs = self.train_D.compute_gradients(self.d_loss, var_list=self.d_params)
        # capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
        # self.train_D_loss = self.train_D.apply_gradients(capped_gvs)

        #-------------GRADIENT PENALTY---------------------------
        interpolates = self.alphagp * self.realX + (1 - self.alphagp) * self.fakeX
        out,_ = self.Dis.forward(interpolates)
        gradients = tf.gradients(out, [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1,2,3]))
        _gradient_penalty = tf.reduce_mean(tf.square(slopes - 1.0))
        self.d_loss_gp   = self.lambdaGp * _gradient_penalty
        self.train_D_gp = train_D.minimize(self.d_loss_gp, var_list=self.d_params)
        # gvs = self.train_D.compute_gradients(self.d_loss_gp)
        # capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in gvs]
        # self.train_D_gp = self.train_D.apply_gradients(capped_gvs)
        #-------------------------------------------------------------------------------

        #-----------------accuracy--------------------------------------------------------------
        YCls_real_sigmoid = tf.sigmoid(YCls_real)
        predicted = tf.to_int32(YCls_real_sigmoid > 0.5)
        labels = tf.to_int32(self.realLabels)
        correct = tf.to_float(tf.equal(predicted, labels))
        hundred = tf.constant(100.0)
        self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), axis=0) * hundred
        #--------------------------------------------------------------------------------------


        #CLIP D WEIGHTS
        #self.clip_D = [p.assign(tf.clip_by_value(p, -self.clipD, self.clipD)) for p in self.d_params]


        # -----------------------------------------------------------------------------------------
        # ----------------------------Create G training pipeline-----------------------------------
        # -----------------------------------------------------------------------------------------
        #original to target and target to original domain
        #self.fakeX = self.Gen.recForward(self.realX, self.fakeLabelsOneHot)
        rec_x = self.Gen.recForward(self.fakeX,self.realLabelsOneHot)

        # compute losses
        #out_src, out_cls = self.Dis.forward(self.fakeX)
        self.g_loss_adv = - tf.reduce_mean(YSrc_fake)
        self.g_loss_cls = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.fakeLabels,logits=tf.squeeze(YCls_fake))) / self.batchSize

        self.g_loss_rec = tf.reduce_mean(tf.abs(self.realX - rec_x))
        # total G loss and optimize
        self.g_loss = self.g_loss_adv + self.lambdaCls * self.g_loss_cls + self.lambdaRec * self.g_loss_rec
        train_G = tf.train.AdamOptimizer(learning_rate=self.learningRateG, beta1=0.5, beta2=0.999)
        self.g_params = [v for v in vars if v.name.startswith('Generator/')]

        self.train_G_loss = train_G.minimize(self.g_loss, var_list=self.g_params)
        # gvs = self.train_G.compute_gradients(self.g_loss)
        # capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
        # self.train_G_loss = self.train_G.apply_gradients(capped_gvs)

        # TF session
        self.saver = tf.train.Saver()
        self.init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(self.init)

        # Restore the latest checkpoint if one exists; otherwise start fresh.
        if not os.listdir(self.modelPath):
            self.epoch_index = 1
            self.picture = 0
        else:
            checkpoint = tf.train.latest_checkpoint(self.modelPath)
            self.saver.restore(self.sess, checkpoint)
            # Checkpoints are named "model/model<picture>_<epoch>"; recover
            # both counters from the filename.
            model_info = checkpoint.split("model/model", 1)[1].split("_", 1)
            self.picture = int(model_info[0])
            self.epoch_index = int(model_info[1])
Example 47
    def build_model(self):
        if self.y_dim:
            self.y = tf.placeholder(tf.float32, [self.batch_size, self.y_dim],
                                    name='y')

        if self.is_crop:
            image_dims = [self.output_height, self.output_width, self.c_dim]
        else:
            image_dims = [self.input_height, self.input_width, self.c_dim]

        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + image_dims,
                                     name='real_images')
        self.sample_inputs = tf.placeholder(tf.float32,
                                            [self.sample_num] + image_dims,
                                            name='sample_inputs')

        inputs = self.inputs
        sample_inputs = self.sample_inputs
        '''
        Possible way of change z's dimension
        '''
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        self.z_sum = histogram_summary("z", self.z)

        if self.y_dim:
            self.G = self.generator(self.z, self.y)
            self.D, self.D_logits = self.discriminator(inputs, self.y)

            self.sampler = self.sampler(self.z, self.y)
            self.D_, self.D_logits_ = self.discriminator(self.G,
                                                         self.y,
                                                         reuse=True)
        else:
            self.G = self.generator(self.z)
            self.D, self.D_logits = self.discriminator(inputs)

            self.sampler = self.sampler(self.z)
            self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True)

        self.d_sum = histogram_summary("d", self.D)
        self.d__sum = histogram_summary("d_", self.D_)
        self.G_sum = image_summary("G", self.G)

        def sigmoid_cross_entropy_with_logits(x, y):
            # Older TF versions named the second argument `targets`
            # instead of `labels`.
            try:
                return tf.nn.sigmoid_cross_entropy_with_logits(logits=x,
                                                               labels=y)
            except TypeError:
                return tf.nn.sigmoid_cross_entropy_with_logits(logits=x,
                                                               targets=y)

        self.d_loss_real = tf.reduce_mean(
            sigmoid_cross_entropy_with_logits(self.D_logits,
                                              tf.ones_like(self.D)))
        self.d_loss_fake = tf.reduce_mean(
            sigmoid_cross_entropy_with_logits(self.D_logits_,
                                              tf.zeros_like(self.D_)))
        self.g_loss = tf.reduce_mean(
            sigmoid_cross_entropy_with_logits(self.D_logits_,
                                              tf.ones_like(self.D_)))
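        # Standard (non-saturating) GAN objective: D is pushed toward 1 on
        # real images and 0 on generated ones, while G is trained to make D
        # output 1 on generated images.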

        self.d_loss_real_sum = scalar_summary("d_loss_real", self.d_loss_real)
        self.d_loss_fake_sum = scalar_summary("d_loss_fake", self.d_loss_fake)

        self.d_loss = self.d_loss_real + self.d_loss_fake

        self.g_loss_sum = scalar_summary("g_loss", self.g_loss)
        self.d_loss_sum = scalar_summary("d_loss", self.d_loss)

        t_vars = tf.trainable_variables()

        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]

        # Wasserstein-GAN
        # self.d_loss_real = tf.reduce_mean(self.D_logits)
        # self.d_loss_fake = tf.reduce_mean(self.D_logits_)
        # self.g_loss = -tf.reduce_mean(self.D_logits_)
        # self.d_loss = self.d_loss_real - self.d_loss_fake

        self.saver = tf.train.Saver()
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        #label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits, predictsDict, accuracy) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)
        predictsDict["input_mask"] = input_mask
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            #logging_hook = tf.train.LoggingTensorHook({"loss": total_loss}, every_n_iter=1000)
            tf.summary.scalar('loss', total_loss)
            tf.summary.scalar('accuracy', accuracy[1])
            #log_dir = './ner_tensorboard'+'/run'+datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            log_dir = './ner_tensorboard'
            summary_hook = tf.train.SummarySaverHook(
                save_steps=100,
                output_dir=log_dir,
                summary_op=tf.summary.merge_all())
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[summary_hook],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            
            def metric_fn(per_example_loss, label_ids, logits, num_labels):
            # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                precision = tf_metrics.precision(label_ids, predictions, num_labels, [1, 2], average="macro")
                recall = tf_metrics.recall(label_ids, predictions, num_labels, [1, 2], average="macro")
                f = tf_metrics.f1(label_ids, predictions, num_labels, [1, 2], average="macro")
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    #"eval_loss": loss,
                }
            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits, num_labels])
            # eval_metrics = (metric_fn, [label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictsDict, scaffold_fn=scaffold_fn)
        return output_spec
Example 49
    def _make_graph(self):
        self.logger.info("Generating training graph on {} GPUs ...".format(self.cfg.nr_gpus))

        weights_initializer = slim.xavier_initializer()
        biases_initializer = tf.constant_initializer(0.)
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(self.cfg.weight_decay)

        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(self.cfg.nr_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i) as name_scope:
                        # Force all Variables to reside on the CPU.
                        with slim.arg_scope([slim.model_variable, slim.variable], device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                 slim.conv2d_transpose, slim.separable_conv2d,
                                                 slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                weights_initializer=weights_initializer,
                                                biases_initializer=biases_initializer):
                                # loss over single GPU
                                self.net.make_network(is_train=True)
                                if i == self.cfg.nr_gpus - 1:
                                    loss = self.net.get_loss(include_wd=True)
                                else:
                                    loss = self.net.get_loss()
                                self._input_list.append( self.net.get_inputs() )

                        tf.get_variable_scope().reuse_variables()

                        if i == 0:
                            if self.cfg.nr_gpus > 1 and self.cfg.bn_train is True:
                                self.logger.warning("BN is calculated only on single GPU.")
                            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, name_scope)
                            with tf.control_dependencies(extra_update_ops):
                                grads = self._optimizer.compute_gradients(loss)
                        else:
                            grads = self._optimizer.compute_gradients(loss)
                        final_grads = []
                        with tf.variable_scope('Gradient_Mult') as scope:
                            for grad, var in grads:
                                scale = 1.
                                if self.cfg.double_bias and '/biases:' in var.name:
                                    scale *= 2.
                                if not np.allclose(scale, 1.):
                                    grad = tf.multiply(grad, scale)
                                final_grads.append((grad, var))
                        tower_grads.append(final_grads)

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]
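        # sum_gradients (a helper defined elsewhere in this project) is
        # expected to combine the per-tower gradients variable-by-variable
        # so they can be applied once to the shared CPU-resident variables.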

        use_moving_average = False  # optionally maintain an EMA of the weights
        if use_moving_average:
            variable_averages = tf.train.ExponentialMovingAverage(0.9999)
            variables_to_average = (tf.trainable_variables() + tf.moving_average_variables())
            variables_averages_op = variable_averages.apply(variables_to_average)

            apply_gradient_op = self._optimizer.apply_gradients(grads)
            train_op = tf.group(apply_gradient_op, variables_averages_op, *extra_update_ops)
        else:
            apply_gradient_op = self._optimizer.apply_gradients(grads)
            train_op = tf.group(apply_gradient_op, *extra_update_ops)

        return train_op
Example 50
biases2 = tf.get_variable("biases2", [layer_node_num[1]], initializer=tf.constant_initializer(0.0))
layer2 = tf.nn.relu(tf.matmul(layer1, weights2) + biases2)

weights3 = get_weight_variable("weights3", [layer_node_num[1], layer_node_num[2]],regularizer)
biases3 = tf.get_variable("biases3", [layer_node_num[2]], initializer=tf.constant_initializer(0.0))
layer3 = tf.nn.tanh(tf.matmul(layer2, weights3) + biases3)

weights_out = get_weight_variable("weights_out",[layer_node_num[2], output_num], regularizer)
biases_out = tf.get_variable("biases_out", [output_num], initializer=tf.constant_initializer(0.0))
layer_out = tf.matmul(layer3, weights_out) + biases_out
y = layer_out

global_step = tf.Variable(0, trainable=False)

variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
cross_entropy_mean = tf.reduce_mean(cross_entropy)
loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
learning_rate = tf.train.exponential_decay(
    LEARNING_RATE_BASE,
    global_step,
    TRAIN_DATA_SIZE / BATCH_SIZE, LEARNING_RATE_DECAY,
    staircase=True)
    
# Optimizer 
# GradientDescentOptimizer 
# AdagradOptimizer 
# AdagradDAOptimizer 
# MomentumOptimizer 
# AdamOptimizer 
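# One common way to finish wiring the pieces above (a sketch only, since
# the snippet is truncated before the training step is defined):
#
#   train_step = tf.train.GradientDescentOptimizer(learning_rate) \
#       .minimize(loss, global_step=global_step)
#   # Run the moving-average update together with every training step.
#   with tf.control_dependencies([train_step, variables_averages_op]):
#       train_op = tf.no_op(name='train')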
Example 51
def main(argv=None):
    keep_probability = tf.placeholder(tf.float32, name="keep_probability")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                           name="input_image")
    annotation = tf.placeholder(tf.int32,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth",
                     tf.cast(annotation, tf.uint8),
                     max_outputs=2)
    tf.summary.image("pred_annotation",
                     tf.cast(pred_annotation, tf.uint8),
                     max_outputs=2)
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy")))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records,
                                                   image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records,
                                                    image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # Hard-coded override: resume from this specific checkpoint instead
        # of the latest one recorded in the checkpoint state file.
        ckpt.model_checkpoint_path = 'logs/model.ckpt-1000'
        print(ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == "train":
        for itr in range(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(
                FLAGS.batch_size)
            feed_dict = {
                image: train_images,
                annotation: train_annotations,
                keep_probability: 0.85
            }

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, summary_op],
                                                   feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = validation_dataset_reader.next_batch(
                    FLAGS.batch_size)
                valid_loss = sess.run(loss,
                                      feed_dict={
                                          image: valid_images,
                                          annotation: valid_annotations,
                                          keep_probability: 1.0
                                      })
                print("%s ---> Validation_loss: %g" %
                      (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        valid_images, valid_annotations = validation_dataset_reader.get_random_batch(
            FLAGS.batch_size)
        pred = sess.run(pred_annotation,
                        feed_dict={
                            image: valid_images,
                            annotation: valid_annotations,
                            keep_probability: 1.0
                        })
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            utils.save_image(valid_images[itr].astype(np.uint8),
                             FLAGS.logs_dir,
                             name="inp_" + str(5 + itr))
            utils.save_image(valid_annotations[itr].astype(np.uint8),
                             FLAGS.logs_dir,
                             name="gt_" + str(5 + itr))
            utils.save_image(pred[itr].astype(np.uint8),
                             FLAGS.logs_dir,
                             name="pred_" + str(5 + itr))
            print("Saved image: %d" % itr)
Example 52
sess = tf.Session(config=config)


# Compute your softmax cross entropy loss
net_input = tf.placeholder(tf.float32, shape=[None, None, None, 3])
net_output1 = tf.placeholder(tf.float32, shape=[None, None, None, num_classes])
net_output2 = tf.placeholder(tf.float32, shape=[None, None, None, num_classes])

network1, init_fn = model_builder.build_model(
    model_name=args.model, net_input=net_input, num_classes=num_classes,
    crop_width=args.crop_width, crop_height=args.crop_height, is_training=True)

loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=network1, labels=net_output1))
loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=network1, labels=net_output2))
loss = 0.5 * loss1 + 0.5 * loss2
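# The two cross-entropy terms share one set of logits but use two different
# label maps; the 0.5/0.5 mix weights both supervision sources equally.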

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 
opt = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss, var_list=tf.trainable_variables())
#opt = tf.train.RMSPropOptimizer(learning_rate=0.00005, decay=0.995).minimize(loss, var_list=[var for var in tf.trainable_variables()])
opt = tf.group([opt, update_ops])

saver = tf.train.Saver(max_to_keep=1000)
sess.run(tf.global_variables_initializer())

ckpt_path = args.ckpt
model_checkpoint_name = ckpt_path+"/latest_model_" + args.model + "_" + args.dataset + ".ckpt"
if args.save_first_ckpt:
    import gc
    print("Saving the first checkpoint")
    first_ckpt_path = args.first_ckpt
    saver.save(sess, first_ckpt_path+"/latest_model_" + args.model + "_" + args.dataset + ".ckpt")
    gc.collect()
    exit()
Example 53
    def build_model(self):

        if self.crop:
            image_dims = [self.output_height, self.output_width, self.c_dim]
        else:
            image_dims = [self.input_height, self.input_width, self.c_dim]

        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + image_dims,
                                     name='real_images')

        inputs = self.inputs

        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        self.z_sum = histogram_summary("z", self.z)

        self.G = self.generator(self.z)
        self.D, self.D_logits = self.discriminator(inputs, reuse=False)
        self.sampler = self.sampler(self.z)
        self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True)

        self.d_sum = histogram_summary("d", self.D)
        self.d__sum = histogram_summary("d_", self.D_)
        self.G_sum = image_summary("G", self.G)

        def sigmoid_cross_entropy_with_logits(x, y):
            # Older TF versions named the second argument `targets`
            # instead of `labels`.
            try:
                return tf.nn.sigmoid_cross_entropy_with_logits(logits=x,
                                                               labels=y)
            except TypeError:
                return tf.nn.sigmoid_cross_entropy_with_logits(logits=x,
                                                               targets=y)


# =============================================================================
#     ### Loss for GAN
#     self.d_loss_real = tf.reduce_mean(
#       sigmoid_cross_entropy_with_logits(self.D_logits, tf.ones_like(self.D)))
#     self.d_loss_fake = tf.reduce_mean(
#       sigmoid_cross_entropy_with_logits(self.D_logits_, tf.zeros_like(self.D_)))
#     self.g_loss = tf.reduce_mean(
#       sigmoid_cross_entropy_with_logits(self.D_logits_, tf.ones_like(self.D_)))
#
#     self.d_loss = self.d_loss_real + self.d_loss_fake
# =============================================================================

        t_vars = tf.trainable_variables()

        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]

        ## Loss for WGAN
        #self.d_loss_real = - tf.reduce_mean(self.D)  # kept because it is also used elsewhere, not in this loss computation
        #self.d_loss_fake = tf.reduce_mean(self.D_)
        #    self.d_loss = self.d_loss_real + self.d_loss_fake
        #    self.g_loss = - tf.reduce_mean(self.D_)
        #    ### Weight clip for WGAN
        #    self.clip_D = [var.assign(tf.clip_by_value(var, -0.05, 0.05)) for var in self.d_vars]
        #self.d_loss_real_sum = scalar_summary("d_loss_real", self.d_loss_real)
        #self.d_loss_fake_sum = scalar_summary("d_loss_fake", self.d_loss_fake)
        #

        # LSGAN (least-squares GAN) losses
        self.d_loss_real = tf.reduce_sum(
            tf.square(self.D - 1)
        )  # kept because it is also used elsewhere, not only in this loss
        self.d_loss_fake = tf.reduce_sum(
            tf.square(self.D_))  # note: the LSGAN paper averages rather than sums
        self.d_loss = self.d_loss_real + self.d_loss_fake
        self.g_loss = tf.reduce_sum(tf.square(self.D_ - 1))
        # end LSGAN

        self.g_loss_sum = scalar_summary("g_loss", self.g_loss)
        self.d_loss_sum = scalar_summary("d_loss", self.d_loss)
        self.d_loss_real_sum = scalar_summary("d_loss_real", self.d_loss_real)
        self.d_loss_fake_sum = scalar_summary("d_loss_fake", self.d_loss_fake)

        self.saver = tf.train.Saver()
Example 54
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        (start_logits, end_logits) = create_model(
            bert_config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
        )

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (
                assignment_map,
                initialized_variable_names,
            ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            seq_length = modeling.get_shape_list(input_ids)[1]

            def compute_loss(logits, positions):
                one_hot_positions = tf.one_hot(positions, depth=seq_length, dtype=tf.float32)
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                loss = -tf.reduce_mean(tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
                return loss

            start_positions = features["start_positions"]
            end_positions = features["end_positions"]

            start_loss = compute_loss(start_logits, start_positions)
            end_loss = compute_loss(end_logits, end_positions)

            total_loss = (start_loss + end_loss) / 2.0
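            # SQuAD convention: the span loss is the mean of the start- and
            # end-position cross-entropies.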

            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu
            )

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn
            )
        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "unique_ids": unique_ids,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn
            )
        else:
            raise ValueError("Only TRAIN and PREDICT modes are supported: %s" % (mode))

        return output_spec
Example 55
def create_model(inputs1, inputs2, targets):
    def create_discriminator(discrim_inputs, discrim_targets):
        n_layers = 3
        layers = []

        input = tf.concat([discrim_inputs, discrim_targets], 3)

        with tf.variable_scope("layer_1"):
            convolved = conv(input, 3, a.ndf, 2)
            rectified = lrelu(convolved, 0.2)
            layers.append(rectified)

        for i in range(n_layers):
            with tf.variable_scope("layer_%d" % (len(layers) + 1)):
                out_channels = a.ndf * min(2**(i + 1), 8)
                stride = 1 if i == n_layers - 1 else 2  # last layer here has stride 1
                convolved = conv(layers[-1], 3, out_channels, stride=stride)
                rectified = lrelu(convolved, 0.2)
                layers.append(rectified)

        with tf.variable_scope("layer_%d" % (len(layers) + 1)):
            convolved = conv(rectified, 3, 1, 1)
            output = tf.sigmoid(convolved)
            layers.append(output)

        return layers[-1]

    with tf.variable_scope("generator") as scope:
        out_channels = int(targets.get_shape()[-1])
        outputs = create_generator(inputs1, inputs2, out_channels)

    with tf.name_scope("real_discriminator"):
        with tf.variable_scope("discriminator"):
            predict_real = create_discriminator(inputs1, targets)

    with tf.name_scope("fake_discriminator"):
        with tf.variable_scope("discriminator", reuse=True):
            predict_fake = create_discriminator(inputs1, outputs)

    with tf.name_scope("discriminator_loss"):
        discrim_loss = tf.reduce_mean(-(tf.log(predict_real + EPS) +
                                        tf.log(1 - predict_fake + EPS)))

    with tf.name_scope("generator_loss"):
        gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS))
        gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs))
        gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight
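        # pix2pix-style objective: the adversarial term is traded off
        # against an L1 reconstruction term via a.gan_weight and a.l1_weight.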

    with tf.name_scope("discriminator_train"):
        discrim_tvars = [
            var for var in tf.trainable_variables()
            if var.name.startswith("discriminator")
        ]
        discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        discrim_grads_and_vars = discrim_optim.compute_gradients(
            discrim_loss, var_list=discrim_tvars)
        discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars)

    with tf.name_scope("generator_train"):
        with tf.control_dependencies([discrim_train]):
            gen_tvars = [
                var for var in tf.trainable_variables()
                if var.name.startswith("generator")
            ]
            gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(
                gen_loss, var_list=gen_tvars)
            gen_train = gen_optim.apply_gradients(gen_grads_and_vars)

    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1])
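    # Note: this EMA only smooths the reported loss values; it does not
    # maintain averaged copies of the model weights.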

    global_step = tf.contrib.framework.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    return Model(
        predict_real=predict_real,
        predict_fake=predict_fake,
        discrim_loss=ema.average(discrim_loss),
        discrim_grads_and_vars=discrim_grads_and_vars,
        gen_loss_GAN=ema.average(gen_loss_GAN),
        gen_loss_L1=ema.average(gen_loss_L1),
        gen_grads_and_vars=gen_grads_and_vars,
        outputs=outputs,
        train=tf.group(update_losses, incr_global_step, gen_train),
    )
Example 56
def do_train(network, param):
    """Run training. If target labels are phone, the model is evaluated by PER
    with 39 phones.
    Args:
        network: network to train
        param: A dictionary of parameters
    """
    # Load dataset
    train_data = Dataset(data_type='train', label_type=param['label_type'],
                         batch_size=param['batch_size'],
                         num_stack=param['num_stack'],
                         num_skip=param['num_skip'],
                         is_sorted=True)
    dev_data = Dataset(data_type='dev', label_type=param['label_type'],
                       batch_size=param['batch_size'],
                       num_stack=param['num_stack'],
                       num_skip=param['num_skip'],
                       is_sorted=False)
    if param['label_type'] == 'character':
        test_data = Dataset(data_type='test', label_type='character',
                            batch_size=1,
                            num_stack=param['num_stack'],
                            num_skip=param['num_skip'],
                            is_sorted=False)
    else:
        test_data = Dataset(data_type='test', label_type='phone39',
                            batch_size=1,
                            num_stack=param['num_stack'],
                            num_skip=param['num_skip'],
                            is_sorted=False)

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default():

        # Define placeholders
        network.inputs = tf.placeholder(
            tf.float32,
            shape=[None, None, network.input_size],
            name='input')
        indices_pl = tf.placeholder(tf.int64, name='indices')
        values_pl = tf.placeholder(tf.int32, name='values')
        shape_pl = tf.placeholder(tf.int64, name='shape')
        network.labels = tf.SparseTensor(indices_pl, values_pl, shape_pl)
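        # CTC-style losses consume labels as a tf.SparseTensor, which is why
        # the three placeholders above are assembled into one here.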
        network.inputs_seq_len = tf.placeholder(tf.int64,
                                                shape=[None],
                                                name='inputs_seq_len')
        network.keep_prob_input = tf.placeholder(tf.float32,
                                                 name='keep_prob_input')
        network.keep_prob_hidden = tf.placeholder(tf.float32,
                                                  name='keep_prob_hidden')

        # Add to the graph each operation (including model definition)
        loss_op, logits = network.compute_loss(network.inputs,
                                               network.labels,
                                               network.inputs_seq_len,
                                               network.keep_prob_input,
                                               network.keep_prob_hidden)
        train_op = network.train(
            loss_op,
            optimizer=param['optimizer'],
            learning_rate_init=float(param['learning_rate']),
            decay_steps=param['decay_steps'],
            decay_rate=param['decay_rate'])
        decode_op = network.decoder(logits,
                                    network.inputs_seq_len,
                                    decode_type='beam_search',
                                    beam_width=20)
        ler_op = network.compute_ler(decode_op, network.labels)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(network.summaries_train)
        summary_dev = tf.summary.merge(network.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        # Make mini-batch generator
        mini_batch_train = train_data.next_batch()
        mini_batch_dev = dev_data.next_batch()

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operation on the graph
        with tf.Session() as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(
                network.model_dir, sess.graph)

            # Initialize parameters
            sess.run(init_op)

            # Train model
            # Iterations per epoch, rounded up to cover a final partial batch
            iter_per_epoch = int(train_data.data_num / param['batch_size'])
            if train_data.data_num % param['batch_size'] != 0:
                iter_per_epoch += 1
            max_steps = iter_per_epoch * param['num_epoch']
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            error_best = 1
            for step in range(max_steps):

                # Create feed dictionary for next mini batch (train)
                with tf.device('/cpu:0'):
                    inputs, labels, inputs_seq_len, _ = next(mini_batch_train)
                feed_dict_train = {
                    network.inputs: inputs,
                    network.labels: list2sparsetensor(labels, padded_value=-1),
                    network.inputs_seq_len: inputs_seq_len,
                    network.keep_prob_input: network.dropout_ratio_input,
                    network.keep_prob_hidden: network.dropout_ratio_hidden
                }

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % 10 == 0:

                    # Create feed dictionary for next mini batch (dev)
                    with tf.device('/cpu:0'):
                        inputs, labels, inputs_seq_len, _ = next(mini_batch_dev)
                    feed_dict_dev = {
                        network.inputs: inputs,
                        network.labels: list2sparsetensor(labels,
                                                          padded_value=-1),
                        network.inputs_seq_len: inputs_seq_len,
                        network.keep_prob_input: network.dropout_ratio_input,
                        network.keep_prob_hidden: network.dropout_ratio_hidden
                    }

                    # Compute loss
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    feed_dict_train[network.keep_prob_input] = 1.0
                    feed_dict_train[network.keep_prob_hidden] = 1.0
                    feed_dict_dev[network.keep_prob_input] = 1.0
                    feed_dict_dev[network.keep_prob_hidden] = 1.0

                    # Compute accuracy & update event file
                    ler_train, summary_str_train = sess.run(
                        [ler_op, summary_train], feed_dict=feed_dict_train)
                    ler_dev, summary_str_dev = sess.run(
                        [ler_op, summary_dev], feed_dict=feed_dict_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    duration_step = time.time() - start_time_step
                    print("Step %d: loss = %.3f (%.3f) / ler = %.4f (%.4f) (%.3f min)" %
                          (step + 1, loss_train, loss_dev, ler_train,
                           ler_dev, duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if (step + 1) % iter_per_epoch == 0 or (step + 1) == max_steps:
                    duration_epoch = time.time() - start_time_epoch
                    epoch = (step + 1) // iter_per_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (epoch, duration_epoch / 60))

                    # Save model (check point)
                    checkpoint_file = join(network.model_dir, 'model.ckpt')
                    save_path = saver.save(
                        sess, checkpoint_file, global_step=epoch)
                    print("Model saved in file: %s" % save_path)

                    if epoch >= 10:
                        start_time_eval = time.time()
                        with tf.device('/cpu:0'):
                            if param['label_type'] == 'character':
                                print('=== Dev Data Evaluation ===')
                                cer_dev_epoch = do_eval_cer(
                                    session=sess,
                                    decode_op=decode_op,
                                    network=network,
                                    dataset=dev_data,
                                    eval_batch_size=1)
                                print('  CER: %f %%' % (cer_dev_epoch * 100))

                                if cer_dev_epoch < error_best:
                                    error_best = cer_dev_epoch
                                    print('■■■ ↑Best Score (CER)↑ ■■■')

                                    print('=== Test Data Evaluation ===')
                                    cer_test = do_eval_cer(
                                        session=sess,
                                        decode_op=decode_op,
                                        network=network,
                                        dataset=test_data,
                                        eval_batch_size=1)
                                    print('  CER: %f %%' % (cer_test * 100))

                            else:
                                print('=== Dev Data Evaluation ===')
                                per_dev_epoch = do_eval_per(
                                    session=sess,
                                    decode_op=decode_op,
                                    per_op=ler_op,
                                    network=network,
                                    dataset=dev_data,
                                    label_type=param['label_type'],
                                    eval_batch_size=1)
                                print('  PER: %f %%' % (per_dev_epoch * 100))

                                if per_dev_epoch < error_best:
                                    error_best = per_dev_epoch
                                    print('■■■ ↑Best Score (PER)↑ ■■■')

                                    print('=== Test Data Evaluation ===')
                                    per_test = do_eval_per(
                                        session=sess,
                                        decode_op=decode_op,
                                        per_op=ler_op,
                                        network=network,
                                        dataset=test_data,
                                        label_type=param['label_type'],
                                        eval_batch_size=1)
                                    print('  PER: %f %%' % (per_test * 100))

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                start_time_epoch = time.time()
                start_time_step = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Save train & dev loss, ler
            save_loss(csv_steps, csv_loss_train, csv_loss_dev,
                      save_path=network.model_dir)
            save_ler(csv_steps, csv_ler_train, csv_ler_dev,
                     save_path=network.model_dir)

            # Training was finished correctly
            with open(join(network.model_dir, 'complete.txt'), 'w') as f:
                f.write('')
Example 57
    def test_mine(self):

        # Tests a single model

        # Initialize a new game and store the screens in the self.history
        screen, reward, is_done = self.game.new_game()
        for _ in range(self.params.history_length):
            self.history.add(screen)

        # Initialize the TensorFlow session
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self.params.gpu_memory
        )

        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

            # Initialize the TensorFlow session
            init = tf.global_variables_initializer()
            sess.run(init)
            # Only save trainable variables and the global iteration to disk
            tf_vars_to_save = tf.trainable_variables() + [self.dqn_train.global_iteration]
            saver = tf.train.Saver(tf_vars_to_save, max_to_keep=200)

            # Load the pre-trained model from disk
            model_path = os.path.join(self.checkpoint_dir, self.params.model_file)
            saver.restore(sess, model_path)

            prev_action_id = -1
            prev_episode_num = -1  # Just has to start out different from the current episode
            action_id = -1

            eval_total_reward = 0
            eval_num_episodes = 0
            eval_num_wins = 0
            eval_num_rewards = 0
            eval_episode_max_reward = 0
            eval_episode_reward = 0
            eval_actions = np.zeros(self.game.num_actions)

            # Initialize new game without random start moves
            screen, reward, done = self.game.new_game()

            for _ in range(self.params.history_length):
                self.history.add(screen)

            #for eval_iterations in range(self.params.eval_iterations):
            while eval_num_episodes < self.params.eval_iterations: # Play eval_iterations games
                if self.params.show_game:
                    inp = input("Enter and agent plays, e for exit: ")
                    if inp == "e":
                        break

                prev_action_id = action_id

                feed_dict_eval = {self.dqn_train.pl_screens: self.history.get()}
                qvalues = sess.run(self.dqn_train.qvalues, feed_dict=feed_dict_eval)

                # Choose the best action based on the approximated Q-values
                qvalue_max = np.max(qvalues[0])
                action_id = np.argmax(qvalues[0])

                # Skip this action if we are in the same game
                if prev_action_id == action_id and prev_episode_num == eval_num_episodes:
                    if self.params.show_game:
                        print("Agent repeated action, selecting random")
                    action_id = random.randrange(self.game.num_actions)
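                # Falling back to a random action when the policy repeats
                # itself keeps the deterministic agent from getting stuck in
                # loops during evaluation.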

                prev_episode_num = eval_num_episodes


                # Perform the action
                screen, reward, done = self.game.act(action_id)
                self.history.add(screen)

                eval_episode_reward += reward
                if reward > 0:
                    eval_num_rewards += 1

                if reward == self.game.env.rewards["win"]:
                    eval_num_wins += 1

                if done:
                    # Note: max reward is tracked across the games played
                    eval_total_reward += eval_episode_reward
                    eval_episode_max_reward = max(eval_episode_reward, eval_episode_max_reward)
                    eval_episode_reward = 0
                    eval_num_episodes += 1

                    screen, reward, done = self.game.new_game()
                    for _ in range(self.params.history_length):
                        self.history.add(screen)

            if eval_num_episodes > 0:
                print("  Win Rate: %.2f" % ((eval_num_wins / eval_num_episodes)*100))
Example 58
    def build_model(self):

        module = hub.Module(self.config.module_path, trainable=True)
        self.height, self.width = hub.get_expected_image_size(module)
        assert (self.height == self.config.image_height)
        assert (self.width == self.config.image_width)
        self.input, self.labels = self.data.next_element

        print(self.labels)
        self.input = tf.reshape(self.input, [-1, self.height, self.width, 3])
        self.labels = tf.reshape(self.labels, [-1])

        self.hidden_layer = module(self.input)
        self.logits = tf.layers.dense(self.hidden_layer, (self.n_classes + 1) *
                                      self.config.n_views,
                                      activation=None,
                                      name="logits")

        # shape: [n_objects_per_batch, n_views_in_batch, n_views_logits, n_classes+1]
        self.probs = tf.nn.softmax(
            tf.reshape(self.logits, [
                self.n_objects, self.config.n_views, self.config.n_views,
                self.n_classes + 1
            ]),
            axis=-1)
        self.log_p = tf.math.log(self.probs)

        self.scores = self.log_p[..., :-1] - tf.tile(self.log_p[..., -1:],
                                                     [1, 1, 1, self.n_classes])

        tiled = tf.tile(tf.reshape(self.labels, [self.n_objects, -1]),
                        [1, self.n_cands])
        tiled = tf.reshape(
            tiled, [self.n_objects, self.n_cands, self.config.n_views, 1])
        # tensor of shape [n_objs, n_cands, n_views, 4]
        self.gather_candidate_scores = tf.concat([self.indexes, tiled],
                                                 axis=-1)
        # candidate_scores[i, j] is the score for object i and view-order candidate j
        self.candidate_scores = tf.reduce_sum(
            tf.gather_nd(self.scores, self.gather_candidate_scores),
            axis=-1)

        best_candidates = tf.reshape(tf.argmin(self.candidate_scores, -1),
                                     [self.n_objects, 1])
        # pairs [[0, cand_0], [1, cand_1], ...]
        best_candidates = tf.concat([
            tf.reshape(tf.range(0, self.n_objects, dtype=tf.int64),
                       [self.n_objects, 1]), best_candidates
        ], axis=-1)
        """
        Calculate loss considering best order candidate
        """

        var_list = tf.trainable_variables()[-2:]
        # Train op
        with tf.name_scope("train"):
            #loss function
            with tf.name_scope("loss"):
                self.labels = tf.reshape(self.labels,
                                         [-1, self.config.n_views])[:, 0]
                #Indexes to calculate cross-entropy loss of best view candidates
                #shape [n_objects,n_views,3]
                self.gather_candidate_log_prob = tf.gather_nd(
                    self.gather_candidate_scores, best_candidates)

                # Indexes to sum the loss of i_view
                # Discount the i_view loss for the best viewpoint
                discount_iview = tf.concat(
                    [(self.n_classes + 1) *
                     tf.ones([self.n_objects, self.config.n_views, 1],
                             dtype=tf.int64),
                     self.gather_candidate_log_prob[..., :-1]],
                    axis=-1)
                self.discount_iview = discount_iview

                self.loss = -tf.gather_nd(self.log_p,
                                          self.gather_candidate_log_prob)
                self.loss = self.loss - tf.reduce_mean(self.log_p[:, :, :, -1],
                                                       axis=-1)
                self.loss = self.loss + tf.gather_nd(self.log_p,
                                                     discount_iview)
                self.loss = tf.reduce_mean(self.loss)

                #l2 loss (improves the performance)
                for var in var_list:
                    self.loss += tf.nn.l2_loss(var) * self.config.weight_decay

                self.predictions = self.select_best(self.logits)

            # setting different training ops for each part of the network
            # Get gradients of all trainable variables
            gradients = tf.gradients(self.loss, var_list)

            optimizer = tf.train.MomentumOptimizer(
                self.config.warmup_learning_rate, self.config.momentum)
            training_op = optimizer.apply_gradients(
                zip(gradients, var_list), global_step=self.global_step_tensor)
            self.train_step_warmup = training_op
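            # Warm-up: only the freshly initialised classifier head (the two
            # variables above, presumably the dense layer's kernel and bias)
            # is trained before the whole network is fine-tuned below.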

            var_list = tf.trainable_variables()
            # learning rate
            self.config.learning_rate = tf.train.exponential_decay(
                self.config.learning_rate,
                self.global_step_tensor,
                self.config.decay_steps,
                self.config.decay_rate,
                staircase=True)

            gradients = tf.gradients(self.loss, var_list)
            optimizer = tf.train.MomentumOptimizer(self.config.learning_rate,
                                                   self.config.momentum)
            training_op = optimizer.apply_gradients(
                zip(gradients, var_list), global_step=self.global_step_tensor)
            self.train_step = training_op
            self.sess.run(tf.global_variables_initializer())
Example 59
def main():
    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    if a.mode == "test":
        if a.checkpoint is None:
            raise Exception("checkpoint required for test mode")

    for k, v in a._get_kwargs():
        print(k, "=", v)

    with open(os.path.join(a.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(a), sort_keys=True, indent=4))

    examples = load_examples()
    model = create_model(examples.inputs1, examples.inputs2, examples.targets)

    with tf.name_scope("images"):
        display_fetches = {
            "inputs1": examples.inputs1,
            "inputs2": examples.inputs2,
            "targets": examples.targets,
            "outputs": model.outputs,
        }
    with tf.name_scope("inputs1_summary"):
        tf.summary.image("inputs1", examples.inputs1)

    with tf.name_scope("inputs2_summary"):
        tf.summary.image("inputs2", examples.inputs2)

    with tf.name_scope("targets1_summary"):
        tf.summary.image("targets1", examples.targets)

    with tf.name_scope("outputs_summary"):
        tf.summary.image("outputs", model.outputs)

    with tf.name_scope("predict_real_summary"):
        tf.summary.image("predict_real", model.predict_real)

    with tf.name_scope("predict_fake_summary"):
        tf.summary.image("predict_fake", model.predict_fake)

    tf.summary.scalar("discriminator_loss", model.discrim_loss)
    tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN)
    tf.summary.scalar("generator_loss_L1", model.gen_loss_L1)

    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)

    for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars:
        tf.summary.histogram(var.op.name + "/gradients", grad)

    with tf.name_scope("parameter_count"):
        parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    saver = tf.train.Saver(max_to_keep=1)

    logdir = a.output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None
    sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None)
    with sv.managed_session() as sess:
        print("parameter_count = ", sess.run(parameter_count))

        if a.checkpoint is not None:
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            saver.restore(sess, checkpoint)

        max_steps = 2**32
        if a.max_epochs is not None:
            max_steps = examples.steps_per_epoch * a.max_epochs
        if a.max_steps is not None:
            max_steps = a.max_steps

        if a.mode == "test":
            max_steps = int(a.test_count / a.batch_size)
            for i in range(max_steps):
                results = sess.run(display_fetches)
                save_images(results, i)
        else:
            start = time.time()

            for step in range(max_steps):

                def should(freq):
                    return freq > 0 and ((step + 1) % freq == 0
                                         or step == max_steps - 1)
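                # should(freq) gates periodic work (summaries, traces,
                # display images, checkpoints) to every freq-th step and to
                # the final step.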

                options = None
                run_metadata = None
                if should(a.trace_freq):
                    options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                fetches = {
                    "train": model.train,
                    "global_step": sv.global_step,
                }

                if should(a.progress_freq):
                    fetches["discrim_loss"] = model.discrim_loss
                    fetches["gen_loss_GAN"] = model.gen_loss_GAN
                    fetches["gen_loss_L1"] = model.gen_loss_L1

                if should(a.summary_freq):
                    fetches["summary"] = sv.summary_op

                if should(a.display_freq):
                    fetches["display"] = display_fetches

                results = sess.run(fetches,
                                   options=options,
                                   run_metadata=run_metadata)

                if should(a.summary_freq):
                    print("recording summary")
                    sv.summary_writer.add_summary(results["summary"],
                                                  results["global_step"])

                if should(a.display_freq):
                    print("saving display images")
                    save_images(results["display"],
                                step=results["global_step"])

                if should(a.trace_freq):
                    print("recording trace")
                    sv.summary_writer.add_run_metadata(
                        run_metadata, "step_%d" % results["global_step"])

                if should(a.progress_freq):
                    # global_step will have the correct step count if we resume from a checkpoint
                    train_epoch = math.ceil(results["global_step"] /
                                            examples.steps_per_epoch)
                    train_step = (results["global_step"] -
                                  1) % examples.steps_per_epoch + 1
                    rate = (step + 1) * a.batch_size / (time.time() - start)
                    remaining = (max_steps - step) * a.batch_size / rate
                    print(
                        "progress  epoch %d  step %d  image/sec %0.1f  remaining %dm"
                        % (train_epoch, train_step, rate, remaining / 60))
                    print("discrim_loss", results["discrim_loss"])
                    print("gen_loss_GAN", results["gen_loss_GAN"])
                    print("gen_loss_L1", results["gen_loss_L1"])

                if should(a.save_freq):
                    print("saving model")
                    saver.save(sess,
                               os.path.join(a.output_dir, "model"),
                               global_step=sv.global_step)

                if sv.should_stop():
                    break
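
As an aside, the periodic-trigger logic in should() above can be checked in isolation; a minimal sketch with hypothetical values, not part of the original example:

max_steps = 10

def should(step, freq):
    # True every `freq` steps and always on the final step; freq <= 0 disables it.
    return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1)

print([step for step in range(max_steps) if should(step, 4)])  # -> [3, 7, 9]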
Esempio n. 60
    def evaluate_mine(self):
        # Evaluate a range of saved checkpoints (identified by the iteration
        # number in the file name) and report the one with the best win rate.

        # Initialize a new game and store the initial screens in self.history
        screen, reward, is_done = self.game.new_game()
        for _ in range(self.params.history_length):
            self.history.add(screen)

        # Cap the fraction of GPU memory this process may allocate
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self.params.gpu_memory
        )

        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            max_name = 800000
            min_name = 680000
            current_name = min_name
            best_model = min_name
            best_win_rate = 0
            current_win_rate = 0

            # Initialize all variables in the graph
            init = tf.global_variables_initializer()
            sess.run(init)
            # Only save trainable variables and the global iteration to disk
            tf_vars_to_save = tf.trainable_variables() + [self.dqn_train.global_iteration]
            saver = tf.train.Saver(tf_vars_to_save, max_to_keep=200)

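            # Sweep the saved checkpoints named 'model-<iteration>' from
            # min_name to max_name in steps of 20,000 iterations, keeping
            # track of the checkpoint with the best win rate.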
            while current_name <= max_name:

                print("Restoring: ", current_name)

                # Restore the weights saved at the current iteration count.
                model_path = os.path.join(self.checkpoint_dir, 'model-' + str(current_name))
                saver.restore(sess, model_path)

                prev_action_id = -1
                prev_episode_num = -1  # Must start out different from the current episode number
                action_id = -1

                eval_total_reward = 0
                eval_num_episodes = 0
                eval_num_wins = 0
                eval_num_rewards = 0
                eval_episode_max_reward = 0
                eval_episode_reward = 0
                eval_actions = np.zeros(self.game.num_actions)

                # Initialize new game without random start moves
                screen, reward, done = self.game.new_game()

                for _ in range(self.params.history_length):
                    self.history.add(screen)

                # Play eval_iterations full games
                while eval_num_episodes < self.params.eval_iterations:
                    prev_action_id = action_id

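                    # Feed the most recent history of screens to the network
                    # and read back the predicted Q-values for each action.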
                    feed_dict_eval = {self.dqn_train.pl_screens: self.history.get()}
                    qvalues = sess.run(self.dqn_train.qvalues, feed_dict=feed_dict_eval)

                    # Choose the best action based on the approximated Q-values
                    qvalue_max = np.max(qvalues[0])
                    action_id  = np.argmax(qvalues[0])

                    # If the same greedy action repeats within one episode,
                    # substitute a random action to avoid getting stuck
                    if prev_action_id == action_id and prev_episode_num == eval_num_episodes:
                        action_id = random.randrange(self.game.num_actions)

                    prev_episode_num = eval_num_episodes

                    # Perform the action
                    screen, reward, done = self.game.act(action_id)
                    self.history.add(screen)

                    eval_episode_reward += reward
                    if reward > 0:
                        eval_num_rewards += 1

                    if reward == self.game.env.rewards["win"]:
                        eval_num_wins += 1

                    if done:
                        # Track the highest single-episode reward across the games played
                        eval_total_reward += eval_episode_reward
                        eval_episode_max_reward = max(eval_episode_reward, eval_episode_max_reward)
                        eval_episode_reward = 0
                        eval_num_episodes += 1

                        screen, reward, done = self.game.new_game()
                        for _ in range(self.params.history_length):
                            self.history.add(screen)

                current_win_rate = 100.0 * eval_num_wins / eval_num_episodes

                print("  Win Rate: %.2f" % current_win_rate)

                if current_win_rate > best_win_rate:
                    best_win_rate = current_win_rate
                    best_model = current_name

                current_name += 20000

            print("Best model is: ", best_model)