Example #1
def get_run_op():
  # Create an optimizer that performs gradient descent.
  #opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  slice_size = FLAGS.batch_size / FLAGS.num_cuts
  print('Slice size:{}'.format(slice_size))
  data = None
  label = None
  last_fc = [tf.no_op()]
  with tf.device('/gpu:0'):
    data = tf.get_variable(
        name = 'data',
        shape=[slice_size, FLAGS.hidden_size],
        trainable=False)
    '''
    label = tf.get_variable(
        name = 'label',
        shape = [slice_size, FLAGS.hidden_size],
        trainable=False)
    with tf.variable_scope('fc_in'):
      weight_in = tf.zeros([1000, FLAGS.hidden_size])
      for k in xrange(FLAGS.num_cuts):
        with tf.control_dependencies([last_fc[-1]]):
            last_fc.append(tf.matmul(data[k+1], weight_in))
    '''
  for i in xrange(FLAGS.num_cuts):
    last_fc.append(data)
  for i in xrange(FLAGS.num_layers):
    dev = '/gpu:%d' % (i * FLAGS.num_gpus / FLAGS.num_layers)
    with tf.device(dev), scopes.arg_scope([variables.variable], device=dev):
      tmp_fc = [tf.no_op()]
      with tf.variable_scope('fc%d' % i):
        w = tf.get_variable(
            name='w',
            shape=[FLAGS.hidden_size, FLAGS.hidden_size],
            trainable=True)
        for k in xrange(FLAGS.num_cuts):
          with tf.control_dependencies([tmp_fc[-1]]):
            tmp_fc.append(tf.matmul(last_fc[k+1], w))
      last_fc = tmp_fc
      if i == FLAGS.num_layers - 1:
        with tf.control_dependencies(last_fc):
          train_op = tf.no_op()
  '''
  with tf.device('/gpu:%d' % (FLAGS.num_gpus - 1)):
    tmp_fc = [tf.no_op()]
    with tf.variable_scope('fc_out'):
      weight_out = tf.zeros([FLAGS.hidden_size, 1000])
      for k in xrange(FLAGS.num_cuts):
        with tf.control_dependencies([tmp_fc[-1]]):
          tmp_fc.append(tf.matmul(last_fc[k+1], weight_out))
    last_fc = tmp_fc
  loss = tf.nn.softmax_cross_entropy_with_logits(last_fc, labels, name='xentropy')
  grads = opt.compute_gradients(loss)
  apply_gradient_op = opt.apply_gradients(grads)

  train_op = tf.group(apply_gradient_op)
  '''
  init_op = tf.initialize_all_variables()

  return init_op, train_op
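
A minimal driver sketch for the function above; it assumes the FLAGS referenced inside get_run_op() are defined elsewhere in the script, and the step count is arbitrary:

# Hypothetical usage of get_run_op(); FLAGS and the graph-building code above are assumed.
init_op, train_op = get_run_op()
with tf.Session() as sess:
    sess.run(init_op)            # initialize the 'data' and per-layer 'w' variables
    for _ in range(10):          # arbitrary number of steps
        sess.run(train_op)       # runs the chained matmuls via the control dependencies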
Example #2
    def run_epoch(self, session, x, y=None, train_op=None, shuffle=True, verbose=10):
        dp=self.config.dropout
        predictions=self.predictions
        loss=self.loss
        if not train_op:
            train_op=tf.no_op()
            dp=1
        if y is None:
            loss=tf.no_op()

        total_steps=sum(1 for x in data_iterator(x, y, self.config.batch_size))
        total_loss=[]
        total_pred=[]

        for step, (_x, _y) in enumerate(data_iterator(x, y, self.config.batch_size, shuffle)):
            feed={self.input_placeholder: _x,
                  self.dropout_placeholder: dp}
            if _y is not None:
                feed[self.labels_placeholder]=_y
            
            _pred, _loss, _=session.run([predictions, loss, train_op], feed_dict=feed)
            total_pred.append(_pred)
            if y is not None:
                total_loss.append(_loss)
            if verbose and step % verbose==0:
                sys.stdout.write('\r{} / {} : loss = {}'.format(
                    step, total_steps, np.mean(total_loss)))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
            sys.stdout.flush()

        assert np.vstack(total_pred).reshape([-1]).shape[0]==x.shape[0], 'pred and x not equal size'
        return np.vstack(total_pred).reshape([-1]), np.mean(total_loss)
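
A hypothetical call of the run_epoch above; model, session and the arrays stand in for whatever the surrounding script defines:

# Training pass: dropout active, parameters updated through model.train_op.
train_preds, train_loss = model.run_epoch(session, x_train, y_train, train_op=model.train_op)
# Evaluation pass: no train_op, so tf.no_op() is run and dropout is disabled (dp=1).
val_preds, val_loss = model.run_epoch(session, x_val, y_val)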
def main(_):
  assert(FLAGS.num_gpus > 1)
  slice_size = int(FLAGS.hidden_size / FLAGS.num_gpus)
  feature_size = slice_size * FLAGS.num_gpus
  print("Slice size: {} Feature size: {}".format(slice_size, feature_size))
  weight_shape = [slice_size, feature_size]

  # create graph
  weights, grads = make_weights(weight_shape)
  ff_deps = [[tf.no_op() for j in range(FLAGS.num_gpus)] for i in range(FLAGS.num_layers)]
  bp_deps = [[tf.no_op() for j in range(FLAGS.num_gpus)] for i in range(FLAGS.num_layers)]
  for i in range(FLAGS.num_cuts):
    with tf.name_scope('data_cut%d' % i):
      data = make_data(FLAGS.batch_size / FLAGS.num_cuts, slice_size)
    with tf.name_scope('model_cut%d' % i):
      ff_deps, bp_deps = ff_bp(data, weights, grads, ff_deps, bp_deps)

  # create session
  sess = tf.Session()
  # init variables
  print('Initialize Variables')
  sess.run(tf.initialize_all_variables())
  print('Initialize Done')
  # run
  merged = tf.merge_all_summaries()
  writer = tf.train.SummaryWriter(log_dir, sess.graph)
  grads_flatten = sum(grads, [])
  with tf.control_dependencies(grads_flatten):
    train_op = tf.no_op()
  time_tensorflow_run(sess, train_op, 'Training')
def main(unused_args):

    config = get_config(FLAGS.model_size)
    eval_config = get_config(FLAGS.model_size)
    saved_model_path = FLAGS.model_path
    weights_dir = FLAGS.weights_dir
    verbose = FLAGS.verbose
    debug = FLAGS.debug


    if weights_dir is not None:
        if not os.path.exists(weights_dir):
            os.mkdir(weights_dir)
    if not debug:
        raw_data = reader.ptb_raw_data(FLAGS.data_path, "ptb.train.txt", "ptb.valid.txt", "ptb.test.txt")
    else:
        raw_data = reader.ptb_raw_data(FLAGS.data_path, "emma.txt", "emma.val.txt", "emma.test.txt")

    # load up PTB data
    train_data, val_data, test_data, vocab, word_to_id = raw_data

    with tf.Graph().as_default(), tf.Session() as session:
        initialiser = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

        with tf.variable_scope('model', reuse=None, initializer=initialiser):
            m = ACTModel(config,is_training=True)

            # if we have a saved/pre-trained model, load it.
            if saved_model_path is not None:
                saveload.main(saved_model_path, session)

        with tf.variable_scope("model", reuse=True):
            m_val = ACTModel(config, is_training=False)
            m_test = ACTModel(eval_config,is_training=False)

        tf.initialize_all_variables().run()

        print("starting training")
        for i in range(config.max_max_epoch):

            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            session.run(tf.assign(m.lr, config.learning_rate * lr_decay))
            train_loss = run_epoch(session, m, train_data, m.train_op, verbose=True)
            valid_loss = run_epoch(session, m_val, val_data, tf.no_op())

            if verbose:
                print("Epoch: {} Learning rate: {}".format(i + 1, session.run(m.lr)))
                print("Epoch: {} Train Loss: {}".format(i + 1, train_loss))
                print("Epoch: %d Valid Loss: %.3f" % (i + 1, valid_loss))

            # save weights in a pickled dictionary format
            if weights_dir is not None:
                date = "{:%m.%d.%H.%M}".format(datetime.now())
                saveload.main(weights_dir + "/Epoch_{:02}Train_{:0.3f}Val_{:0.3f}date{}.pkl"
                              .format(i+1,train_loss,valid_loss, date), session)


        test_loss = run_epoch(session, m_test, test_data, tf.no_op())
    if verbose:
        print("Test Perplexity: %.3f" % test_loss)
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
    def build_eval_graph(self):
        # Keep track of the totals while running through the batch data
        self.total_loss = tf.Variable(0.0, trainable=False, collections=[])
        self.total_correct = tf.Variable(0.0, trainable=False, collections=[])
        self.example_count = tf.Variable(0.0, trainable=False, collections=[])

        # Calculates the means
        self.mean_loss = self.total_loss / self.example_count
        self.accuracy = self.total_correct / self.example_count

        # Operations to modify the stateful variables
        inc_total_loss = self.total_loss.assign_add(self.model.total_loss)
        inc_total_correct = self.total_correct.assign_add(
            tf.reduce_sum(tf.cast(self.model.correct_predictions, "float")))
        inc_example_count = self.example_count.assign_add(self.model.batch_size)

        # Operation to reset all the stateful vars. Should be called before starting a data set evaluation.
        with tf.control_dependencies(
                [self.total_loss.initializer, self.total_correct.initializer, self.example_count.initializer]):
            self.eval_reset = tf.no_op()

        # Operation to modify the stateful variables with data from one batch
        # Should be called for each batch in the evaluation set
        with tf.control_dependencies([inc_total_loss, inc_total_correct, inc_example_count]):
            self.eval_step = tf.no_op()

        # Summaries
        summary_mean_loss = tf.scalar_summary("mean_loss", self.mean_loss)
        summary_acc = tf.scalar_summary("accuracy", self.accuracy)
        self.summaries = tf.merge_summary([summary_mean_loss, summary_acc])
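
A minimal sketch of the reset/step protocol described in the comments above; evaluator, sess, batches and feed_dict_for are hypothetical names, not part of the original class:

sess.run(evaluator.eval_reset)                   # zero the running totals
for batch in batches:
    # accumulate total_loss, total_correct and example_count for one batch
    sess.run(evaluator.eval_step, feed_dict=feed_dict_for(batch))
mean_loss, accuracy = sess.run([evaluator.mean_loss, evaluator.accuracy])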
    def testQueueRunnerSerializationRoundTrip(self):
        graph = tf.Graph()
        with graph.as_default():
            queue = tf.FIFOQueue(10, tf.float32, name="queue")
            enqueue_op = tf.no_op(name="enqueue")
            close_op = tf.no_op(name="close")
            cancel_op = tf.no_op(name="cancel")
            qr0 = tf.train.QueueRunner(
                queue,
                [enqueue_op],
                close_op,
                cancel_op,
                queue_closed_exception_types=(tf.errors.OutOfRangeError, tf.errors.CancelledError),
            )
            qr0_proto = tf.train.QueueRunner.to_proto(qr0)
            qr0_recon = tf.train.QueueRunner.from_proto(qr0_proto)
            self.assertEqual("queue", qr0_recon.queue.name)
            self.assertEqual(1, len(qr0_recon.enqueue_ops))
            self.assertEqual(enqueue_op, qr0_recon.enqueue_ops[0])
            self.assertEqual(close_op, qr0_recon.close_op)
            self.assertEqual(cancel_op, qr0_recon.cancel_op)
            self.assertEqual(
                (tf.errors.OutOfRangeError, tf.errors.CancelledError), qr0_recon.queue_closed_exception_types
            )

            # Assert we reconstruct an OutOfRangeError for QueueRunners
            # created before QueueRunnerDef had a queue_closed_exception_types field.
            del qr0_proto.queue_closed_exception_types[:]
            qr0_legacy_recon = tf.train.QueueRunner.from_proto(qr0_proto)
            self.assertEqual("queue", qr0_legacy_recon.queue.name)
            self.assertEqual(1, len(qr0_legacy_recon.enqueue_ops))
            self.assertEqual(enqueue_op, qr0_legacy_recon.enqueue_ops[0])
            self.assertEqual(close_op, qr0_legacy_recon.close_op)
            self.assertEqual(cancel_op, qr0_legacy_recon.cancel_op)
            self.assertEqual((tf.errors.OutOfRangeError,), qr0_legacy_recon.queue_closed_exception_types)
Example #8
def main(_):

  filename = "Data11-17.txt"
  vectors_data1,labels_data1 = read_data.read_data(filename)
  filename = "valid18-20.txt"
  vectors_data2,labels_data2 = read_data.read_data(filename)
  filename = "Data21-25.txt"
  vectors_data3,labels_data3 = read_data.read_data(filename)

  vectors_data = np.vstack((vectors_data1,vectors_data2,vectors_data3))
  print(vectors_data.shape)
  labels_data = np.vstack((np.reshape(labels_data1,(len(labels_data1),1)),
    np.reshape(labels_data2,(len(labels_data2),1)),
      np.reshape(labels_data3,(len(labels_data3),1))))
  labels_data = np.reshape(labels_data,-1)
  print(labels_data.shape)

  filename = "Data4-10.txt"
  validation_data,vlabels_data = read_data.read_data(filename)
  filename = "Data26-29.txt"
  test_data,tlabels_data = read_data.read_data(filename)
  test_data = test_data[0:8000,]
  tlabels_data = tlabels_data[0:8000,]

  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:

    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=config)
      mtest = PTBModel(is_training=False, config=eval_config)
    
    
    tf.initialize_all_variables().run()

    summary_writer = tf.train.SummaryWriter("train/lstm3s",session.graph)

    for i in range(config.max_max_epoch):
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)

      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))

      train_perplexity = run_epoch(session, m, vectors_data, labels_data, m.train_op,summary_writer, 
                                   verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))

      valid_perplexity = run_epoch(session, mvalid, validation_data, vlabels_data, tf.no_op(),summary_writer)
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

    test_perplexity = run_epoch(session, mtest, test_data, tlabels_data, tf.no_op(),summary_writer)
    print("Test Perplexity: %.3f" % test_perplexity)
def train():
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to data directory")

  raw_data = reader.raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _, word_to_id = raw_data

  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, is_testing=False, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, is_testing=False, config=config)
      mtest = PTBModel(is_training=False, is_testing=True, config=eval_config)

    # tf.initialize_all_variables().run()
    if not os.path.exists(FLAGS.train_path):
      os.makedirs(FLAGS.train_path)
      
    session.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_path)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
      print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
      m.saver.restore(session, ckpt.model_checkpoint_path)
    else:
      print("Created model with fresh parameters.")
      session.run(tf.initialize_all_variables())

    valid_perplexity_old = 1000000000000000000

    for i in range(config.max_max_epoch):
      [train_data, valid_data, test_data] = reader.split_data(raw_data)

      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)

      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity = run_epoch(session, m, train_data, m.train_op, verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

      if valid_perplexity > valid_perplexity_old:
        break

      checkpoint_path = os.path.join(FLAGS.train_path, "translate.ckpt")
      m.saver.save(session, checkpoint_path, global_step=i)

      valid_perplexity_old = valid_perplexity

    test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
    print("Test Perplexity: %.3f" % test_perplexity)
Example #10
def main(unused_args):
  if not FLAGS.data_path:
    raise ValueError("Must specify --data_path to PTB data directory")

  if not FLAGS.save_path:
    raise ValueError("Must specify --save_path to model directory")

  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data

  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=config)
      mtest = PTBModel(is_training=False, config=eval_config)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    ckpt=tf.train.get_checkpoint_state(FLAGS.save_path)
    if (ckpt):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        tf.initialize_all_variables().run()

    if not FLAGS.testonly:

        for i in range(config.max_max_epoch):
        
           lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
           m.assign_lr(session, config.learning_rate * lr_decay)

           print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
           train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                   verbose=True)
           print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      
           save_path = saver.save(session, FLAGS.save_path+'/model.ckpt',i)
           print("Model saved in: %s" % save_path)
      
           valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
           print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

    else:
         print("Running only a perplexity test")

    test_perplexity = run_epoch(session, mtest, test_data, tf.no_op(),verbose=True)
    print("Test Perplexity: %.3f" % test_perplexity)
Example #11
def main(config_size='small', num_epochs=10):
    
    def get_config(config_size):
        config_size = config_size.lower()
        if config_size == 'small':
            return c.SmallConfig()
        elif config_size == 'medium':
            return c.MediumConfig()
        elif config_size == 'large':
            return c.LargeConfig()
        else:
            raise ValueError('Unknown config size {} (small, medium, large)'.format(config_size))

    def run_epoch(session, m, data, eval_op, verbose=False):
        """Runs the model on the given data."""
        epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
        print(epoch_size)
        start_time = time.time()
        costs = 0.0
        iters = 0
        state = m.initial_state.eval()
        for step, (x, y) in enumerate(seq_iterator(data, m.batch_size, m.num_steps)):
            cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                             {m.input_data: x, m.targets: y, m.initial_state: state})
            costs += cost
            iters += m.num_steps

            print_interval = 20
            if verbose and epoch_size > print_interval \
                    and step % (epoch_size // print_interval) == print_interval:
                print("%.3f mse: %.8f speed: %.0f ips" % (step * 1.0 / epoch_size, costs / iters,
                     iters * m.batch_size / (time.time() - start_time)))
        return costs / (iters if iters > 0 else 1)

    with tf.Graph().as_default(), tf.Session() as session:
        config = get_config(config_size)
        initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = StockLSTM(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mtest = StockLSTM(is_training=False, config=config)

        tf.initialize_all_variables().run()

        train_data, valid_data, test_data = get_data()

        for epoch in xrange(num_epochs):
            lr_decay = config.lr_decay ** max(epoch - num_epochs, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            cur_lr = session.run(m.lr)

            mse = run_epoch(session, m, train_data, m.train_op, verbose=True)
            vmse = run_epoch(session, mtest, valid_data, tf.no_op())
            print("Epoch: %d - learning rate: %.3f - train mse: %.3f - test mse: %.3f" %
                  (epoch, cur_lr, mse, vmse))

        tmse = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test mse: %.3f" % tmse)
Example #12
def main(_):
    ##### Configure these based on current situation. #####
    preload_model = False   # Shall we preload preloaded_epoch or train it from scratch?
    preloaded_epoch = 0     # The epoch to load (if required). Counting from 0.
    #######################################################
    if preload_model:
        load_model_file = "model{}.ckpt".format(preloaded_epoch)
        preloaded_epoch += 1
    else:
        preloaded_epoch = 0
    
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        if(preload_model):
            saver.restore(session, load_model_file)

        for i in range(preloaded_epoch, config.max_max_epoch):
            # Some simple learning rate scheduling. :-)
            if(i>3):
                config.learning_rate = 0.1
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
            # Save the variables to disk.
            save_path = saver.save(session, "model{}.ckpt".format(i))
            print("Model saved in file: %s" % save_path)

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [ v for _,v in grads_and_vars]
    d_vars = []
    g_vars = []
    for grad,var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise ValueError("Couldn't find var in g_vars or d_vars")
    w = [tf.Variable(self.config.start_at or 0.0), tf.Variable(self.config.start_at or 0.0)]

    Vidv = [self.gan.trainer.d_loss, self.gan.trainer.g_loss]
    #Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2.* self.gan.trainer.g_loss, -1/2. * self.gan.trainer.d_loss - 1/2.* self.gan.trainer.g_loss]
    Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2.* self.gan.trainer.g_loss, 1/2. * self.gan.trainer.d_loss + 1/2.* self.gan.trainer.g_loss]

    wlr = self.config.w_learn_rate or 0.01
    wt1 = [w[0] + wlr * (Vidv[0] - Vsoc[0]), w[1] + wlr * (Vidv[1] - Vsoc[1])]
    def clamped(net):
        return tf.maximum(self.config.min or 0., tf.minimum(net, self.config.max or 1.))

    self._prepare()

    wt1 = [clamped(wt1[0]),clamped(wt1[1])]
    self.gan.add_metric('wt0', wt1[0])
    self.gan.add_metric('wt1', wt1[1])
    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(w, wt1)]) # store variables

    with tf.get_default_graph().control_dependencies([op1]):
        Vi = [(1. - w[0]) * Vidv[0] + w[0] * Vsoc[0],
              (1. - w[1]) * Vidv[1] + w[1] * Vsoc[1]]
        if self.config.reverse_w:
            Vi = [(w[0]) * Vidv[0] + (1.0-w[0]) * Vsoc[0],
                  (w[1]) * Vidv[1] + (1.0-w[1]) * Vsoc[1]]
        self.gan.add_metric('w0', w[0])
        self.gan.add_metric('w1', w[1])

        new_grads = tf.gradients(Vi[0], d_vars) + tf.gradients(Vi[1], g_vars)
        self.gan.trainer.d_loss = Vi[0]
        self.gan.trainer.g_loss = Vi[1]
        new_grads_and_vars = list(zip(new_grads, var_list)).copy()
        op3 = self.optimizer.apply_gradients(new_grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op3]):
            if(self.config.w_l1):
                # return to selfish state
                wt1 = [wt1[0] + self.config.w_l1 * ((self.config.l1_default or 0.0)-wt1[0]),
                       wt1[1] + self.config.w_l1 * ((self.config.l1_default or 0.0)-wt1[1])]
                op4 = tf.group(*[tf.assign(w, v) for w,v in zip(w, wt1)]) # store variables
                with tf.get_default_graph().control_dependencies([op4]):
                    self.gan.add_metric('l1w0', w[0])
                    self.gan.add_metric('l1w1', w[1])
                    return tf.no_op()

            else:
                return tf.no_op()
def main(_):
    if not FLAGS.data_path:
        #raise ValueError("Must set --data_path to PTB data directory")
        pass

    train_data, valid_data, test_data = imdb_data.load_data()
    word2id, id2word = imdb_data.load_dict_imdb()

    accsTrain = []
    accsTest = []


    config = Config()
    eval_config = Config()
    eval_config.batch_size = 1
    
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = SentimentModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = SentimentModel(is_training=False, config=config)
            mtest = SentimentModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()

        print("Starting")
        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, accTrain = run_epoch(session, m, train_data, m.train_op, id2word,
                                       verbose=True)
            accsTrain.append(accTrain)
            
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity, crap = run_epoch(session, mvalid, valid_data, tf.no_op(),
                                         id2word)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

            test_perplexity, accTest = run_epoch(session, mtest, test_data, tf.no_op(),id2word)
            accsTest.append(accTest)
            print("Test Perplexity: %.3f" % test_perplexity)
        
    plt.figure()
    plt.plot(accsTrain, label="train")
    plt.plot(accsTest, label="test")
    plt.show()
    plt.close()    
  def _cached_copy(self, var, name, pass_through=False):
    """Helper function to create a worker cached copy of a Variable.

    This assigns the var (either a single Variable or a list of Variables) to
    local transient cache Variable(s). Note that if var is a list of Variables,
    the assignment is done sequentially to minimize the memory overheads.
    Also note that if pass_through is set to True, this does not create new
    Variables but simply returns the input.

    Args:
      var: A Variable or a list of Variables to cache.
      name: name of cached Variable.
      pass_through: when set to True, this simply passes the var back through
        an identity operator and does not actually create a cache.

    Returns:
      Tuple consisting of following three entries:
      cache: the new transient Variable or list of transient Variables
        corresponding one-to-one with var.
      cache_init: op to initialize the Variable or the list of Variables.
      cache_reset: op to reset the Variable or the list of Variables to some
        default value.
    """
    if var is None:
      return None, None, None
    elif pass_through:
      cache = var
      cache_init = tf.no_op()
      cache_reset = tf.no_op()
    elif isinstance(var, tf.Variable):
      cache = WALSModel._transient_var(name=name)
      with ops.colocate_with(cache):
        cache_init = tf.assign(cache, var, validate_shape=False)
        cache_reset = tf.assign(cache, 1.0, validate_shape=False)
    else:
      assert isinstance(var, list)
      assert var
      cache = [WALSModel._transient_var(name='%s_shard_%d' % (name, i))
               for i in xrange(len(var))]
      reset_ops = []
      for i, c in enumerate(cache):
        with ops.colocate_with(c):
          if i == 0:
            cache_init = tf.assign(c, var[i], validate_shape=False)
          else:
            with ops.control_dependencies([cache_init]):
              cache_init = tf.assign(c, var[i], validate_shape=False)
          reset_ops.append(tf.assign(c, 1.0, validate_shape=False))
      cache_reset = tf.group(*reset_ops)

    return cache, cache_init, cache_reset
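
A minimal sketch of how the three returned ops might be used on a worker; model and some_factor_var are hypothetical stand-ins:

cache, cache_init, cache_reset = model._cached_copy(some_factor_var, name='factor_cache')
sess.run(cache_init)     # copy the current value of some_factor_var into the transient cache
# ... read from `cache` in worker-local computation ...
sess.run(cache_reset)    # reset the cache to its default value (1.0) afterwards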
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to data directory")

    config = get_config()
    eval_config = get_config()
    # eval_config.batch_size = 1
    # eval_config.num_steps = 1

    raw_data, vocab_size = reader.converted_data(FLAGS.data_path, max_len=config.num_steps, min_nwords=200)
    config.vocab_size = vocab_size
    eval_config.vocab_size = vocab_size

    train_data, valid_data, test_data = reader.split_rawdata(raw_data)

    sess = tf.InteractiveSession()

    if os.path.exists(FLAGS.log_dir):
        shutil.rmtree(FLAGS.log_dir)
    writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph_def)

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = GenderModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = GenderModel(is_training=False, config=config)
            mtest = GenderModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_error, train_acc, summary = run_epoch(session, m, train_data, m.train_op, verbose=True)
            writer.add_summary(summary, i)
            print("Epoch: %d Train xentropy: %.3f" % (i + 1, train_error))
            print("Epoch: %d Train accuracy: %.3f" % (i + 1, train_acc))

            valid_error, valid_acc, summary = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Validation xentropy: %.3f" % (i + 1, valid_error))
            print("Epoch: %d Validation accuracy: %.3f" % (i + 1, valid_acc))

        test_err, test_acc, summary = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Accuracy %.3f" % test_acc)
 def testReuseVars(self):
   height, width = 3, 3
   with self.test_session() as sess:
     image_shape = (10, height, width, 3)
     image_values = np.random.rand(*image_shape)
     expected_mean = np.mean(image_values, axis=(0, 1, 2))
     expected_var = np.var(image_values, axis=(0, 1, 2))
     images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
     output = ops.batch_norm(images, decay=0.1, is_training=False)
     update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
     with tf.control_dependencies(update_ops):
       barrier = tf.no_op(name='gradient_barrier')
       output = control_flow_ops.with_dependencies([barrier], output)
     # Initialize all variables
     sess.run(tf.global_variables_initializer())
     moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
     moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
     mean, variance = sess.run([moving_mean, moving_variance])
     # After initialization moving_mean == 0 and moving_variance == 1.
     self.assertAllClose(mean, [0] * 3)
     self.assertAllClose(variance, [1] * 3)
     # Simulate assignment from saver restore.
     init_assigns = [tf.assign(moving_mean, expected_mean),
                     tf.assign(moving_variance, expected_var)]
     sess.run(init_assigns)
     for _ in range(10):
       sess.run([output], {images: np.random.rand(*image_shape)})
     mean = moving_mean.eval()
     variance = moving_variance.eval()
     # Although we feed different images, the moving_mean and moving_variance
     # shouldn't change.
     self.assertAllClose(mean, expected_mean)
     self.assertAllClose(variance, expected_var)
Example #18
def namignize(names, checkpoint_path, config):
    """Recognizes names and prints the perplexity of the model for each name
    in the list.

    Args:
        names: a list of names in the model format
        checkpoint_path: the path to restore the trained model from; should not
            include the model name, just the path to it
        config: one of the above configs that specifies the model and how it
            should be run and trained
    Returns:
        None
    """
    with tf.Graph().as_default(), tf.Session() as session:

        with tf.variable_scope("model"):
            m = NamignizerModel(is_training=False, config=config)

        m.saver.restore(session, checkpoint_path)

        for name in names:
            x, y = data_utils.name_to_batch(name, m.batch_size, m.num_steps)

            cost, loss, _ = session.run([m.cost, m.loss, tf.no_op()],
                                  {m.input_data: x,
                                   m.targets: y,
                                   m.weights: np.concatenate((
                                       np.ones(len(name)), np.zeros(m.batch_size * m.num_steps - len(name))))})

            print("Name {} gives us a perplexity of {}".format(
                name, np.exp(cost)))
Example #19
def moving_average(value, window):
    value = tf.to_float(value)
    shape = value.get_shape()

    queue_init = tf.zeros(tf.TensorShape(window).concatenate(shape))
    total_init = tf.zeros(shape)
    num_init = tf.constant(0, dtype=tf.float32)

    queue = tf.FIFOQueue(window, [tf.float32], shapes=[shape])
    total = tf.Variable(total_init, trainable=False)
    num = tf.Variable(num_init, trainable=False)

    init = tf.cond(
        tf.equal(queue.size(), 0),
        lambda: tf.group(
            queue.enqueue_many(queue_init),
            total.assign(total_init),
            num.assign(num_init)),
        lambda: tf.no_op())

    with tf.control_dependencies([init]):
        total_ = total + value - queue.dequeue()
        num_ = num + 1
        value_averaged = total_ / (tf.minimum(num_, window) + EPSILON)

        with tf.control_dependencies([queue.enqueue([value]), total.assign(total_), num.assign(num_)]):
            return tf.identity(value_averaged)
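
A minimal sketch of feeding moving_average() from a session, assuming EPSILON is defined in the same module as the function above requires; the fed values are arbitrary:

value = tf.placeholder(tf.float32, shape=[])
avg = moving_average(value, window=5)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for v in [1.0, 2.0, 3.0]:
        print(sess.run(avg, feed_dict={value: v}))   # mean over the values seen so far (up to 5)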
 def testName(self):
     with tf.name_scope("scope"):
         queue = tf.FIFOQueue(10, tf.float32, name="queue")
     qr = tf.train.QueueRunner(queue, [tf.no_op()])
     self.assertEqual("scope/queue", qr.name)
     tf.train.add_queue_runner(qr)
     self.assertEqual(1, len(tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS, "scope")))
 def run_epoch(self, session, data, train_op=None, verbose=10):
   config = self.config
   dp = config.dropout
   if not train_op:
     train_op = tf.no_op()
     dp = 1.0
   total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
   total_loss = []
   state = self.initial_state.eval()
   for step, (x, y) in enumerate(
     ptb_iterator(data, config.batch_size, config.num_steps)):
     # We need to pass in the initial state and retrieve the final state to give
     # the RNN proper history
     feed = {self.input_placeholder: x,
             self.labels_placeholder: y,
             self.initial_state: state,
             self.dropout_placeholder: dp}
     loss, state, _ = session.run(
         [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
     total_loss.append(loss)
     if verbose and step % verbose == 0:
         sys.stdout.write('\r{} / {} : pp = {}'.format(
             step, total_steps, np.exp(np.mean(total_loss))))
         sys.stdout.flush()
   if verbose:
     sys.stdout.write('\r')
   return np.exp(np.mean(total_loss))
def train(total_loss, global_step):
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
    tf.scalar_summary("learning_rate", lr)

    loss_averages_op = _add_loss_summaries(total_loss)

    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + "/gradients", grad)

    #variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    #variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op]):
        train_op = tf.no_op(name="train")

    return train_op
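
The control_dependencies + tf.no_op() barrier used above can be shown in isolation. A minimal, self-contained sketch of the pattern with a toy loss, not the code's actual graph:

import tensorflow as tf

w = tf.Variable(1.0)
loss = tf.square(w - 3.0)
apply_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
with tf.control_dependencies([apply_op]):
    train_op = tf.no_op(name='train')   # running train_op forces the update to run first

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)                  # applies one gradient step as a side effect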
	def run_epoch(self, session, data, train_op=None, verbose=10):
		config=self.config
		dp=config.dropout
		if not train_op:
			train_op=tf.no_op()
			dp=1
			
		total_steps=sum(1 for x in data_iterator(data, config.batch_size))
		total_loss=[]
		# for rnn
		#state=self.initial_state.eval()
		for step, (x, y) in enumerate(
				data_iterator(data, config.batch_size)):
			feed={self.input_placeholder: x,
					self.labels_placeholder: y,
					#self.initial_state: state, # for rnn
					self.dropout_placeholder: dp}
			loss, state, _ = session.run(
					[self.loss, self.final_state, train_op], feed_dict=feed)
			total_loss.append(loss)
			if verbose and step % verbose == 0:
				sys.stdout.write('\r{} / {} : loss = {}'.format(
					step, total_steps, np.mean(total_loss)))
				sys.stdout.flush()
		if verbose:
			sys.stdout.write('\r')
		return loss
    def train(self, total_loss):
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        losses = tf.get_collection('losses')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        for l in losses + [total_loss]:
            tf.scalar_summary(l.op.name + ' (raw)', l)

        # Apply gradients, and add histograms
        with tf.control_dependencies([loss_averages_op]):
            opt = tf.train.AdamOptimizer()
            grads = opt.compute_gradients(total_loss)
        apply_gradient_op = opt.apply_gradients(grads)
        for var in tf.trainable_variables():
            tf.histogram_summary(var.op.name, var)
        for grad, var in grads:
            if grad is not None:
                tf.histogram_summary(var.op.name + '/gradients', grad)

        # Track the moving averages of all trainable variables
        variable_averages = tf.train.ExponentialMovingAverage(Recognizer.MOVING_AVERAGE_DECAY)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')
        return train_op
 def testComputeMovingVars(self):
   height, width = 3, 3
   with self.test_session() as sess:
     image_shape = (10, height, width, 3)
     image_values = np.random.rand(*image_shape)
     expected_mean = np.mean(image_values, axis=(0, 1, 2))
     expected_var = np.var(image_values, axis=(0, 1, 2))
     images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
     output = ops.batch_norm(images, decay=0.1)
     update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
     with tf.control_dependencies(update_ops):
       barrier = tf.no_op(name='gradient_barrier')
       output = control_flow_ops.with_dependencies([barrier], output)
     # Initialize all variables
     sess.run(tf.global_variables_initializer())
     moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
     moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
     mean, variance = sess.run([moving_mean, moving_variance])
     # After initialization moving_mean == 0 and moving_variance == 1.
     self.assertAllClose(mean, [0] * 3)
     self.assertAllClose(variance, [1] * 3)
     for _ in range(10):
       sess.run([output])
     mean = moving_mean.eval()
     variance = moving_variance.eval()
     # After 10 updates with decay 0.1 moving_mean == expected_mean and
     # moving_variance == expected_var.
     self.assertAllClose(mean, expected_mean)
     self.assertAllClose(variance, expected_var)
 def model_fn(features, targets):
   # dummy variable:
   _ = tf.Variable([0.])
   _ = targets
   predictions = features["x"]
   loss = tf.constant([2.])
   return predictions, loss, tf.no_op()
def build_model(x, y_, n_workers, is_chief):
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, 60000 / BATCH_SIZE, LEARNING_RATE_DECAY)
   
    # Implements synchronous updates via tf.train.SyncReplicasOptimizer.
    opt = tf.train.SyncReplicasOptimizer(
        tf.train.GradientDescentOptimizer(learning_rate),
        replicas_to_aggregate=n_workers,
        total_num_replicas=n_workers)

    train_op = opt.minimize(loss, global_step=global_step)     
    if is_chief:
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([variables_averages_op, train_op]):
            train_op = tf.no_op()

    return global_step, loss, train_op, opt
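
Driving the SyncReplicasOptimizer returned above needs some extra plumbing on the chief worker. A minimal sketch, assuming x, y_ and n_workers are defined as in the surrounding script (with n_workers == 1 the optimizer degenerates to a single replica, so the snippet can run locally); the session setup is hypothetical, not the original training loop:

global_step, loss, train_op, opt = build_model(x, y_, n_workers, is_chief=True)
chief_queue_runner = opt.get_chief_queue_runner()   # aggregates gradients from replicas
init_tokens_op = opt.get_init_tokens_op()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tf.train.start_queue_runners(sess)
    chief_queue_runner.create_threads(sess, start=True)   # started only on the chief
    sess.run(init_tokens_op)
    sess.run(train_op)   # blocks until replicas_to_aggregate gradients have been collected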
Example #28
def main(unused_args):

  with tf.Graph().as_default(), tf.Session() as session:
    config = BaseConfig()
    testConfig = TestConfig()
    init_scale = 1.0 / np.sqrt(config.n_hidden)
    initializer = tf.random_uniform_initializer(-init_scale, init_scale)
    initializer = tf.random_normal_initializer(0.0, 1.0, None)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      model = RNNModel(True, config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
        testModel = RNNModel(False, testConfig)

    tf.initialize_all_variables().run()
    x_data = generateTestPattern(200, 0.2, 2.0, 0.2)
    naiveInError = getNaiveError(x_data)

    test_data = generateTestPattern(200, 0.2, 2.0, 0.2)
    test_data = test_data[20:len(test_data)]
    naiveTestError = getNaiveError(test_data)

    for i in range(config.max_epoch):
        # lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
        # model.assign_lr(session, config.learning_rate * lr_decay)

        cost = run_epoch(session, model, x_data, model.train_op) / naiveInError
        if i % 20 == 0:
            print ("cost", cost)

    cost = run_epoch(session, model, x_data, model.train_op) / naiveInError
    test_cost = run_epoch(session, testModel, test_data, tf.no_op(), True) / naiveTestError

    print("final cost", cost, "test_cost", test_cost)
Example #29
 def test_dequeue(self):
   p = plan.TrainPlan()
   p.compiler = block_compiler.Compiler().compile(blocks.Scalar())
   p.is_chief_trainer = True
   p.batch_size = 3
   p.batches_per_epoch = 2
   p.queue_capacity = 12
   p.num_dequeuers = 1
   p.ps_tasks = 1
   q = p._create_queue(0)
   p._setup_dequeuing([q])
   input_batch = list(p.compiler.build_loom_inputs([7])) * 3
   q_enqueue = q.enqueue_many([input_batch * 4])
   p.losses['foo'], = p.compiler.output_tensors
   p.train_op = tf.no_op()
   p.finalize_stats()
   p.logdir = self.get_temp_dir()
   p.epochs = 2
   p.print_file = six.StringIO()
   init_op = tf.global_variables_initializer()
   sv = p.create_supervisor()
   with self.test_session() as sess:
     sess.run(init_op)
     sess.run(q_enqueue)
     p.run(sv, sess)
   expected = '\n'.join(['running train',
                         'train_size: 6',
                         'epoch:    1 train[loss: 7.000e+00]',
                         'epoch:    2 train[loss: 7.000e+00]',
                         'final model saved in file: %s' % p.logdir])
   log_str = p.print_file.getvalue()
   self.assertIn(expected, log_str)
def train(mnist):
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name="y-input")
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               mnist.train.num_examples/BATCH_SIZE,
                                               LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
        .minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training steps, loss is %g" % (i, loss_value))
                saver.save(sess, MODEL_SAVE_PATH, global_step=global_step)
Example #31
def run_training():
    # Get the sets of images and labels for training, validation, and test.
    # Tell TensorFlow that the model will be built into the default Graph.

    # Create model directory
    print('loading and init vgg16.........')
    vgg = vgg16.Vgg16()
    with tf.Graph().as_default():
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        images_placeholder, sc_labels_placeholder, ac_labels_placeholder, mc_labels_placeholder, keep_pro = placeholder_inputs(
            FLAGS.batch_size * gpu_num)
        tower_grads1 = []
        tower_grads2 = []
        tower_grads3 = []
        multi_logits = []

        learning_rate = tf.train.exponential_decay(
            1e-4,
            global_step,
            decay_steps=FLAGS.max_steps / 50,
            decay_rate=0.99,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)
        opt_multi = tf.train.AdamOptimizer(learning_rate)
        with tf.variable_scope('var_name') as var_scope:

            multi_fea_weights = {
                'w1':
                _variable_with_weight_decay('multi_w1', [4096, 2048], 0.005),
                'out':
                _variable_with_weight_decay('multi_feawout', [2048, 456],
                                            0.005)
            }
            multi_fea_biases = {
                'b1': _variable_with_weight_decay('multi_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('multi_feabout', [456],
                                                   0.000),
            }
            sc_fea_weights = {
                'w1':
                _variable_with_weight_decay('sc_w1', [4096, 2048], 0.005),
                'out':
                _variable_with_weight_decay('sc_feawout', [2048, 100], 0.005)
            }
            sc_fea_biases = {
                'b1': _variable_with_weight_decay('sc_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('sc_feabout', [100], 0.000),
            }
            ac_fea_weights = {
                'w1':
                _variable_with_weight_decay('ac_w1', [4096, 2048], 0.005),
                'out':
                _variable_with_weight_decay('ac_feawout', [2048, 100], 0.005)
            }
            ac_fea_biases = {
                'b1': _variable_with_weight_decay('ac_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('ac_feabout', [100], 0.000),
            }
            mc_fea_weights = {
                'w1':
                _variable_with_weight_decay('mc_w1', [4096, 2048], 0.005),
                'out':
                _variable_with_weight_decay('mc_feawout', [2048, 256], 0.005)
            }
            mc_fea_biases = {
                'b1': _variable_with_weight_decay('mc_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('mc_feabout', [256], 0.000),
            }

        for gpu_index in range(0, gpu_num):
            with tf.device('/gpu:%d' % gpu_index):

                varlist1 = [
                    multi_fea_weights.values(),
                    multi_fea_biases.values()
                ]

                vgg.build(images_placeholder[gpu_index *
                                             FLAGS.batch_size:(gpu_index + 1) *
                                             FLAGS.batch_size, :, :, :])
                train_features = vgg.fc7

                multi_logit = model.get_predict(train_features, keep_pro,
                                                FLAGS.batch_size,
                                                multi_fea_weights,
                                                multi_fea_biases)

                loss_name_scope = ('gpud_%d_loss' % gpu_index)

                multi_loss = tower_loss(
                    'multi', multi_logit,
                    sc_labels_placeholder[gpu_index *
                                          FLAGS.batch_size:(gpu_index + 1) *
                                          FLAGS.batch_size],
                    ac_labels_placeholder[gpu_index *
                                          FLAGS.batch_size:(gpu_index + 1) *
                                          FLAGS.batch_size],
                    mc_labels_placeholder[gpu_index *
                                          FLAGS.batch_size:(gpu_index + 1) *
                                          FLAGS.batch_size])
                grads1 = opt_multi.compute_gradients(multi_loss, varlist1)
                tower_grads1.append(grads1)
                multi_logits.append(multi_logit)

        multi_logits = tf.concat(multi_logits, 0)
        sc_logits = tf.slice(multi_logits, [0, 0], [6, 100])
        sc_predictions = tf.nn.top_k(tf.nn.softmax(sc_logits), 5)
        sc_accuracy = topk_acc(sc_logits, sc_labels_placeholder, 5)
        #sc_accuracy = tower_acc(sc_logits, sc_labels_placeholder)
        tf.summary.scalar('sc_accuracy', sc_accuracy)
        ac_logits = tf.slice(multi_logits, [0, 100], [6, 100])
        ac_predictions = tf.nn.top_k(tf.nn.softmax(ac_logits), 5)
        ac_accuracy = topk_acc(ac_logits, ac_labels_placeholder, 5)
        #ac_accuracy = tower_acc(ac_logits, ac_labels_placeholder)
        tf.summary.scalar('ac_accuracy', ac_accuracy)
        mc_logits = tf.slice(multi_logits, [0, 200], [6, 256])
        mc_predictions = tf.nn.top_k(tf.nn.softmax(mc_logits), 5)
        mc_accuracy = topk_acc(mc_logits, mc_labels_placeholder, 5)
        #mc_accuracy = tower_acc(mc_logits, mc_labels_placeholder)
        tf.summary.scalar('mc_accuracy', mc_accuracy)

        grads1 = average_gradients(tower_grads1)

        apply_gradient_multi = opt_multi.apply_gradients(
            grads1, global_step=global_step)

        train_multi = tf.group(apply_gradient_multi)

        null_op = tf.no_op()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver(multi_fea_weights.values() +
                               multi_fea_biases.values())
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)

    ckpt = tf.train.get_checkpoint_state(pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint, waiting......")
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")
    next_strat_pos = 0
    sc_predict_labels = []
    ac_predict_labels = []
    mc_predict_labels = []
    for step in xrange(FLAGS.max_steps):

        start_time = time.time()
        print('TEST Data Eval:')
        val_actions, val_images, val_ac_labels, val_sc_labels, val_mc_labels, next_strat_pos, _ = input_data(
            filename='./list/test.list',
            start_pos=next_strat_pos,
            batch_size=FLAGS.batch_size * gpu_num,
            shuffle=False)

        sc_predict, ac_predict, mc_predict, sc_acc, ac_acc, mc_acc = sess.run(
            [
                sc_predictions, ac_predictions, mc_predictions, sc_accuracy,
                ac_accuracy, mc_accuracy
            ],
            feed_dict={
                images_placeholder: val_images,
                ac_labels_placeholder: val_ac_labels,
                sc_labels_placeholder: val_sc_labels,
                mc_labels_placeholder: val_mc_labels,
                keep_pro: 1
            })
        #print (ac_predict)
        for i in range(FLAGS.batch_size):
            sc_predict_labels.append(sc_predict[1][i])
            ac_predict_labels.append(ac_predict[1][i])
            mc_predict_labels.append(mc_predict[1][i])

        duration = time.time() - start_time
        print('Batchnum %d: %.3f sec' % (step + 1, duration))
        #print predict_labels
        #print val_mc_labels

    print("get_predict_label_done!")
    return sc_predict_labels, ac_predict_labels, mc_predict_labels
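
The accuracy ops above rely on a topk_acc helper that is not shown in this snippet. A minimal sketch of what such a helper could look like, assuming integer class-id labels (an illustration, not the original implementation):

import tensorflow as tf

def topk_acc(logits, labels, k=5):
    # True where the ground-truth class id is among the k largest logits.
    correct = tf.nn.in_top_k(logits, tf.cast(labels, tf.int32), k)
    return tf.reduce_mean(tf.cast(correct, tf.float32))
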
Exemple #32
0
def train(logits,
          images_tensor,
          labels_tensor,
          is_training_tensor,
          iterator_num,
          summary_path='./log',
          restore=None):
    cross_id = 1
    roi_dir = '/home/give/Documents/dataset/MICCAI2018/Slices/crossvalidation'
    pre_load = True
    train_dataset = DataSet(os.path.join(roi_dir, str(cross_id), 'train'),
                            'train',
                            pre_load=pre_load,
                            rescale=True,
                            divied_liver=False)
    val_dataset = DataSet(os.path.join(roi_dir, str(cross_id), 'val'),
                          'val',
                          pre_load=pre_load,
                          rescale=True,
                          divied_liver=False)
    train_batchdata = train_dataset.get_next_batch(net_config.BATCH_SIZE)
    val_batchdata = val_dataset.get_next_batch(net_config.BATCH_SIZE)

    predicted_tensor = tf.argmax(logits, 1)
    global_step_tensor = tf.Variable(initial_value=0, trainable=False)
    softmax_loss = loss(logits, labels_tensor)
    loss_tensor = softmax_loss
    tf.summary.scalar('softmax loss', softmax_loss)
    tf.summary.scalar('loss', loss_tensor)
    train_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        loss_tensor, global_step=global_step_tensor)
    with tf.control_dependencies([train_step]):
        train_op = tf.no_op('train')

    correct_prediction = tf.equal(tf.argmax(logits, 1),
                                  tf.cast(tf.squeeze(labels_tensor), tf.int64))
    accuracy_tensor = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy_tensor)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        if restore is not None:
            full_path = tf.train.latest_checkpoint(restore['path'])
            print 'load model from ', full_path
            saver.restore(sess, full_path)
        train_summary_writer = tf.summary.FileWriter(os.path.join(
            summary_path, 'train'),
                                                     graph=sess.graph)
        val_summary_writer = tf.summary.FileWriter(os.path.join(
            summary_path, 'val'),
                                                   graph=sess.graph)
        merged_summary_op = tf.summary.merge_all()
        for i in range(iterator_num):
            step_value = sess.run(global_step_tensor)

            train_expand_roi_batch_images, train_labels = train_batchdata.next(
            )
            # update the center value during backpropagation
            _, train_acc, train_prediction, loss_value, merged_summary_value, softmax_loss_value = sess.run(
                [
                    train_op, accuracy_tensor, predicted_tensor, loss_tensor,
                    merged_summary_op, softmax_loss
                ],
                feed_dict={
                    images_tensor: train_expand_roi_batch_images,
                    labels_tensor: train_labels,
                    is_training_tensor: True,
                })
            train_summary_writer.add_summary(merged_summary_value,
                                             global_step=step_value)
            if step_value % 1000 == 0:
                val_expand_roi_batch_images, val_labels = val_batchdata.next()
                validation_acc, loss_value, merged_summary_value = sess.run(
                    [accuracy_tensor, loss_tensor, merged_summary_op],
                    feed_dict={
                        images_tensor: val_expand_roi_batch_images,
                        labels_tensor: val_labels,
                        is_training_tensor: False,
                    })
                val_summary_writer.add_summary(merged_summary_value,
                                               step_value)
                print 'step: %d, validation accuracy: %.2f, validation loss: %.2f' % (
                    step_value, validation_acc, loss_value)
                save_model_path = os.path.join('./parameters/', str(cross_id))
                checkpoint_path = os.path.join(save_model_path, 'model.ckpt')
                saver.save(sess,
                           checkpoint_path,
                           global_step=global_step_tensor)
                save_dir = os.path.join(save_model_path, str(step_value))
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)
                filenames = glob(
                    os.path.join(save_model_path,
                                 '*-' + str(int(step_value + 1)) + '.*'))
                for filename in filenames:
                    shutil.copy(
                        filename,
                        os.path.join(save_dir, os.path.basename(filename)))
            if step_value % 100 == 0:
                print 'step: %d, training accuracy: %.2f, training loss: %.2f, softmax_loss_value: %.2f' % (
                    step_value, train_acc, loss_value, softmax_loss_value)
                # print centers_value
        train_summary_writer.close()
        val_summary_writer.close()
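
Both training functions above expose their update as a do-nothing op guarded by control dependencies, so that running train_op forces the real work (the optimizer step, plus any moving-average or batch-norm updates) to run first. A small self-contained sketch of that pattern in the TF1 graph API (illustration only, not taken from the code above):

import tensorflow as tf

x = tf.Variable(3.0)
loss = tf.square(x - 1.0)
minimize_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

# train_op does nothing itself, but its control dependency guarantees the
# minimize op has already executed whenever it is run.
with tf.control_dependencies([minimize_op]):
    train_op = tf.no_op(name='train')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(200):
        sess.run(train_op)
    print(sess.run(x))  # approaches 1.0
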
def train_gan(train_set,
              indices: List,
              samples_per_N: int,
              repetition_n: int,
              identifier: str,
              experiment_name: str,
              batch_size: int = 256,
              desired_epochs: int = 2000,
              use_bot=False):
    """
    The GAN is trained for `desired_epochs` epochs. If a set of 60k samples is trained with a batch size of 256,
    then an epoch equals roughly 226 iterations, and a budget of 100,000 iterations corresponds to roughly 426 epochs.

    """
    assert train_set.shape[0] > len(indices)

    print(train_set.shape)
    print(len(indices))

    my_ds = DataSetManager(train_set[indices])

    # print("Set number of iterations to train\n")
    v5 = (desired_epochs * (train_set[indices].shape[0])) // batch_size + 1

    print("ITERS " + str(v5))
    print("SIZE " + str(train_set[indices].shape))

    # print("Use pretrained model? (0 means No, some number different to 0 means yes)\n")
    decision_number = 0  #int( input() )

    # print("Type a name to save the model with?\n")
    model_tag = str(round(samples_per_N)) + '_' + str(repetition_n)

    storing_path = 'data/' + experiment_name + "/" + model_tag + '_data/'
    model_path = storing_path + model_tag + '.ckpt'

    # Recall that os.mkdir isn't recursive, so it only makes one directory at a time
    try:
        # Create target Directory
        os.mkdir(storing_path)
        print("Directory ", storing_path, " Created ")
    except FileExistsError:
        print("Directory ", storing_path, " already exists")

    # ===> Auxiliary functions <===
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    def save_history(files_prefix, gen_loss_record, disc_loss_record,
                     jsd_error, current_epoch, epoch_record, my_ds, iter_,
                     epochs, global_iters, BATCH_SIZE, low_lr, high_lr):
        # Save losses per epoch

        df = pd.DataFrame(np.array(gen_loss_record))
        with open(files_prefix + '_gen_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(disc_loss_record))
        with open(files_prefix + '_disc_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(epoch_record))
        with open(files_prefix + '_epoch_record.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        # Save current iter and epochs

        training_history = {
            'epochs': [epochs + my_ds.epochs_completed],
            'iters': [global_iters + iter_],
            'Batch Size': [BATCH_SIZE],
            'low LR': [low_lr],
            'high LR': [high_lr]
        }
        df = pd.DataFrame(training_history)

        with open(files_prefix + '_training.csv', 'w+') as f:
            df.to_csv(f, index=False)  #, header=False, index=False

        with open(files_prefix + '_jsd_error.csv', 'a') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow([current_epoch, jsd_error])

    def send_bot_message(bot, my_ds, iter_, ITERS, identifier):
        """ 
        Not quite straightforward since the critic draws many more samples.

        """

        message = "\nEpochs [" + str(
            my_ds.epochs_completed) + "] Iter: " + str(iter_) + ";\t" + str(
                np.round(100 * iter_ / ITERS, 2)) + "% "
        message = message + identifier
        print(message)
        bot.set_status(message)
        # Send update message
        if bot.verbose:
            bot.send_message(message)

        print("\n")

    def save_gen_samples(gen_op, disc_op, sess, path, k, n=4):
        """
        k: the number of epochs used to train the generator
        n: the number of batches of samples to draw
        """

        suffix = '_gen_samples_' + str(k) + '_epochs_' + '.csv'

        for k in range(n):

            samples = sess.run(gen_op)
            df = pd.DataFrame(np.array(samples))
            with open(path + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

            # Score the samples using the critic
            scores = sess.run(disc_op)
            df = pd.DataFrame(np.array(scores))
            with open(path + 'scores_' + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # ===> Model Parameters <===
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """

    DIM = 512  # model dimensionality
    GEN_DIM = 100  # output dimension of the generator
    DIS_DIM = 1  # output dimension of the discriminator
    FIXED_GENERATOR = False  # whether to hold the generator fixed at real data plus Gaussian noise, as in the plots in the paper
    LAMBDA = .1  # smaller lambda makes things faster for toy tasks, but isn't necessary if you increase CRITIC_ITERS enough
    BATCH_SIZE = batch_size  # batch size
    ITERS = v5  #100000 # how many generator iterations to train for
    FREQ = 250  # sample frequency

    print("==>>Using batch size of " + str(BATCH_SIZE))
    CRITIC_ITERS = 5  # how many critic iterations per generator iteration

    def Generator_Softmax(n_samples, name='gen'):

        with tf.variable_scope(name):
            noise = tf.random_normal([n_samples, GEN_DIM])
            output01 = tf_utils.linear(noise, 2 * DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')

            output02 = tf_utils.linear(output01, 2 * DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')

            output03 = tf_utils.linear(output02, 2 * DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, GEN_DIM, name='fc-4')

            # Reminder: a logit can be modeled as a linear function of the predictors
            output05 = tf.nn.softmax(output04, name='softmax-1')

            return output05

    def Discriminator(inputs, is_reuse=True, name='disc'):
        with tf.variable_scope(name, reuse=is_reuse):
            print('is_reuse: {}'.format(is_reuse))
            output01 = tf_utils.linear(inputs, 2 * DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')

            output02 = tf_utils.linear(output01, 2 * DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')

            output03 = tf_utils.linear(output02, 2 * DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, DIS_DIM, name='fc-4')

            return output04

    real_data = tf.placeholder(tf.float32, shape=[None, GEN_DIM])
    fake_data = Generator_Softmax(BATCH_SIZE)

    disc_real = Discriminator(real_data, is_reuse=False)
    disc_fake = Discriminator(fake_data)

    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = -tf.reduce_mean(disc_fake)

    # WGAN gradient penalty parameters

    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
    interpolates = alpha * real_data + (1. - alpha) * fake_data
    disc_interpolates = Discriminator(interpolates)
    # differentiate the critic output w.r.t. the interpolated samples
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients),
                                   reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1)**2)

    disc_cost += LAMBDA * gradient_penalty

    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope='disc')
    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gen')

    disc_lr = tf.placeholder(tf.float32, shape=())  # 1e-4
    gen_lr = tf.placeholder(tf.float32, shape=())  # 1e-4

    disc_train_op = tf.train.AdamOptimizer(learning_rate=disc_lr,
                                           beta1=0.5,
                                           beta2=0.9).minimize(
                                               disc_cost, var_list=disc_vars)

    if len(gen_vars) > 0:
        gen_train_op = tf.train.AdamOptimizer(learning_rate=gen_lr,
                                              beta1=0.5,
                                              beta2=0.9).minimize(
                                                  gen_cost, var_list=gen_vars)
    else:
        gen_train_op = tf.no_op()
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    # ===> Model Parameters <===

    session_saver = tf.train.Saver()

    # files_prefix = 'model/'+ model_tag

    if decision_number == 0:
        pre_trained = False

        gen_loss_record = []  # type: List[float]
        disc_loss_record = []  # type: List[float]
        epoch_record = []  # type: List[float]

        epochs = 0
        global_iters = 0

        df = pd.DataFrame(np.array(indices))
        with open(storing_path + 'training_indices.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

    else:
        pre_trained = True

        print(storing_path)
        print(storing_path + 'training_indices.csv')
        _indices = (pd.read_csv(storing_path + 'training_indices.csv',
                                header=None).values).tolist()

        print(len(_indices))
        print(train_set[indices].shape)
        print(train_set[_indices].squeeze().shape)
        assert train_set[_indices].squeeze().shape == train_set[indices].shape
        my_ds = DataSetManager(train_set[_indices].squeeze())

        temp = pd.read_csv(storing_path + '_training.csv', header=None).values

        epochs, global_iters = temp.flatten()

        my_ds.epochs_completed = epochs

        gen_loss_record = (pd.read_csv(storing_path + '_gen_loss.csv',
                                       header=None).values).tolist()
        disc_loss_record = (pd.read_csv(storing_path + '_disc_loss.csv',
                                        header=None).values).tolist()
        epoch_record = (pd.read_csv(storing_path + '_epoch_record.csv',
                                    header=None).values).tolist()

        print("State has been restored")

    # Create a DLBot instance

    if use_bot:
        bot = DLBot(token=telegram_token, user_id=telegram_user_id)
        # Activate the bot
        bot.activate_bot()
        print("\nTelegram bot has been activated ")

    iters_per_epoch = my_ds.num_examples / BATCH_SIZE

    total_iters = int(
        np.ceil((desired_epochs * iters_per_epoch) / CRITIC_ITERS))

    critic_iters = np.round((5 / 6) * total_iters)
    gen_iters = np.round((1 / 6) * total_iters)

    ITERS = total_iters

    # Train loop
    with tf.Session() as sess:

        if not pre_trained:  # False by default
            sess.run(tf.global_variables_initializer())
        else:
            session_saver.restore(sess, model_path)
        #
        # DUCT TAPE SOLUTION
        iter_ = 0
        """
        while my_ds.epochs_completed < desired_epochs:
            iter_ +=1
        """
        # r=10**-4.72, max_lr=10**-3.72,
        lr_multiplier: int = 1
        low_lr = 10**-5
        high_lr = 10**-4

        lr1 = low_lr  # lr_multiplier*low_lr
        lr2 = low_lr  #lr_multiplier*high_lr

        gen_lr_ = CyclicLR(base_lr=lr1, max_lr=lr2, step_size=gen_iters)
        disc_lr_ = CyclicLR(base_lr=lr1, max_lr=lr2, step_size=critic_iters)

        for iter_ in range(ITERS):
            batch_data, disc_cost_ = None, None

            previous_epoch = my_ds.epochs_completed

            # train critic
            for i_ in range(CRITIC_ITERS):
                batch_data = my_ds.next_batch(
                    BATCH_SIZE)  # data_gen.__next__()
                disc_cost_, _ = sess.run([disc_cost, disc_train_op],
                                         feed_dict={
                                             real_data: batch_data,
                                             disc_lr: disc_lr_.clr()
                                         })
                disc_lr_.on_batch_end()

            # train generator
            sess.run(gen_train_op, feed_dict={gen_lr: gen_lr_.clr()})
            gen_lr_.on_batch_end()

            gen_cost2 = sess.run(gen_cost)

            current_epoch = my_ds.epochs_completed

            condition2 = current_epoch % 5 == 0
            if current_epoch > previous_epoch and condition2:
                disc_loss_record.append(disc_cost_)
                gen_loss_record.append(gen_cost2)
                epoch_record.append(my_ds.epochs_completed)
                # print("Diff "+str(current_epoch - previous_epoch))

            if (np.mod(iter_, FREQ) == 0) or (iter_ + 1 == ITERS):
                """
                print("===> Debugging")
                print(disc_loss_record)
                print(gen_loss_record)
                """
                if use_bot:
                    bot.loss_hist.append(disc_cost_)

                fake_samples = sess.run(
                    fake_data)  # , feed_dict={real_data: batch_data}
                # print("\n==> Sum-Simplex condition: " +str(np.sum(fake_samples, axis=1)))
                fake_population = np.array([
                    sess.run(fake_data) for k in range(40)
                ]).reshape(40 * batch_size, train_set.shape[1])

                print(fake_population.shape)
                jsd_error = gan_error_all_species(fake_population, k3_test_set)

                print("JSD Error " + str(jsd_error))

                message = "\nEpochs [" + str(
                    my_ds.epochs_completed) + "] Iter: " + str(
                        iter_) + ";\t" + str(np.round(100 * iter_ / ITERS,
                                                      2)) + "% "
                message = message + identifier
                print(message)

                if use_bot:
                    send_bot_message(bot, my_ds, iter_, ITERS, identifier)

                current_epoch = my_ds.epochs_completed

                session_saver.save(sess, model_path)
                save_history(storing_path, gen_loss_record, disc_loss_record,
                             jsd_error, current_epoch, epoch_record, my_ds,
                             iter_, epochs, global_iters, BATCH_SIZE, low_lr,
                             high_lr)

                # save_gen_samples(fake_data, disc_fake ,sess, storing_path, k) # fake_data = Generator_Softmax(BATCH_SIZE)

            utils.tick()  #  _iter[0] += 1

        if iter_ == ITERS - 1:  # final save after the last iteration
            session_saver.save(sess, model_path)

        # Create gan samples
        n_samples = len(indices)

        k_iter = n_samples // BATCH_SIZE + 1

        gan_samples_path = storing_path + "gan_samples_" + model_tag + '.csv'

        for k in range(k_iter):
            fake_samples = sess.run(fake_data)

            df = pd.DataFrame(fake_samples)
            with open(gan_samples_path, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # Clear variable values

    tf.reset_default_graph()

    current_epoch = my_ds.epochs_completed
    save_history(storing_path, gen_loss_record, disc_loss_record, jsd_error,
                 current_epoch, epoch_record, my_ds, iter_, epochs,
                 global_iters, BATCH_SIZE, low_lr, high_lr)
    if use_bot:
        bot.stop_bot()

    print("Training is done")

    # Duct-taping the size of the GAN sample set to avoid changing the TF graph

    temp1 = pd.read_csv(gan_samples_path, header=None).values
    temp1 = temp1[0:n_samples]
    df = pd.DataFrame(temp1)

    with open(gan_samples_path, 'w+') as f:
        df.to_csv(f, header=False, index=False)

    print("Training is done")
Exemple #34
0
def optimize(loss,
             global_step,
             max_grad_norm,
             lr,
             lr_decay,
             sync_replicas=False,
             replicas_to_aggregate=1,
             task_id=0):
    """Builds optimization graph.

  * Creates an optimizer, and optionally wraps with SyncReplicasOptimizer
  * Computes, clips, and applies gradients
  * Maintains moving averages for all trainable variables
  * Summarizes variables and gradients

  Args:
    loss: scalar loss to minimize.
    global_step: integer scalar Variable.
    max_grad_norm: float scalar. Grads will be clipped to this value.
    lr: float scalar, learning rate.
    lr_decay: float scalar, learning rate decay rate.
    sync_replicas: bool, whether to use SyncReplicasOptimizer.
    replicas_to_aggregate: int, number of replicas to aggregate when using
      SyncReplicasOptimizer.
    task_id: int, id of the current task; used to ensure proper initialization
      of SyncReplicasOptimizer.

  Returns:
    train_op
  """
    with tf.name_scope('optimization'):
        # Compute gradients.
        tvars = tf.trainable_variables()
        grads = tf.gradients(
            loss,
            tvars,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)

        # Clip non-embedding grads
        non_embedding_grads_and_vars = [(g, v) for (g, v) in zip(grads, tvars)
                                        if 'embedding' not in v.op.name]
        embedding_grads_and_vars = [(g, v) for (g, v) in zip(grads, tvars)
                                    if 'embedding' in v.op.name]

        ne_grads, ne_vars = zip(*non_embedding_grads_and_vars)
        ne_grads, _ = tf.clip_by_global_norm(ne_grads, max_grad_norm)
        non_embedding_grads_and_vars = zip(ne_grads, ne_vars)

        grads_and_vars = embedding_grads_and_vars + list(
            non_embedding_grads_and_vars)

        # Summarize
        _summarize_vars_and_grads(grads_and_vars)

        # Decaying learning rate
        lr = tf.train.exponential_decay(lr,
                                        global_step,
                                        1,
                                        lr_decay,
                                        staircase=True)
        tf.summary.scalar('learning_rate', lr)
        opt = tf.train.AdamOptimizer(lr)

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.999, global_step)

        # Apply gradients
        if sync_replicas:
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=tvars,
                total_num_replicas=replicas_to_aggregate)
            apply_gradient_op = opt.apply_gradients(grads_and_vars,
                                                    global_step=global_step)
            with tf.control_dependencies([apply_gradient_op]):
                train_op = tf.no_op(name='train_op')

            # Initialization ops
            tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS,
                                 opt.get_chief_queue_runner())
            if task_id == 0:  # Chief task
                local_init_op = opt.chief_init_op
                tf.add_to_collection('chief_init_op', opt.get_init_tokens_op())
            else:
                local_init_op = opt.local_step_init_op
            tf.add_to_collection('local_init_op', local_init_op)
            tf.add_to_collection('ready_for_local_init_op',
                                 opt.ready_for_local_init_op)
        else:
            # Non-sync optimizer
            apply_gradient_op = opt.apply_gradients(grads_and_vars,
                                                    global_step)
            with tf.control_dependencies([apply_gradient_op]):
                train_op = variable_averages.apply(tvars)

        return train_op
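
The docstring above describes computing, clipping and applying gradients before maintaining moving averages. A minimal self-contained sketch of just the clip-then-apply step (illustrative; it omits the embedding split, SyncReplicasOptimizer and EMA handling of the real function):

import tensorflow as tf

w = tf.Variable([1.0, 2.0, 3.0])
loss = tf.reduce_sum(tf.square(w))
opt = tf.train.AdamOptimizer(1e-3)

grads = tf.gradients(loss, [w])
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
train_op = opt.apply_gradients(list(zip(clipped_grads, [w])))
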
    def run_epoch(self,
                  session,
                  data,
                  num_epoch=0,
                  train_writer=None,
                  train_op=None,
                  verbose=2,
                  train=False):
        '''NEED TO ADD BATCH_NORM OR LAYER NORM'''

        config = self.config
        dp = config.dropout
        if train_op is None:
            train_op = tf.no_op()
            dp = 1

        total_steps = len(data[0]) / config.batch_size
        total_loss = []
        accuracy = 0

        # shuffle data
        p = np.random.permutation(len(data[0]))
        tp, ip, tl, il, im = data
        #targets[:config.num_train], inputs[:config.num_train], t_lens[:config.num_train], input_lens[:config.num_train], input_masks[:config.num_train]
        tp, ip, tl, il, im = tp[p], ip[p], tl[p], il[p], im[p]

        print total_steps
        print range(total_steps)

        for step in range(total_steps):
            index = range(step * config.batch_size,
                          (step + 1) * config.batch_size)

            feed = {
                self.target_placeholder: tp[index],
                self.input_placeholder: ip[index],
                self.target_len_placeholder: tl[index],
                self.input_len_placeholder: il[index],
                self.dropout_placeholder: dp
            }
            loss, pred, summary, _ = session.run(
                [self.calculate_loss, self.pred_seq, self.merged, train_op],
                feed_dict=feed)

            if train_writer is not None:
                train_writer.add_summary(summary,
                                         num_epoch * total_steps + step)

            #answers = a[step*config.batch_size:(step+1)*config.batch_size]
            '''IS ACCURACY RIGHT, DOES IT STILL WORK NOW THAT YOU'VE SWITCHED FROM TOKEN TO SEQUENCE'''
            targets = tp[step * config.batch_size:(step + 1) *
                         config.batch_size]
            """
            '''this is just the first element in the batch printed as a sample of how generations are changing''' 
            print "description"
            print ip[index][0]
            for i in ip[index][0]:
                '''
                ss=''
                for j in i:
                    ss+=str(self.source_id_to_vocab[int(j)]+' ')
                print ss
                '''
                #print [" ".join(str(self.source_id_to_vocab[int(j)])) for j in i]
                print [self.source_id_to_vocab[int(j)] for j in i if int(j) is not 0]



            print "pred"
            pred_seq = []
            #print('len(pred)')
            print(len(pred))
            for i in range(len(pred)):
                pred_seq.append(pred[i][0])
            print ["".join(str(self.target_id_to_vocab[int(pred_i)])) for pred_i in pred_seq]
            #print "".join([(str(self.target_id_to_vocab[int(pred_i)])) for pred_i in pred[0]])
            print "".join([(str(self.target_id_to_vocab_w_new_line(int(pred_i)))) for pred_i in pred_seq])

            print "target"
            print targets[0]
            #print "".join([(str(self.target_id_to_vocab[int(target)])) for target in targets[0] if int(target) is not 0])
            print "".join([(str(self.target_id_to_vocab_w_new_line(int(target)))) for target in targets[0] if int(target) is not 0])
            
            '''
            #stop after one iter for only quick check run
            stop_it
            #'''
            """

            accuracy += np.sum(pred == targets) / float(len(targets))

            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\n{} / {} : loss = {}\n\n'.format(
                    step, total_steps, np.mean(total_loss)))
                #sys.stdout.flush()

        if verbose:
            sys.stdout.write('\n')

        return np.mean(total_loss), accuracy / float(total_steps)
Exemple #36
0
def train():
    OUTPUT_NODE = 10  # 10 classes
    LAYER1_NODE = 500
    BATCH_SIZE = 100

    LEARNING_RATE_BASE = 0.8
    LEARNING_RATE_DECAY = 0.99

    REGULARIZATION_RATE = 0.0001
    TRAINING_STEPS = 3000
    MOVING_AVERAGE_DECAY = 0.99

    (x_train, y_train_orig), (x_test, y_test_orig) = load_data()

    print("Original Train data X shape: ", x_train.shape,
          "Training data Y shape: ", y_train_orig.shape)
    print("Original Test data X shape: ", x_test.shape, "Test data Y shape: ",
          y_test_orig.shape)
    #print("before convert y_train_orig :", y_train_orig.shape, "y[0]: ", y_train_orig[0], "y[1]: ", y_train_orig[1],"y[2]: ", y_train_orig[2],)
    y_train = convert_to_one_hot(y_train_orig, 10)
    #print("after convert y_train_one_hot :",y_train.shape)
    y_test = convert_to_one_hot(y_test_orig, 10)

    print("----- Reshape Original Trains Dataset Shape ------")
    x_reshape_train = tf.reshape(x_train, [x_train.shape[0], -1])
    y_reshape_train = tf.transpose(y_train)

    print("Reshape Train data X as: ", x_reshape_train.shape,
          "Training data Y shape: ", y_reshape_train.shape)
    x_test_reshape = tf.reshape(x_test, [x_test.shape[0], -1])
    y_test_reshape = tf.transpose(y_test)
    print("Reshape Test data X as: ", x_test_reshape.shape,
          "Test data Y shape: ", y_test_reshape.shape)

    input_x_flatten_size = x_reshape_train.shape[1]
    input_x_size = tf.convert_to_tensor(input_x_flatten_size, dtype=tf.int32)
    input_x_number_examples = tf.convert_to_tensor(x_reshape_train.shape[0],
                                                   dtype=tf.int32)

    x = tf.placeholder(tf.float32,
                       shape=(None, input_x_flatten_size),
                       name='x-input')
    y_ = tf.placeholder(tf.float32, shape=(None, OUTPUT_NODE), name='y-input')

    weights1 = tf.Variable(tf.truncated_normal([input_x_size, LAYER1_NODE],
                                               stddev=0.1),
                           name="weights1")
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]),
                          name="biases1")

    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE],
                                               stddev=0.1),
                           name="weights2")
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]),
                          name="biases2")

    # Forward propagation result
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Step of training number
    global_step = tf.Variable(0, trainable=False)

    variabl_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)

    variabl_averages_op = variabl_averages.apply(tf.trainable_variables())
    #print(tf.trainable_variables())
    # Forward propagation using sliding average
    average_y = inference(x, variabl_averages, weights1, biases1, weights2,
                          biases2)

    # loss function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization

    # learning rate decay
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, input_x_number_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)

    # Note as from https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer
    # global_step: Optional Variable to increment by one after the variables have been updated.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    with tf.control_dependencies([train_step, variabl_averages_op]):
        train_op = tf.no_op(name='train')

    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # validation data sets
        validates_x = x_test_reshape.eval()
        validates_y = y_test_reshape.eval()
        validates_feed = {x: validates_x, y_: validates_y}

        # test feed (note: built from the reshaped training data)
        test_feed = {x: x_reshape_train.eval(), y_: y_reshape_train.eval()}
        seed = 3
        mini_batches = random_mini_batches(tf.transpose(x_reshape_train),
                                           tf.transpose(y_reshape_train),
                                           BATCH_SIZE, seed)

        for i in range(TRAINING_STEPS):

            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validates_feed)
                print(
                    "After %d training step(s), validation accuracy using average model is %g"
                    % (i, validate_acc))

            k = i % len(mini_batches)
            if k == 0:
                seed = seed + 1
                mini_batches = random_mini_batches(
                    tf.transpose(x_reshape_train),
                    tf.transpose(y_reshape_train), BATCH_SIZE, seed)

            mini_x_batches, mini_y_batches = mini_batches[k]
            sess.run(train_op,
                     feed_dict={
                         x: mini_x_batches,
                         y_: mini_y_batches
                     })

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print(
            "After %d training step(s), testing accuracy using average model is %g"
            % (i, test_acc))

    writer = tf.summary.FileWriter("./log", tf.get_default_graph())
    writer.close()
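
The snippet above follows the classic pattern of chaining the gradient step and the moving-average update behind a no-op train_op, with inference reading the shadow (averaged) weights through average_y. A tiny self-contained illustration of the moving-average mechanism itself (not the code above):

import tensorflow as tf

w = tf.Variable(0.0)
step = tf.assign_add(w, 1.0)
ema = tf.train.ExponentialMovingAverage(decay=0.9)
maintain_avg = ema.apply([w])   # creates and updates a shadow copy of w

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5):
        sess.run(step)          # update the raw variable
        sess.run(maintain_avg)  # then refresh its shadow average
    # The shadow value lags behind w; this is what averaged inference reads.
    print(sess.run([w, ema.average(w)]))
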
Exemple #37
0
    def _train(self, experience, weights, train_step_counter):
        # Get individual tensors from transitions.
        (time_steps, policy_steps_,
         next_time_steps) = trajectory.to_transition(experience)
        actions = policy_steps_.action

        if self._debug_summaries:
            tf.contrib.summary.histogram('actions', actions)

        action_distribution_parameters = policy_steps_.info

        # Reconstruct per-timestep policy distribution from stored distribution
        #   parameters.
        old_actions_distribution = (
            distribution_spec.nested_distributions_from_specs(
                self._action_distribution_spec,
                action_distribution_parameters))

        # Compute log probability of actions taken during data collection, using the
        #   collect policy distribution.
        act_log_probs = common_utils.log_probability(old_actions_distribution,
                                                     actions,
                                                     self._action_spec)

        # Compute the value predictions for states using the current value function.
        # To be used for return & advantage computation.
        batch_size = nest_utils.get_outer_shape(time_steps,
                                                self._time_step_spec)[0]
        policy_state = self._collect_policy.get_initial_state(
            batch_size=batch_size)

        value_preds, unused_policy_state = self._collect_policy.apply_value_network(
            experience.observation,
            experience.step_type,
            policy_state=policy_state)
        value_preds = tf.stop_gradient(value_preds)

        valid_mask = ppo_utils.make_timestep_mask(next_time_steps)

        if weights is None:
            weights = valid_mask
        else:
            weights *= valid_mask

        returns, normalized_advantages = self.compute_return_and_advantage(
            next_time_steps, value_preds)

        # Loss tensors across batches will be aggregated for summaries.
        policy_gradient_losses = []
        value_estimation_losses = []
        l2_regularization_losses = []
        entropy_regularization_losses = []
        kl_penalty_losses = []

        # For each epoch, create its own train op that depends on the previous one.
        loss_info = tf.no_op()
        for i_epoch in range(self._num_epochs):
            with tf.name_scope('epoch_%d' % i_epoch):
                with tf.control_dependencies(tf.nest.flatten(loss_info)):
                    # Only save debug summaries for first and last epochs.
                    debug_summaries = (self._debug_summaries and
                                       (i_epoch == 0
                                        or i_epoch == self._num_epochs - 1))

                    # Build one epoch train op.
                    loss_info = self.build_train_op(
                        time_steps, actions, act_log_probs, returns,
                        normalized_advantages, action_distribution_parameters,
                        weights, train_step_counter,
                        self._summarize_grads_and_vars,
                        self._gradient_clipping, debug_summaries)

                    policy_gradient_losses.append(
                        loss_info.extra.policy_gradient_loss)
                    value_estimation_losses.append(
                        loss_info.extra.value_estimation_loss)
                    l2_regularization_losses.append(
                        loss_info.extra.l2_regularization_loss)
                    entropy_regularization_losses.append(
                        loss_info.extra.entropy_regularization_loss)
                    kl_penalty_losses.append(loss_info.extra.kl_penalty_loss)

        # After update epochs, update adaptive kl beta, then update observation
        #   normalizer and reward normalizer.
        with tf.control_dependencies(tf.nest.flatten(loss_info)):
            # Compute the mean kl from old.
            batch_size = nest_utils.get_outer_shape(time_steps,
                                                    self._time_step_spec)[0]
            policy_state = self._collect_policy.get_initial_state(batch_size)
            kl_divergence = self._kl_divergence(
                time_steps, action_distribution_parameters,
                self._collect_policy.distribution(time_steps,
                                                  policy_state).action)
            update_adaptive_kl_beta_op = self.update_adaptive_kl_beta(
                kl_divergence)

        with tf.control_dependencies([update_adaptive_kl_beta_op]):
            if self._observation_normalizer:
                update_obs_norm = (self._observation_normalizer.update(
                    time_steps.observation, outer_dims=[0, 1]))
            else:
                update_obs_norm = tf.no_op()
            if self._reward_normalizer:
                update_reward_norm = self._reward_normalizer.update(
                    next_time_steps.reward, outer_dims=[0, 1])
            else:
                update_reward_norm = tf.no_op()

        with tf.control_dependencies([update_obs_norm, update_reward_norm]):
            loss_info = tf.nest.map_structure(tf.identity, loss_info)

        # Make summaries for total loss across all epochs.
        # The *_losses lists will have been populated by
        #   calls to self.build_train_op.
        with tf.name_scope('Losses/'):
            total_policy_gradient_loss = tf.add_n(policy_gradient_losses)
            total_value_estimation_loss = tf.add_n(value_estimation_losses)
            total_l2_regularization_loss = tf.add_n(l2_regularization_losses)
            total_entropy_regularization_loss = tf.add_n(
                entropy_regularization_losses)
            total_kl_penalty_loss = tf.add_n(kl_penalty_losses)
            tf.contrib.summary.scalar('policy_gradient_loss',
                                      total_policy_gradient_loss)
            tf.contrib.summary.scalar('value_estimation_loss',
                                      total_value_estimation_loss)
            tf.contrib.summary.scalar('l2_regularization_loss',
                                      total_l2_regularization_loss)
            if self._entropy_regularization:
                tf.contrib.summary.scalar('entropy_regularization_loss',
                                          total_entropy_regularization_loss)
            tf.contrib.summary.scalar('kl_penalty_loss', total_kl_penalty_loss)

            total_abs_loss = (tf.abs(total_policy_gradient_loss) +
                              tf.abs(total_value_estimation_loss) +
                              tf.abs(total_entropy_regularization_loss) +
                              tf.abs(total_l2_regularization_loss) +
                              tf.abs(total_kl_penalty_loss))

            tf.contrib.summary.scalar('total_abs_loss', total_abs_loss)

        if self._summarize_grads_and_vars:
            with tf.name_scope('Variables/'):
                all_vars = (self._actor_net.trainable_weights +
                            self._value_net.trainable_weights)
                for var in all_vars:
                    tf.contrib.summary.histogram(var.name.replace(':', '_'),
                                                 var)

        return loss_info
    def initialize(self):
        if tf.executing_eagerly():
            return tf.no_op()
        else:
            return self._initializers
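
The method above returns a no-op under eager execution because variables are initialized as soon as they are created there; only in graph mode does the caller need a real initializer op to run. A self-contained sketch of the same guard, with a hypothetical list of initializer ops:

import tensorflow as tf

def make_init_op(initializers):
    # Under eager execution there is nothing left to run later.
    if tf.executing_eagerly():
        return tf.no_op()
    # In graph mode, bundle the initializer ops into a single runnable op.
    return tf.group(*initializers)
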
Exemple #39
0
def stamp_parameter_in_graph(parameter_name, parameter_type, graph):
    """Stamps a parameter of a given type in the given tf.Graph instance.

  Tensors are stamped as placeholders, sequences are stamped as data sets
  constructed from string tensor handles, and named tuples are stamped by
  independently stamping their elements.

  Args:
    parameter_name: The suggested (string) name of the parameter to use in
      determining the names of the graph components to construct. The names that
      will actually appear in the graph are not guaranteed to be based on this
      suggested name, and may vary, e.g., due to existing naming conflicts, but
      a best-effort attempt will be made to make them similar for ease of
      debugging.
    parameter_type: The type of the parameter to stamp. Must be either an
      instance of computation_types.Type (or convertible to it), or None.
    graph: The instance of tf.Graph to stamp in.

  Returns:
    A tuple (val, binding), where 'val' is a Python object (such as a dataset,
    a placeholder, or a `structure.Struct` that represents a named
    tuple) that represents the stamped parameter for use in the body of a Python
    function that consumes this parameter, and the 'binding' is an instance of
    TensorFlow.Binding that indicates how parts of the type signature relate
    to the tensors and ops stamped into the graph.

  Raises:
    TypeError: If the arguments are of the wrong computation_types.
    ValueError: If the parameter type cannot be stamped in a TensorFlow graph.
  """
    py_typecheck.check_type(parameter_name, str)
    py_typecheck.check_type(graph, tf.Graph)
    if parameter_type is None:
        return (None, None)
    parameter_type = computation_types.to_type(parameter_type)
    if parameter_type.is_tensor():
        with graph.as_default():
            placeholder = tf.compat.v1.placeholder(dtype=parameter_type.dtype,
                                                   shape=parameter_type.shape,
                                                   name=parameter_name)
            binding = pb.TensorFlow.Binding(tensor=pb.TensorFlow.TensorBinding(
                tensor_name=placeholder.name))
            return (placeholder, binding)
    elif parameter_type.is_struct():
        # The parameter_type could be a StructTypeWithPyContainer, however, we
        # ignore that for now. Instead, the proper containers will be inserted at
        # call time by function_utils.wrap_as_zero_or_one_arg_callable.
        if not parameter_type:
            # Stamps dummy element to "populate" graph, as TensorFlow does not support
            # empty graphs.
            dummy_tensor = tf.no_op()
        element_name_value_pairs = []
        element_bindings = []
        for e in structure.iter_elements(parameter_type):
            e_val, e_binding = stamp_parameter_in_graph(
                '{}_{}'.format(parameter_name, e[0]), e[1], graph)
            element_name_value_pairs.append((e[0], e_val))
            element_bindings.append(e_binding)
        return (structure.Struct(element_name_value_pairs),
                pb.TensorFlow.Binding(struct=pb.TensorFlow.StructBinding(
                    element=element_bindings)))
    elif parameter_type.is_sequence():
        with graph.as_default():
            variant_tensor = tf.compat.v1.placeholder(tf.variant, shape=[])
            ds = make_dataset_from_variant_tensor(variant_tensor,
                                                  parameter_type.element)
        return (ds,
                pb.TensorFlow.Binding(sequence=pb.TensorFlow.SequenceBinding(
                    variant_tensor_name=variant_tensor.name)))
    else:
        raise ValueError(
            'Parameter type component {!r} cannot be stamped into a TensorFlow '
            'graph.'.format(parameter_type))
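
As the docstring explains, tensor-typed parameters are stamped into the graph as placeholders whose names are only best-effort suggestions. A minimal illustration of that tensor branch (hypothetical names, not the TFF helper itself):

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    # The graph may rename the placeholder if 'my_param' is already taken.
    placeholder = tf.compat.v1.placeholder(dtype=tf.float32,
                                           shape=[None, 3],
                                           name='my_param')
print(placeholder.name)  # e.g. 'my_param:0'
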
Exemple #40
0
    def create_variables(self):
        self.target_actor = self.actor.copy(scope="target_actor")
        self.target_critic = self.critic.copy(scope="target_critic")

        # FOR REGULAR ACTION SCORE COMPUTATION
        with tf.name_scope("taking_action"):
            # self.observation  = tf.placeholder(tf.float32, (None, self.observation_size), name="observation")
            #            self.actor_val = tf.nn.sigmoid(self.actor(self.observation)) * 40 - 20;
            self.actor_val = self.actor(self.observation_for_act)
            self.actor_action = tf.identity(self.actor_val,
                                            name="actor_action")
            # tf.histogram_summary("actions", self.actor_action)

        # FOR PREDICTING TARGET FUTURE REWARDS
        with tf.name_scope("estimating_future_reward"):
            # self.next_observation          = tf.placeholder(tf.float32, (None, self.observation_size), name="next_observation")
            # self.next_observation_mask     = tf.placeholder(tf.float32, (None,), name="next_observation_mask")
            # self.next_action               = self.target_actor(self.next_observation) # ST
            self.next_action = tf.stop_gradient(
                self.target_actor(self.next_observation))  # ST
            #            print "next action: " + str(self.next_action)
            # tf.histogram_summary("target_actions", self.next_action)
            # self.next_value                = self.target_critic([self.next_observation, self.next_action]) # ST
            self.next_value = tf.stop_gradient(
                tf.reshape(
                    self.target_critic(
                        [self.next_observation, self.next_action]),
                    [-1]))  # ST
            # self.rewards                   = tf.placeholder(tf.float32, (None,), name="rewards")
            self.future_reward = self.rewards + self.discount_rate * self.next_observation_mask * self.next_value

        with tf.name_scope("critic_update"):
            ##### ERROR FUNCTION #####
            # self.given_action               = tf.placeholder(tf.float32, (None, self.action_size), name="given_action")
            self.value_given_action = tf.reshape(
                self.critic([self.observation, self.given_action]), [-1])
            # tf.scalar_summary("value_for_given_action", tf.reduce_mean(self.value_given_action))
            temp_diff = self.value_given_action - self.future_reward

            self.critic_error = tf.identity(tf.reduce_mean(
                tf.square(temp_diff)),
                                            name='critic_error')
            ##### OPTIMIZATION #####
            critic_gradients = self.optimizer.compute_gradients(
                self.critic_error, var_list=self.critic.variables())
            # Add histograms for gradients.
            for grad, var in critic_gradients:
                # tf.histogram_summary('critic_update/' + var.name, var)
                if grad is not None:
                    # tf.histogram_summary('critic_update/' + var.name + '/gradients', grad)
                    pass
            self.critic_update = self.optimizer.apply_gradients(
                critic_gradients, name='critic_train_op')
            # tf.scalar_summary("critic_error", self.critic_error)

        with tf.name_scope("actor_update"):
            ##### ERROR FUNCTION #####
            # self.actor_score = self.critic([self.observation, self.actor_action])
            self.actor_score = self.critic(
                [self.observation,
                 self.actor(self.observation)])

            ##### OPTIMIZATION #####
            # here we are maximizing actor score.
            # only optimize actor variables here, while keeping critic constant
            actor_gradients = self.optimizer.compute_gradients(
                tf.reduce_mean(-self.actor_score),
                var_list=self.actor.variables())
            # Add histograms for gradients.
            for grad, var in actor_gradients:
                # tf.histogram_summary('actor_update/' + var.name, var)
                if grad is not None:
                    # tf.histogram_summary('actor_update/' + var.name + '/gradients', grad)
                    pass
            self.actor_update = self.optimizer.apply_gradients(
                actor_gradients, name='actor_train_op')
            # tf.scalar_summary("actor_score", tf.reduce_mean(self.actor_score))

        # UPDATE TARGET NETWORK
        with tf.name_scope("target_network_update"):
            self.target_actor_update = ContinuousDeepQ.update_target_network(
                self.actor, self.target_actor, self.target_actor_update_rate)
            self.target_critic_update = ContinuousDeepQ.update_target_network(
                self.critic, self.target_critic,
                self.target_critic_update_rate)
            self.update_all_targets = tf.group(self.target_actor_update,
                                               self.target_critic_update,
                                               name='target_networks_update')

        # self.summarize = tf.merge_all_summaries()
        self.no_op1 = tf.no_op()
Exemple #41
0
    def eval_metrics_host_call_fn(policy_output,
                                  value_output,
                                  pi_tensor,
                                  policy_cost,
                                  value_cost,
                                  l2_cost,
                                  combined_cost,
                                  step,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(
            tf.reduce_sum(policy_output * tf.log(policy_output), axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised learning)
        # and soft-max when using self-play records. argmax normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)

        policy_output_in_top1 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=1))
        policy_output_in_top3 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        policy_target_top_1_confidence = tf.boolean_mask(
            policy_output,
            tf.one_hot(policy_target_top_1,
                       tf.shape(policy_output)[1]))

        value_cost_normalized = value_cost / params['value_cost_weight']

        with tf.variable_scope("metrics"):
            metric_ops = {
                'policy_cost':
                tf.metrics.mean(policy_cost),
                'value_cost':
                tf.metrics.mean(value_cost),
                'value_cost_normalized':
                tf.metrics.mean(value_cost_normalized),
                'l2_cost':
                tf.metrics.mean(l2_cost),
                'policy_entropy':
                tf.metrics.mean(policy_entropy),
                'combined_cost':
                tf.metrics.mean(combined_cost),
                'policy_accuracy_top_1':
                tf.metrics.mean(policy_output_in_top1),
                'policy_accuracy_top_3':
                tf.metrics.mean(policy_output_in_top3),
                'policy_top_1_confidence':
                tf.metrics.mean(policy_top_1_confidence),
                'policy_target_top_1_confidence':
                tf.metrics.mean(policy_target_top_1_confidence),
                'value_confidence':
                tf.metrics.mean(tf.abs(value_output)),
            }

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops

        # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps.
        eval_step = tf.reduce_min(step)

        # Create summary ops so that they show up in SUMMARIES collection
        # That way, they get logged automatically during training
        summary_writer = summary.create_file_writer(FLAGS.work_dir)
        with summary_writer.as_default(), \
                summary.record_summaries_every_n_global_steps(
                    params['summary_steps'], eval_step):
            for metric_name, metric_op in metric_ops.items():
                summary.scalar(metric_name, metric_op[1], step=eval_step)

        # Reset metrics occasionally so that they are mean of recent batches.
        reset_op = tf.variables_initializer(tf.local_variables("metrics"))
        cond_reset_op = tf.cond(
            tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)),
            lambda: reset_op, lambda: tf.no_op())

        return summary.all_summary_ops() + [cond_reset_op]
        summary_writer = tf.summary.FileWriter('/tmp/lstm_logs', session.graph)

        tf.initialize_all_variables().run()  # initialize all variables

        for i in range(config.max_max_epoch):  # make several passes over the whole corpus
            # learning rate decay:
            # while i <= max_epoch, lr_decay = 1; once i > max_epoch, lr_decay = 0.5**(i - max_epoch)
            lr_decay = config.lr_decay**max(i - config.max_epoch, 0.0)
            m.assign_lr(session,
                        config.learning_rate * lr_decay)  # set the learning rate

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session,
                                         m,
                                         train_data,
                                         m.train_op,
                                         verbose=True)  # training perplexity
            print("Epoch: %d Train Perplexity: %.3f" %
                  (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data,
                                         tf.no_op())  # validation perplexity
            print("Epoch: %d Valid Perplexity: %.3f" %
                  (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data,
                                    tf.no_op())  # test perplexity
        print("Test Perplexity: %.3f" % test_perplexity)

# if __name__ == "__main__":
#     tf.app.run()
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)


    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            print(step)
            data = next(data_generator)
            ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0],
                                                                                input_score_maps: data[2],
                                                                                input_geo_maps: data[3],
                                                                                input_training_masks: data[4]})
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start)/10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu * len(gpus))/(time.time() - start)
                start = time.time()
                print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                    step, ml, tl, avg_time_per_step, avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0],
                                                                                             input_score_maps: data[2],
                                                                                             input_geo_maps: data[3],
                                                                                             input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
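
In the training script above the returned train_op is literally a tf.no_op(name='train_op'): all the work happens in the three ops it depends on (gradient application, the exponential-moving-average update, and the batch-norm updates). Here is a minimal sketch of that grouping idiom, assuming TF 1.x; the toy variable, loss, and decay values are stand-ins rather than anything from the script above.

# Minimal TF 1.x sketch of the "no_op behind control_dependencies" idiom
# used above to bundle several update ops into a single train_op.
import tensorflow as tf

w = tf.get_variable('w', shape=[1], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))

global_step = tf.train.get_or_create_global_step()
opt = tf.train.AdamOptimizer(1e-3)
apply_grads = opt.apply_gradients(opt.compute_gradients(loss),
                                  global_step=global_step)

ema = tf.train.ExponentialMovingAverage(0.997, global_step)
ema_update = ema.apply(tf.trainable_variables())
bn_updates = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))

# train_op itself does nothing; its control dependencies do all the work.
with tf.control_dependencies([apply_grads, ema_update, bn_updates]):
    train_op = tf.no_op(name='train_op')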
Exemple #44
0
    def __init__(self, config):
        self.config = config
        self.vfeat_path = config.vfeat_path
        self.tf_record_dir = config.tf_record_dir

        self.max_train_iter = config.max_train_iter

        dataset_str = 'd'
        dataset_str += '_' + '_'.join(
            config.tf_record_dir.replace('data/preprocessed/vqa_v2/',
                                         '').split('/'))
        dataset_str += '_' + config.vfeat_name.replace('.hdf5', '')

        hyper_parameter_str = 'bs{}_lr{}'.format(config.batch_size,
                                                 config.learning_rate)

        if config.ft_vlmap:
            hyper_parameter_str += '_ft_vlmap'

        self.train_dir = './train_dir/vqa_{}_{}_{}_{}_seed{}_{}'.format(
            config.model_type, dataset_str, config.prefix, hyper_parameter_str,
            config.seed, time.strftime("%Y%m%d-%H%M%S"))
        if not os.path.exists(self.train_dir): os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        if config.vlmap_word_weight_dir is not None:
            self.vlmap_word_weight_dir = os.path.join(
                self.train_dir,
                config.vlmap_word_weight_dir.split('/')[-1])
            shutil.copytree(config.vlmap_word_weight_dir,
                            self.vlmap_word_weight_dir)
            config.vlmap_word_weight_dir = self.vlmap_word_weight_dir
        else:
            self.vlmap_word_weight_dir = config.vlmap_word_weight_dir

        # Input
        self.batch_size = config.batch_size
        with tf.name_scope('datasets'):
            self.target_split = tf.placeholder(tf.string)

        with tf.name_scope('datasets/batch'):
            vqa_batch = {
                'train':
                input_ops_vqa.create(self.batch_size,
                                     self.tf_record_dir,
                                     'train',
                                     is_train=True,
                                     scope='train_ops',
                                     shuffle=True),
                'val':
                input_ops_vqa.create(self.batch_size,
                                     self.tf_record_dir,
                                     'val',
                                     is_train=True,
                                     scope='val_ops',
                                     shuffle=False),
                'testval':
                input_ops_vqa.create(self.batch_size,
                                     self.tf_record_dir,
                                     'testval',
                                     is_train=True,
                                     scope='testval_ops',
                                     shuffle=False),
                'test':
                input_ops_vqa.create(self.batch_size,
                                     self.tf_record_dir,
                                     'test',
                                     is_train=True,
                                     scope='test_ops',
                                     shuffle=False)
            }
            batch_opt = {
                tf.equal(self.target_split, 'train'):
                lambda: vqa_batch['train'],
                tf.equal(self.target_split, 'val'):
                lambda: vqa_batch['val'],
                tf.equal(self.target_split, 'testval'):
                lambda: vqa_batch['testval'],
                tf.equal(self.target_split, 'test'):
                lambda: vqa_batch['test'],
            }
            self.batch = tf.case(batch_opt,
                                 default=lambda: vqa_batch['train'],
                                 exclusive=True)

        # Model
        Model = self.get_model_class(config.model_type)
        log.infov('using model class: {}'.format(Model))
        self.model = Model(self.batch, config, is_train=True)

        # Optimizer
        self.global_step = tf.train.get_or_create_global_step(graph=None)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
                name='decaying_learning_rate')

        # Checkpoint and monitoring
        trainable_vars = tf.trainable_variables()
        train_vars = self.model.filter_train_vars(trainable_vars)
        log.warn('Trainable variables:')
        tf.contrib.slim.model_analyzer.analyze_vars(trainable_vars,
                                                    print_info=True)
        log.warn('Filtered train variables:')
        tf.contrib.slim.model_analyzer.analyze_vars(train_vars,
                                                    print_info=True)

        self.optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            optimizer=tf.train.AdamOptimizer,
            clip_gradients=20.0,
            variables=train_vars,
            increment_global_step=True,
            name='optimizer')

        self.avg_report = {
            'train': {},
            'val': {},
            'testval': {},
        }
        for split in ['train', 'val', 'testval']:
            for key in self.model.report.keys():
                self.avg_report[split][key] = tf.placeholder(tf.float32)
                tf.summary.scalar('average_{}/{}'.format(split, key),
                                  self.avg_report[split][key],
                                  collections=['average_{}'.format(split)])

        self.summary_ops = {
            'train': tf.summary.merge_all(key='train'),
            'val': tf.summary.merge_all(key='val'),
            'testval': tf.summary.merge_all(key='testval'),
            'heavy_train': tf.summary.merge_all(key='heavy_train'),
            'heavy_val': tf.summary.merge_all(key='heavy_val'),
            'heavy_testval': tf.summary.merge_all(key='heavy_testval'),
            'average_train': tf.summary.merge_all(key='average_train'),
            'average_val': tf.summary.merge_all(key='average_val'),
            'average_testval': tf.summary.merge_all(key='average_testval'),
            'no_op': tf.no_op(),
        }

        all_vars = tf.global_variables()
        transfer_vars = self.model.filter_transfer_vars(all_vars)

        self.saver = tf.train.Saver(max_to_keep=100)
        self.checkpoint_loader = tf.train.Saver(max_to_keep=1)
        self.pretrain_loader = tf.train.Saver(var_list=transfer_vars,
                                              max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.train_average_iter = self.config.train_average_iter
        self.val_average_iter = self.config.val_average_iter
        self.heavy_summary_step = self.config.heavy_summary_step
        self.validation_step = self.config.validation_step
        self.checkpoint_step = self.config.checkpoint_step

        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=None,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1})

        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        self.ckpt_path = config.checkpoint
        if self.ckpt_path is not None:
            log.info('Checkpoint path: {}'.format(self.ckpt_path))
            self.checkpoint_loader.restore(self.session, self.ckpt_path)
            log.info('Loaded the checkpoint')

        self.pretrained_param_path = config.pretrained_param_path
        if self.pretrained_param_path is not None:
            log.warn('Filtered transfer_vars (loaded from pre-trained param):')
            tf.contrib.slim.model_analyzer.analyze_vars(transfer_vars,
                                                        print_info=True)

            log.info('Pre-trained param path: {}'.format(
                self.pretrained_param_path))
            self.pretrain_loader.restore(self.session,
                                         self.pretrained_param_path)
            log.info('Loaded the pre-trained parameters')
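
The trainer above keeps 'no_op': tf.no_op() in its summary_ops dictionary so that the step loop can always fetch some op by key, even on iterations where nothing should be written to TensorBoard. A small sketch of that lookup pattern follows; the 'train' collection key, the scalar being summarized, and the 100-step period are assumptions for illustration.

# Sketch of the "tf.no_op() as placeholder summary op" pattern used above.
# The 'train' collection key and the 100-step period are illustrative.
import tensorflow as tf

tf.summary.scalar('train_loss', tf.random_uniform([]), collections=['train'])

summary_ops = {
    'train': tf.summary.merge_all(key='train'),
    'no_op': tf.no_op(),  # fetched on steps where no summary is wanted
}

def summary_op_for_step(step, summary_period=100):
    """Pick the real summary op or the no-op placeholder for this step."""
    key = 'train' if step % summary_period == 0 else 'no_op'
    return summary_ops[key]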
Exemple #45
0
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               train_data_pattern,
                               batch_size=batch_size,
                               num_readers=num_readers,
                               num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    with tf.name_scope("model"):
        result = model.create_model(model_input,
                                    num_frames=num_frames,
                                    vocab_size=reader.num_classes,
                                    labels=labels_batch)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        predictions = result["predictions"]
        if "loss" in result.keys():
            label_loss = result["loss"]
        else:
            label_loss = label_loss_fn.calculate_loss(predictions,
                                                      labels_batch)
        tf.summary.scalar("label_loss", label_loss)

        if "regularization_loss" in result.keys():
            reg_loss = result["regularization_loss"]
        else:
            reg_loss = tf.constant(0.0)

        reg_losses = tf.losses.get_regularization_losses()
        if reg_losses:
            reg_loss += tf.add_n(reg_losses)

        if regularization_penalty != 0:
            tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    label_loss = tf.identity(label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss
        train_op = slim.learning.create_train_op(
            final_loss,
            optimizer,
            global_step=global_step,
            clip_gradient_norm=clip_gradient_norm)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
Exemple #46
0
def VGG16_run():

    train_loss, train_acc = [], []
    valid_loss, valid_acc = [], []
    test_loss, test_acc = [], []
    # load data
    #Dataset
    total_x, _, total_y=cifar_LoadData.load_training_data()
    test_x, _, test_y=cifar_LoadData.load_test_data()
    #total_y=total_y.astype(np.int)
    #test_y=test_y.astype(np.int)
    
    ## Shuffling & train/validation split
    shuffle_idx = np.arange(total_y.shape[0])
    shuffle_rng = np.random.RandomState(123)
    shuffle_rng.shuffle(shuffle_idx)
    total_x, total_y = total_x[shuffle_idx], total_y[shuffle_idx]
    train_x, train_y = total_x[:int(num_images*(1-validation_ratio)), :, :, :], total_y[:int(num_images*(1-validation_ratio)), :]
    valid_x, valid_y = total_x[int(num_images*(1-validation_ratio)):, :, :, :], total_y[int(num_images*(1-validation_ratio)):, :]

    #reset graph
    tf.reset_default_graph()
    # Graph
    x = tf.placeholder(tf.float32, [None,
                                    img_size,
                                    img_size,
                                    img_channels],
                                    name='x-input')
    y_ = tf.placeholder(tf.float32, [None, num_classes], name='y-input')

    training_phase = tf.placeholder(tf.bool, None, name='training_phase')
    keep_prob =tf.placeholder(tf.float32, None, name='keep_prob')
 
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)

    y=VGG16.VGG_16(x, keep_prob, regularizer)
 
    global_step = tf.Variable(0, trainable=False)
 
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # labels is the label index, not the values
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    #loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    loss = cross_entropy_mean
 
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               num_imges_train // BATCH_SIZE,
                                               LEARNING_RATE_DECAY)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    #minimize is an combi-operation of compute gradients and apply gradients
    #grads = optimizer.compute_gradients(loss, var_list=tf.trainable_variables())
    #train_step=optimizer.apply_gradients(grads, global_step=global_step)

    # Prediction
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name="train")
 
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        #start queue runner
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(epochs):

            NumOfBatchTrain = int(num_imges_train) // BATCH_SIZE
            for i in range(NumOfBatchTrain):  
                train_x_batch = train_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :, :,:]
                train_y_batch = train_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
                #train_x_batch, train_y_batch = mnist.train.next_batch(BATCH_SIZE)
                #train_x_batch=np.reshape(train_x_batch, (-1, img_size, img_size, img_channels))

                _, loss_train_batch, step, acc_train_batch = sess.run([train_op, loss, global_step, accuracy], feed_dict={x: train_x_batch, 
                                                                                                                    y_: train_y_batch, 
                                                                                                                    training_phase: True, 
                                                                                                                    keep_prob: 0.5})
                train_loss.append(loss_train_batch)
                train_acc.append(acc_train_batch)
                if (step-1)%100==0:
                    print("training steps: %d , training loss: %g, train accuracy: %g" % (step, loss_train_batch, acc_train_batch))

            #validation in batch
            NumOfBatchValid= int(num_imges_valid) // BATCH_SIZE
            _valid_loss, _valid_acc = [], []

            for i in range(NumOfBatchValid):
                #valid_x_batch, valid_y_batch = mnist.test.next_batch(BATCH_SIZE)
                #valid_x_batch=np.reshape(valid_x_batch, (-1, img_size, img_size, img_channels))
                valid_x_batch = valid_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :, :,:]
                valid_y_batch = valid_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
                loss_val_batch, accuracy_val_batch= sess.run([loss, accuracy], 
                                    feed_dict={x: valid_x_batch, 
                                    y_: valid_y_batch, 
                                    training_phase: False,
                                    keep_prob: 1.0})
                _valid_loss.append(loss_val_batch)
                _valid_acc.append(accuracy_val_batch) 
            valid_loss.append(np.mean(_valid_loss))
            valid_acc.append(np.mean(_valid_acc))
            print("validation accuracy: %g" % (valid_acc[-1]))
            if valid_acc[-1]>0.5:
                saver.save(sess, os.path.join(save_dir, MODEL_NAME), global_step=global_step)

            # test
            NumOfBatchTest = int(num_imges_test) // BATCH_SIZE
            _test_loss, _test_acc = [], []

            for i in range(NumOfBatchTest):
                test_x_batch = test_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :, :,:]
                test_y_batch = test_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
                loss_val_batch, accuracy_val_batch= sess.run([loss, accuracy], 
                                    feed_dict={x: test_x_batch, 
                                    y_: test_y_batch, 
                                    training_phase: False,
                                    keep_prob: 1.0})
                _test_loss.append(loss_val_batch)
                _test_acc.append(accuracy_val_batch) 
            test_loss.append(np.mean(_test_loss))
            test_acc.append(np.mean(_test_acc))
            print("test accuracy: %g" % (test_acc[-1]))

        coord.request_stop()
        coord.join(threads)

        #save loss and accuracy data 
        Path(os.path.join(save_dir, 'accuracy_loss')).mkdir(parents=True, exist_ok=True)
        np.save(os.path.join(save_dir, 'accuracy_loss', 'train_loss'), train_loss)
        np.save(os.path.join(save_dir, 'accuracy_loss', 'train_acc'), train_acc)
        np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_loss'), valid_loss)
        np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_acc'), valid_acc)                                    
Exemple #47
0
 def _predict_proba_op(self, logits, **kwargs):
     return tf.no_op()
Exemple #48
0
def train(mnist):
    with tf.name_scope('input'):  # group all input handling under the 'input' scope
        x = tf.placeholder(tf.float32,
                           [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS],
                           name="X-input")
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    '''
        image_size = 224
        images = tf.Variable(tf.random_normal([batch_size,
                                               image_size,
                                               image_size, 3],
                                              dtype=tf.float32,
                                              stddev=1e-1))
    '''
    #keep_prob = tf.placeholder(tf.float32)

    predictions, softmax, fc8, p = tf_vgg.inference_op(x, keep_prob=1.0)

    #print('y shape',y.shape)
    global_step = tf.Variable(0, trainable=False)

    with tf.name_scope("moving_average"):
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

    with tf.name_scope("loss_funtion"):
        print('sotf    ', softmax.shape)
        print('label   ', tf.argmax(y_, 1).shape)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=softmax, labels=tf.argmax(y_, 1))
        #print(cross_entropy)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        #loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))
        loss = cross_entropy_mean

    with tf.name_scope("train_step"):
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE, global_step,
            mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name="train")
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            reshaped_xs = np.reshape(
                xs, (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={
                                               x: reshaped_xs,
                                               y_: ys
                                           })
            if i % 1000 == 0:
                print("after %d training steps,batch is %g" %
                      (step, loss_value))
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)

    writer = tf.summary.FileWriter('/nfs/syzhou/github/project/path/to/log',
                                   tf.get_default_graph())
    writer.close()
 def _decay_weights_op(self, var):
     if not self._decay_var_list or var.ref() in self._decay_var_list:
         return var.assign_sub(
             self._get_hyper("weight_decay", var.dtype) * var,
             self._use_locking)
     return tf.no_op()
 def _decay_weights_sparse_op(self, var, indices):
     if not self._decay_var_list or var.ref() in self._decay_var_list:
         update = -self._get_hyper("weight_decay", var.dtype) * tf.gather(
             var, indices)
         return self._resource_scatter_add(var, indices, update)
     return tf.no_op()
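
The two _decay_weights_* hooks above belong to a decoupled-weight-decay optimizer extension; each returns tf.no_op() when the variable is excluded from decay, so the caller can group the result without special-casing. A toy sketch of that "real op or no-op" dispatch, assuming TF 1.x graph mode; the function and variable names are hypothetical.

# Toy sketch of returning tf.no_op() when there is nothing to update,
# mirroring the weight-decay hooks above. Names here are hypothetical.
import tensorflow as tf

def decay_weights_op(var, weight_decay, decay_var_names=None):
    """Shrink var toward zero, or do nothing if it is excluded from decay."""
    if not decay_var_names or var.op.name in decay_var_names:
        return var.assign_sub(weight_decay * var)
    return tf.no_op()

w = tf.get_variable('w', initializer=tf.ones([3]))
b = tf.get_variable('b', initializer=tf.zeros([3]))
# tf.group works the same whether a branch produced an update or a no-op.
decay_step = tf.group(decay_weights_op(w, 1e-4, decay_var_names={'w'}),
                      decay_weights_op(b, 1e-4, decay_var_names={'w'}))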
    def build_decoder(self, encoder_outputs, encoder_state):

        sos_id_2 = tf.cast(self.char2ind[self.sos], tf.int32)
        eos_id_2 = tf.cast(self.char2ind[self.eos], tf.int32)
        self.output_layer = Dense(self.vocab_size, name='output_projection')

        # Decoder.
        with tf.variable_scope("decoder") as decoder_scope:

            cell, decoder_initial_state = self.build_decoder_cell(
                encoder_outputs,
                encoder_state,
                self.audio_sequence_lengths)

            # Train
            if self.mode != 'INFER':

                helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs=self.char_embedding,
                    sequence_length=self.char_sequence_lengths,
                    embedding=self.embedding,
                    sampling_probability=0.5,
                    time_major=False)

                # Decoder
                my_decoder = tf.contrib.seq2seq.BasicDecoder(cell,
                                                             helper,
                                                             decoder_initial_state,
                                                             output_layer=self.output_layer)

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    output_time_major=False,
                    maximum_iterations=self.maximum_iterations,
                    swap_memory=False,
                    impute_finished=True,
                    scope=decoder_scope
                )

                sample_id = outputs.sample_id
                logits = outputs.rnn_output


            # Inference
            else:
                start_tokens = tf.fill([self.batch_size], sos_id_2)
                end_token = eos_id_2

                # Beam search
                if self.beam_width > 0:
                    my_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=cell,
                        embedding=self.embedding,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.output_layer,
                    )

                # Greedy
                else:
                    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embedding,
                                                                      start_tokens,
                                                                      end_token)

                    my_decoder = tf.contrib.seq2seq.BasicDecoder(cell,
                                                                 helper,
                                                                 decoder_initial_state,
                                                                 output_layer=self.output_layer)
                if self.inference_targets:
                    maximum_iterations = self.maximum_iterations
                else:
                    maximum_iterations = None

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    maximum_iterations=maximum_iterations,
                    output_time_major=False,
                    impute_finished=False,
                    swap_memory=False,
                    scope=decoder_scope)

                if self.beam_width > 0:
                    logits = tf.no_op()
                    sample_id = outputs.predicted_ids
                else:
                    logits = tf.no_op()
                    sample_id = outputs.sample_id

        return logits, sample_id, final_context_state
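
In the beam-search branch above, the BeamSearchDecoder output carries predicted_ids but no per-step rnn_output, so the function returns tf.no_op() in place of logits to keep its signature uniform across modes. A short sketch of how a caller might branch on that placeholder, assuming TF 1.x contrib seq2seq; the loss helper below is hypothetical, not part of the model above.

# Sketch of consuming the (logits, sample_id) pair returned above, where
# logits may be a real tensor or the tf.no_op() beam-search placeholder.
import tensorflow as tf

def maybe_sequence_loss(logits, targets, target_lengths):
    """Build a sequence loss, or skip it when beam search returned no logits."""
    if isinstance(logits, tf.Operation):  # the tf.no_op() placeholder
        return tf.no_op()
    mask = tf.sequence_mask(target_lengths,
                            maxlen=tf.shape(logits)[1],
                            dtype=tf.float32)
    return tf.contrib.seq2seq.sequence_loss(logits, targets, mask)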
Exemple #52
0
class IterationBuilderTest(parameterized.TestCase, tf.test.TestCase):

    # pylint: disable=g-long-lambda
    @parameterized.named_parameters(
        {
            "testcase_name": "single_subnetwork_fn",
            "ensemble_builder": _FakeEnsembleBuilder(),
            "subnetwork_builders": [_FakeBuilder("training")],
            "features": lambda: [[1., -1., 0.]],
            "labels": lambda: [1],
            "want_loss": 1.403943,
            "want_predictions": 2.129,
            "want_best_candidate_index": 0,
        }, {
            "testcase_name":
            "single_subnetwork_with_eval_metrics",
            "ensemble_builder":
            _FakeEnsembleBuilder(eval_metric_ops_fn=lambda:
                                 {"a": (tf.constant(1), tf.constant(2))}),
            "subnetwork_builders": [
                _FakeBuilder("training", ),
            ],
            "mode":
            tf.estimator.ModeKeys.EVAL,
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.403943,
            "want_predictions":
            2.129,
            "want_eval_metric_ops": ["a"],
            "want_best_candidate_index":
            0,
        }, {
            "testcase_name":
            "single_subnetwork_with_non_tensor_eval_metric_op",
            "ensemble_builder":
            _FakeEnsembleBuilder(eval_metric_ops_fn=lambda:
                                 {"a": (tf.constant(1), tf.no_op())}),
            "subnetwork_builders": [
                _FakeBuilder("training", ),
            ],
            "mode":
            tf.estimator.ModeKeys.EVAL,
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.403943,
            "want_predictions":
            2.129,
            "want_eval_metric_ops": ["a"],
            "want_best_candidate_index":
            0,
        }, {
            "testcase_name": "single_subnetwork_done_training_fn",
            "ensemble_builder": _FakeEnsembleBuilder(),
            "subnetwork_builders": [_FakeBuilder("done")],
            "features": lambda: [[1., -1., 0.]],
            "labels": lambda: [1],
            "want_loss": 1.403943,
            "want_predictions": 2.129,
            "want_best_candidate_index": 0,
            "want_is_over": True,
        }, {
            "testcase_name": "single_dict_predictions_subnetwork_fn",
            "ensemble_builder": _FakeEnsembleBuilder(dict_predictions=True),
            "subnetwork_builders": [_FakeBuilder("training")],
            "features": lambda: [[1., -1., 0.]],
            "labels": lambda: [1],
            "want_loss": 1.403943,
            "want_predictions": {
                "classes": 2,
                "logits": 2.129
            },
            "want_best_candidate_index": 0,
        }, {
            "testcase_name": "previous_ensemble",
            "ensemble_builder": _FakeEnsembleBuilder(),
            "subnetwork_builders": [_FakeBuilder("training")],
            "features": lambda: [[1., -1., 0.]],
            "labels": lambda: [1],
            "previous_ensemble_spec": lambda: tu.dummy_ensemble_spec("old"),
            "want_loss": 1.403943,
            "want_predictions": 2.129,
            "want_best_candidate_index": 1,
        }, {
            "testcase_name":
            "previous_ensemble_is_best",
            "ensemble_builder":
            _FakeEnsembleBuilder(),
            "subnetwork_builders": [_FakeBuilder("training")],
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "previous_ensemble_spec":
            lambda: tu.dummy_ensemble_spec("old", random_seed=12),
            "want_loss":
            -.437,
            "want_predictions":
            .688,
            "want_best_candidate_index":
            0,
        }, {
            "testcase_name":
            "previous_ensemble_spec_and_eval_metrics",
            "ensemble_builder":
            _FakeEnsembleBuilder(eval_metric_ops_fn=lambda:
                                 {"a": (tf.constant(1), tf.constant(2))}),
            "subnetwork_builders": [_FakeBuilder("training")],
            "mode":
            tf.estimator.ModeKeys.EVAL,
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "previous_ensemble_spec":
            lambda: tu.dummy_ensemble_spec(
                "old", eval_metric_ops={"a":
                                        (tf.constant(1), tf.constant(2))}),
            "want_loss":
            1.403943,
            "want_predictions":
            2.129,
            "want_eval_metric_ops": ["a"],
            "want_best_candidate_index":
            1,
        }, {
            "testcase_name":
            "two_subnetwork_fns",
            "ensemble_builder":
            _FakeEnsembleBuilder(),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=7)
            ],
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.40394,
            "want_predictions":
            2.129,
            "want_best_candidate_index":
            0,
        }, {
            "testcase_name":
            "two_subnetwork_fns_other_best",
            "ensemble_builder":
            _FakeEnsembleBuilder(),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=12)
            ],
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            -.437,
            "want_predictions":
            .688,
            "want_best_candidate_index":
            1,
        }, {
            "testcase_name":
            "two_subnetwork_one_training_fns",
            "ensemble_builder":
            _FakeEnsembleBuilder(),
            "subnetwork_builders":
            [_FakeBuilder("training"),
             _FakeBuilder("done", random_seed=7)],
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.403943,
            "want_predictions":
            2.129,
            "want_best_candidate_index":
            0,
        }, {
            "testcase_name":
            "two_subnetwork_done_training_fns",
            "ensemble_builder":
            _FakeEnsembleBuilder(),
            "subnetwork_builders":
            [_FakeBuilder("done"),
             _FakeBuilder("done1", random_seed=7)],
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.403943,
            "want_predictions":
            2.129,
            "want_best_candidate_index":
            0,
            "want_is_over":
            True,
        }, {
            "testcase_name":
            "two_dict_predictions_subnetwork_fns",
            "ensemble_builder":
            _FakeEnsembleBuilder(dict_predictions=True),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=7)
            ],
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.404,
            "want_predictions": {
                "classes": 2,
                "logits": 2.129
            },
            "want_best_candidate_index":
            0,
        }, {
            "testcase_name":
            "two_dict_predictions_subnetwork_fns_predict_classes",
            "ensemble_builder":
            _FakeEnsembleBuilder(
                dict_predictions=True,
                export_output_key=tu.ExportOutputKeys.CLASSIFICATION_CLASSES),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=7)
            ],
            "mode":
            tf.estimator.ModeKeys.PREDICT,
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.404,
            "want_predictions": {
                "classes": 2,
                "logits": 2.129
            },
            "want_best_candidate_index":
            0,
            "want_export_outputs": {
                tu.ExportOutputKeys.CLASSIFICATION_CLASSES: [2.129],
                "serving_default": [2.129],
            },
        }, {
            "testcase_name":
            "two_dict_predictions_subnetwork_fns_predict_scores",
            "ensemble_builder":
            _FakeEnsembleBuilder(
                dict_predictions=True,
                export_output_key=tu.ExportOutputKeys.CLASSIFICATION_SCORES),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=7)
            ],
            "mode":
            tf.estimator.ModeKeys.PREDICT,
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_loss":
            1.404,
            "want_predictions": {
                "classes": 2,
                "logits": 2.129
            },
            "want_best_candidate_index":
            0,
            "want_export_outputs": {
                tu.ExportOutputKeys.CLASSIFICATION_SCORES: [2.129],
                "serving_default": [2.129],
            },
        }, {
            "testcase_name":
            "two_dict_predictions_subnetwork_fns_predict_regression",
            "ensemble_builder":
            _FakeEnsembleBuilder(
                dict_predictions=True,
                export_output_key=tu.ExportOutputKeys.REGRESSION),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=7)
            ],
            "mode":
            tf.estimator.ModeKeys.PREDICT,
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_predictions": {
                "classes": 2,
                "logits": 2.129
            },
            "want_best_candidate_index":
            0,
            "want_export_outputs": {
                tu.ExportOutputKeys.REGRESSION: 2.129,
                "serving_default": 2.129,
            },
        }, {
            "testcase_name":
            "two_dict_predictions_subnetwork_fns_predict_prediction",
            "ensemble_builder":
            _FakeEnsembleBuilder(
                dict_predictions=True,
                export_output_key=tu.ExportOutputKeys.PREDICTION),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=7)
            ],
            "mode":
            tf.estimator.ModeKeys.PREDICT,
            "features":
            lambda: [[1., -1., 0.]],
            "labels":
            lambda: [1],
            "want_predictions": {
                "classes": 2,
                "logits": 2.129
            },
            "want_best_candidate_index":
            0,
            "want_export_outputs": {
                tu.ExportOutputKeys.PREDICTION: {
                    "classes": 2,
                    "logits": 2.129
                },
                "serving_default": {
                    "classes": 2,
                    "logits": 2.129
                },
            },
        })
    def test_build_iteration(self,
                             ensemble_builder,
                             subnetwork_builders,
                             features,
                             labels,
                             want_predictions,
                             want_best_candidate_index,
                             want_eval_metric_ops=(),
                             want_is_over=False,
                             previous_ensemble_spec=lambda: None,
                             want_loss=None,
                             want_export_outputs=None,
                             mode=tf.estimator.ModeKeys.TRAIN):
        global_step = tf.train.create_global_step()
        builder = _IterationBuilder(_FakeCandidateBuilder(), ensemble_builder)
        iteration = builder.build_iteration(
            iteration_number=0,
            subnetwork_builders=subnetwork_builders,
            features=features(),
            labels=labels(),
            mode=mode,
            previous_ensemble_spec=previous_ensemble_spec())
        with self.test_session() as sess:
            init = tf.group(tf.global_variables_initializer(),
                            tf.local_variables_initializer())
            sess.run(init)
            estimator_spec = iteration.estimator_spec
            self.assertAllClose(want_predictions,
                                sess.run(estimator_spec.predictions),
                                atol=1e-3)
            self.assertEqual(set(want_eval_metric_ops),
                             set(estimator_spec.eval_metric_ops.keys()))
            self.assertEqual(want_best_candidate_index,
                             sess.run(iteration.best_candidate_index))
            self.assertEqual(want_is_over, sess.run(iteration.is_over))

            if mode == tf.estimator.ModeKeys.PREDICT:
                self.assertIsNotNone(estimator_spec.export_outputs)
                self.assertAllClose(want_export_outputs,
                                    sess.run(
                                        _export_output_tensors(
                                            estimator_spec.export_outputs)),
                                    atol=1e-3)
                self.assertIsNone(iteration.estimator_spec.train_op)
                self.assertIsNone(iteration.estimator_spec.loss)
                self.assertIsNotNone(want_export_outputs)
                return

            self.assertAlmostEqual(want_loss,
                                   sess.run(iteration.estimator_spec.loss),
                                   places=3)
            self.assertIsNone(iteration.estimator_spec.export_outputs)
            if mode == tf.estimator.ModeKeys.TRAIN:
                sess.run(iteration.estimator_spec.train_op)
                self.assertEqual(1, sess.run(global_step))
                self.assertEqual(1, sess.run(iteration.step))

    @parameterized.named_parameters(
        {
            "testcase_name": "empty_subnetwork_builders",
            "ensemble_builder": _FakeEnsembleBuilder(),
            "subnetwork_builders": [],
            "want_raises": ValueError,
        }, {
            "testcase_name":
            "same_subnetwork_builder_names",
            "ensemble_builder":
            _FakeEnsembleBuilder(),
            "subnetwork_builders":
            [_FakeBuilder("same_name"),
             _FakeBuilder("same_name")],
            "want_raises":
            ValueError,
        }, {
            "testcase_name":
            "same_name_as_previous_ensemble_spec",
            "ensemble_builder":
            _FakeEnsembleBuilder(),
            "previous_ensemble_spec_fn":
            lambda: tu.dummy_ensemble_spec("same_name"),
            "subnetwork_builders": [
                _FakeBuilder("same_name"),
            ],
            "want_raises":
            ValueError,
        }, {
            "testcase_name":
            "predict_invalid",
            "ensemble_builder":
            _FakeEnsembleBuilder(
                dict_predictions=True,
                export_output_key=tu.ExportOutputKeys.INVALID),
            "subnetwork_builders": [
                _FakeBuilder("training"),
                _FakeBuilder("training2", random_seed=7)
            ],
            "mode":
            tf.estimator.ModeKeys.PREDICT,
            "want_raises":
            TypeError,
        })
    def test_build_iteration_error(self,
                                   ensemble_builder,
                                   subnetwork_builders,
                                   want_raises,
                                   previous_ensemble_spec_fn=lambda: None,
                                   mode=tf.estimator.ModeKeys.TRAIN):
        builder = _IterationBuilder(_FakeCandidateBuilder(), ensemble_builder)
        features = [[1., -1., 0.]]
        labels = [1]
        with self.test_session():
            with self.assertRaises(want_raises):
                builder.build_iteration(
                    iteration_number=0,
                    subnetwork_builders=subnetwork_builders,
                    features=features,
                    labels=labels,
                    mode=mode,
                    previous_ensemble_spec=previous_ensemble_spec_fn())
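
The single_subnetwork_with_non_tensor_eval_metric_op case above feeds the iteration builder an eval metric whose update op is tf.no_op() rather than a tensor, checking that such pairs are accepted. A minimal sketch of a metric in that shape, assuming the usual (value, update_op) convention of tf.metrics and EstimatorSpec.eval_metric_ops:

# Minimal sketch of an eval metric whose update op is a plain tf.no_op(),
# alongside a regular streaming metric, in the (value, update_op) convention.
import tensorflow as tf

def constant_metric(value):
    """A 'metric' that never accumulates: its update op does nothing."""
    return tf.constant(value), tf.no_op()

eval_metric_ops = {
    'a': constant_metric(1),                                  # non-tensor update op
    'mean_label': tf.metrics.mean(tf.constant([1.0, 0.0])),   # ordinary metric
}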
Exemple #53
0
def run_training():
    # Get the sets of images and labels for training, validation, and
    # Tell TensorFlow that the model will be built into the default Graph.

    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "/home/project/I3D/I3D/checkpoints/rgb_imagenet"

    video_path_list = np.load('./data_list/train_data_list.npy')
    label_list = np.load('./data_list/train_label_list.npy')
    with tf.Graph().as_default():
        global_step = tf.get_variable('global_step', [],
                                      dtype=tf.int32,
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        train_input_queue = tf.train.slice_input_producer(
            [video_path_list, label_list], shuffle=True)
        video_path = train_input_queue[0]
        train_label = train_input_queue[1]

        rgb_train_images, _, _ = tf.py_func(
            func=input_data.get_frames,
            inp=[
                video_path, -1, FLAGS.num_frame_per_clib, FLAGS.crop_size,
                FLAGS.sample_rate, False
            ],
            Tout=[tf.float32, tf.double, tf.int64],
        )

        batch_videos, batch_labels = tf.train.batch(
            [rgb_train_images, train_label],
            batch_size=FLAGS.batch_size * gpu_num,
            capacity=200,
            num_threads=20,
            shapes=[(FLAGS.num_frame_per_clib / FLAGS.sample_rate,
                     FLAGS.crop_size, FLAGS.crop_size, 3), ()])
        opt_rgb = tf.train.AdamOptimizer(learning_rate)
        #opt_nonlocal = tf.train.AdamOptimizer(learning_rate*10)
        #opt_rgb = tf.train.MomentumOptimizer(learning_rate, 0.9)
        #opt_rgb = tf.train.GradientDescentOptimizer(learning_rate)
        tower_grads = []
        logits = []
        loss = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_index in range(0, gpu_num):
                with tf.device('/gpu:%d' % gpu_index):
                    with tf.name_scope('GPU_%d' % gpu_index):
                        rgb_logit, _ = InceptionI3d(
                            num_classes=FLAGS.classics,
                            spatial_squeeze=True,
                            final_endpoint='Logits',
                            block_num=FLAGS.block_num)(
                                batch_videos[gpu_index *
                                             FLAGS.batch_size:(gpu_index + 1) *
                                             FLAGS.batch_size, :, :, :, :],
                                True)
                        rgb_loss = tower_loss(
                            rgb_logit,
                            batch_labels[gpu_index *
                                         FLAGS.batch_size:(gpu_index + 1) *
                                         FLAGS.batch_size], FLAGS.weight_decay)
                        tf.get_variable_scope().reuse_variables()
                        rgb_grads = opt_rgb.compute_gradients(rgb_loss)
                tower_grads.append(rgb_grads)
                logits.append(rgb_logit)
                loss.append(rgb_loss)
        logits = tf.concat(logits, 0)
        accuracy = tower_acc(logits, batch_labels)
        grads = average_gradients(tower_grads)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        rgb_variable_map = {}
        i3d_map = {}
        nonlocal_map = {}
        for variable in tf.global_variables():
            if 'NonLocalBlock' in variable.name:
                nonlocal_map[variable.name] = variable
            else:
                i3d_map[variable.name] = variable

            if variable.name.split('/')[0] == 'RGB' and \
                    'Adam' not in variable.name.split('/')[-1] and \
                    'NonLocal' not in variable.name:
                #rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d/'):]] = variable
                rgb_variable_map[variable.name.replace(':0', '')] = variable

        with tf.control_dependencies(update_ops):
            apply_gradient_rgb = opt_rgb.apply_gradients(
                grads, global_step=global_step)
            if FLAGS.block_num >= 0:
                train_op = tf.group(apply_gradient_rgb)
            else:
                nonlocal_grads = opt_nonlocal.compute_gradients(
                    rgb_loss, var_list=nonlocal_map)
                apply_gradient_nonlocal = opt_nonlocal.apply_gradients(
                    nonlocal_grads, global_step=global_step)
                train_op = tf.group(apply_gradient_rgb,
                                    apply_gradient_nonlocal)
            null_op = tf.no_op()

        # Create a session for running Ops on the Graph.
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        sess.run(init)
        # Create summary writer
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', tf.reduce_mean(loss))
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()
    # load the pre-trained RGB model

    ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        rgb_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    train_writer = tf.summary.FileWriter(
        './visual_logs/%dGPU_sgd%dblock_train_scratch_400000_8_64_0.0001_decay'
        % (gpu_num, FLAGS.block_num), sess.graph)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess, coord)

    for step in range(FLAGS.max_steps):
        start_time = time.time()
        sess.run(train_op)
        duration = time.time() - start_time
        print('Step %d: %.3f sec, end time : after %.3f days' %
              (step, duration, (FLAGS.max_steps - step) * duration / 86400))

        if step % 10 == 0 or (step + 1) == FLAGS.max_steps:
            print('Training Data Eval:')
            summary, acc, loss_rgb = sess.run([merged, accuracy, loss])
            print("accuracy: " + "{:.5f}".format(acc))
            print("rgb_loss: " + "{:.5f}".format(np.mean(loss_rgb)))
            train_writer.add_summary(summary, step)

        if (step + 1) % 2000 == 0 or (step + 1) == FLAGS.max_steps:
            saver.save(sess,
                       os.path.join(model_save_dir, 'model'),
                       global_step=step)

    coord.request_stop()
    coord.join(threads)
    print("done")
Exemple #54
0
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")

    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))

    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    y = inference(x, None, weights1, biases1, weights2, biases2)

    global_step = tf.Variable(0, trainable=False)

    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)

    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    average_y = inference(x, variable_averages, weights1, biases1, weights2,
                          biases2)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))

    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)

    loss = cross_entropy_mean + regularization

    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,\
                                               mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    # Define the evaluation ops outside the control_dependencies block so that
    # computing accuracy does not also trigger a training step.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels
        }

        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            if (i % 1000 == 0):
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print(
                    "After %d training step(s), validation accuracy using average model is %g"
                    % (i, validate_acc))

            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)

        print(
            "After %d training steps test accuracy using average model is %g" %
            (TRAINING_STEPS, test_acc))
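This example (and the similar one further down) relies on an inference helper that is not included in the snippet. A minimal sketch of the usual two-layer version follows; it assumes avg_class is either None or a tf.train.ExponentialMovingAverage whose apply() has already been called on the parameters:

import tensorflow as tf  # TensorFlow 1.x

def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    if avg_class is None:
        # Plain forward pass with the current parameter values.
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    # Forward pass using the shadow (moving-average) parameter values.
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weights1)) +
        avg_class.average(biases1))
    return (tf.matmul(layer1, avg_class.average(weights2)) +
            avg_class.average(biases2))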
    def optimize(self, G_loss, D_Y_loss, F_loss, D_X_loss, histogram_loss):
        def make_optimizer(loss, variables, name='Adam'):
            """ Adam optimizer with learning rate 0.0002 for the first 100k steps (~100 epochs)
          and a linearly decaying rate that goes to zero over the next 100k steps
      """
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = self.learning_rate
            end_learning_rate = 0.0
            start_decay_step = 100000
            decay_steps = 100000
            beta1 = self.beta1
            learning_rate = (tf.where(
                tf.greater_equal(global_step, start_decay_step),
                tf.train.polynomial_decay(starter_learning_rate,
                                          global_step - start_decay_step,
                                          decay_steps,
                                          end_learning_rate,
                                          power=1.0), starter_learning_rate))
            tf.summary.scalar('learning_rate/{}'.format(name), learning_rate)

            learning_step = (tf.train.AdamOptimizer(
                learning_rate, beta1=beta1,
                name=name).minimize(loss,
                                    global_step=global_step,
                                    var_list=variables))
            return learning_step

        def make_optimizer_H(loss, variables, name='RMSprop'):
            """ Adam optimizer with learning rate 0.0002 for the first 100k steps (~100 epochs)
          and a linearly decaying rate that goes to zero over the next 100k steps
      """
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = self.learning_rate  # set learning_rate to 0 to disable the histogram loss
            end_learning_rate = 0.0
            start_decay_step = 100000
            decay_steps = 100000000000
            beta1 = self.beta1
            learning_rate = (tf.where(
                tf.greater_equal(global_step, start_decay_step),
                tf.train.polynomial_decay(starter_learning_rate,
                                          global_step - start_decay_step,
                                          decay_steps,
                                          end_learning_rate,
                                          power=1.0), starter_learning_rate))
            learning_step = (tf.train.AdamOptimizer(
                learning_rate, beta1=beta1,
                name=name).minimize(loss,
                                    global_step=global_step,
                                    var_list=variables))
            return learning_step

        G_optimizer = make_optimizer(G_loss, self.G.variables, name='Adam_G')
        D_Y_optimizer = make_optimizer(D_Y_loss,
                                       self.D_Y.variables,
                                       name='Adam_D_Y')
        F_optimizer = make_optimizer(F_loss, self.F.variables, name='Adam_F')
        D_X_optimizer = make_optimizer(D_X_loss,
                                       self.D_X.variables,
                                       name='Adam_D_X')
        H_optimizer = make_optimizer_H(histogram_loss,
                                       self.G.variables,
                                       name='Adam_H')

        with tf.control_dependencies([
                G_optimizer, D_Y_optimizer, F_optimizer, D_X_optimizer,
                H_optimizer
        ]):
            return tf.no_op(name='optimizers')
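The schedule built in make_optimizer keeps the learning rate constant for the first start_decay_step steps and then decays it linearly (polynomial decay with power=1.0) to zero over decay_steps. A plain-Python sketch of the same piecewise rule, with illustrative defaults taken from the docstring, makes it easy to sanity-check:

def cyclegan_lr(step, starter=2e-4, start_decay_step=100000,
                decay_steps=100000, end_lr=0.0):
    # Constant phase, then linear decay to end_lr, clamped once decay finishes.
    if step < start_decay_step:
        return starter
    frac = min(float(step - start_decay_step) / decay_steps, 1.0)
    return (starter - end_lr) * (1.0 - frac) + end_lr

for s in (0, 50000, 100000, 150000, 200000, 250000):
    print(s, cyclegan_lr(s))  # 2e-4, 2e-4, 2e-4, 1e-4, 0.0, 0.0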
    def _build_train_op(self):
        """Builds a training op.

    Returns:
      train_op: An op performing one step of training from replay data.
    """
        batch_size = tf.shape(self._replay.rewards)[0]

        target_quantile_values = tf.stop_gradient(
            self._build_target_quantile_values_op())
        # Reshape to self.num_tau_prime_samples x batch_size x 1 since this is
        # the manner in which the target_quantile_values are tiled.
        target_quantile_values = tf.reshape(
            target_quantile_values,
            [self.num_tau_prime_samples, batch_size, 1])
        # Transpose dimensions so that the dimensionality is batch_size x
        # self.num_tau_prime_samples x 1 to prepare for computation of
        # Bellman errors.
        # Final shape of target_quantile_values:
        # batch_size x num_tau_prime_samples x 1.
        target_quantile_values = tf.transpose(target_quantile_values,
                                              [1, 0, 2])

        # Shape of indices: (num_tau_samples x batch_size) x 1.
        # Expand dimension by one so that it can be used to index into all the
        # quantiles when using the tf.gather_nd function (see below).
        indices = tf.range(self.num_tau_samples * batch_size)[:, None]

        # Expand the dimension by one so that it can be used to index into all the
        # quantiles when using the tf.gather_nd function (see below).
        reshaped_actions = self._replay.actions[:, None]
        reshaped_actions = tf.tile(reshaped_actions, [self.num_tau_samples, 1])
        # Shape of reshaped_actions: (num_tau_samples x batch_size) x 2.
        reshaped_actions = tf.concat([indices, reshaped_actions], axis=1)

        chosen_action_quantile_values = tf.gather_nd(
            self._replay_net_quantile_values, reshaped_actions)
        # Transpose dimensions so that the dimensionality is batch_size x
        # self.num_tau_samples x 1 to prepare for computation of
        # Bellman errors.
        # Reshape to self.num_tau_samples x batch_size x 1 since this is the manner
        # in which the quantile values are tiled.
        chosen_action_quantile_values = tf.reshape(
            chosen_action_quantile_values,
            [self.num_tau_samples, batch_size, 1])
        # Final shape of chosen_action_quantile_values:
        # batch_size x num_tau_samples x 1.
        chosen_action_quantile_values = tf.transpose(
            chosen_action_quantile_values, [1, 0, 2])  #batchsize x quan x 1

        # Shape of bellman_erors and huber_loss:
        # batch_size x num_tau_prime_samples x num_tau_samples x 1.
        bellman_errors = (target_quantile_values[:, :, None, :] -
                          chosen_action_quantile_values[:, None, :, :])
        # The huber loss (see Section 2.3 of the paper) is defined via two cases:
        # case_one: |bellman_errors| <= kappa
        # case_two: |bellman_errors| > kappa
        huber_loss_case_one = tf.to_float(
            tf.abs(bellman_errors) <= self.kappa) * 0.5 * bellman_errors**2
        huber_loss_case_two = tf.to_float(
            tf.abs(bellman_errors) > self.kappa) * self.kappa * (
                tf.abs(bellman_errors) - 0.5 * self.kappa)
        huber_loss = huber_loss_case_one + huber_loss_case_two

        # Reshape replay_quantiles to batch_size x num_tau_samples x 1
        replay_quantiles = tf.reshape(self._replay_net_quantiles,
                                      [self.num_tau_samples, batch_size, 1])
        replay_quantiles = tf.transpose(replay_quantiles,
                                        [1, 0, 2])  #batchsize x quan x 1

        # Tile by num_tau_prime_samples along a new dimension. Shape is now
        # batch_size x num_tau_prime_samples x num_tau_samples x 1.
        # These quantiles will be used for computation of the quantile huber loss
        # below (see section 2.3 of the paper).
        replay_quantiles = tf.to_float(
            tf.tile(replay_quantiles[:, None, :, :],
                    [1, self.num_tau_prime_samples, 1, 1]))
        # Shape: batch_size x num_tau_prime_samples x num_tau_samples x 1.
        quantile_huber_loss = (tf.abs(
            tf.stop_gradient(replay_quantiles) -
            tf.stop_gradient(tf.to_float(bellman_errors < 0))) *
                               huber_loss) / self.kappa
        # Sum over current quantile value (num_tau_samples) dimension,
        # average over target quantile value (num_tau_prime_samples) dimension.
        # Shape: batch_size x num_tau_prime_samples x 1.
        loss = tf.reduce_sum(quantile_huber_loss, axis=2)
        # Shape: batch_size x 1.
        loss = tf.reduce_mean(loss, axis=1)

        # TODO(kumasaurabh): Add prioritized replay functionality here.
        update_priorities_op = tf.no_op()
        with tf.control_dependencies([update_priorities_op]):
            if self.summary_writer is not None:
                with tf.variable_scope('Losses'):
                    tf.summary.scalar('QuantileLoss', tf.reduce_mean(loss))
            return self.optimizer.minimize(tf.reduce_mean(loss)),\
                    tf.squeeze(chosen_action_quantile_values), \
                    tf.squeeze(replay_quantiles[:,0,:,:])
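The loss assembled above is the quantile Huber loss from the IQN paper: a kappa-Huber penalty on each Bellman error, weighted by how far the predicted quantile level is from the indicator that the error is negative. A small NumPy sketch of the same element-wise rule (shapes kept one-dimensional for illustration):

import numpy as np

def quantile_huber(bellman_errors, quantiles, kappa=1.0):
    # Huber penalty: quadratic inside |e| <= kappa, linear outside.
    abs_e = np.abs(bellman_errors)
    huber = np.where(abs_e <= kappa,
                     0.5 * bellman_errors ** 2,
                     kappa * (abs_e - 0.5 * kappa))
    # Asymmetric quantile weighting (Section 2.3 of the paper).
    return np.abs(quantiles - (bellman_errors < 0.0)) * huber / kappa

errors = np.array([-2.0, -0.5, 0.5, 2.0])
taus = np.array([0.25, 0.25, 0.75, 0.75])
print(quantile_huber(errors, taus))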
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # generate params for hidden layer
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # generate params for output layer
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # pass None so the forward pass uses the raw parameters (no moving average)
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # define the global training steps
    global_step = tf.Variable(0, trainable=False)
    # init the moving average class
    variable_avg = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                     global_step)
    variable_avg_op = variable_avg.apply(tf.trainable_variables())
    # use average value of parameters
    avg_y = inference(x, variable_avg, weights1, biases1, weights2, biases2)

    # calculate the cross entropy between the prediction (y) and the label (y_)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # init and use regularizer function
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    # calculate the total loss as cross entropy and reg
    loss = cross_entropy_mean + regularization

    # define learning rate and train step
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,
                                               mnist.train.num_examples,
                                               LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
        .minimize(loss, global_step=global_step)
    # update the parameters and their moving averages with a single op
    with tf.control_dependencies([train_step, variable_avg_op]):
        train_op = tf.no_op(name='train')

    # calculate the accuracy
    correct_prediction = tf.equal(tf.argmax(avg_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # start the training process
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels
        }
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print('after %d training steps, validation accuracy is %g ' %
                      (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print('after %d training steps, test accuracy is %g ' %
              (TRAINING_STEPS, test_acc))
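Both MNIST examples use the same idiom: a tf.no_op wrapped in tf.control_dependencies, so that a single run of the returned op executes the gradient step and the moving-average update together. A minimal self-contained sketch of the idiom with a hypothetical variable (TensorFlow 1.x):

import tensorflow as tf

v = tf.Variable(0.0)
step = tf.assign_add(v, 1.0)        # stands in for train_step
ema = tf.train.ExponentialMovingAverage(0.9)
ema_update = ema.apply([v])         # stands in for variable_avg_op

with tf.control_dependencies([step, ema_update]):
    train_op = tf.no_op(name='train')   # does nothing itself

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)              # runs both dependencies first
    print(sess.run(v), sess.run(ema.average(v)))

Note that, exactly as in the examples above, the two dependencies are not ordered with respect to each other; the only guarantee is that both finish before train_op does.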
def build_network(d):

    # Hyperparameters
    learning_rate = 2e-5
    parameter_l2norm_scaling = 1e-10
    global_norm_gradient_clipping_ratio = 0.65

    # Define GNN dictionary
    GNN = {}

    # Define placeholder for result values (one per problem)
    instance_val = tf.placeholder(tf.float32, [None], name="instance_val")
    instance_m_list = tf.placeholder(tf.int32, [None],
                                     name="instance_edge_num")
    instance_target = tf.placeholder(tf.int32, [None], name="instance_target")

    # Define INV, a tf function to exchange positive and negative literal embeddings
    def INV(Lh):
        l = tf.shape(Lh)[0]
        n = tf.div(l, tf.constant(2))
        # Send messages from negated literals to positive ones, and vice-versa
        Lh_pos = tf.gather(Lh, tf.range(tf.constant(0), n))
        Lh_neg = tf.gather(Lh, tf.range(n, l))
        Lh_inverted = tf.concat([Lh_neg, Lh_pos], axis=0)
        return Lh_inverted

    #end

    # Define Graph neural network
    gnn = GraphNN(
        {
            "N": d,  # Nodes
            "E": d  # Edges
        },
        {
            "Ms":
            ("N",
             "E"),  # Matrix pointing from nodes to the edges they are sources
            "Mt":
            ("N",
             "E"),  # Matrix pointing from nodes to the edges they are targets 
            "Mw": ("E", "E"),  # Matrix indicating an Edge weight
            "S": ("N", "N"),  # Matrix indicating whether a node is the source
            "T": ("N", "N"),  # Matrix indicating whether a node is the target
        },
        {
            "NsmsgE":
            ("N", "E"
             ),  # Message cast to convert messages from node sources to edges
            "NtmsgE":
            ("N", "E"
             ),  # Message cast to convert messages from node targets to edges
            "EmsgNs":
            ("N", "E"
             ),  # Message cast to convert messages from edges to node sources
            "EmsgNt":
            ("N", "E"
             )  # Message cast to convert messages from edges to node targets
        },
        {
            "N": [{
                "mat": "Ms",
                "msg": "EmsgNs",
                "var": "E"
            }, {
                "mat": "Mt",
                "msg": "EmsgNt",
                "var": "E"
            }, {
                "mat": "S"
            }, {
                "mat": "T"
            }],
            "E": [{
                "mat": "Ms",
                "transpose?": True,
                "msg": "NsmsgE",
                "var": "N"
            }, {
                "mat": "Mt",
                "transpose?": True,
                "msg": "NtmsgE",
                "var": "N"
            }, {
                "mat": "Mw"
            }]
        },
        name="Dijkstra_Quiver",
        float_dtype=tf.float32)

    # Define L_vote
    E_vote_MLP = Mlp(layer_sizes=[d for _ in range(2)],
                     activations=[tf.nn.relu for _ in range(2)],
                     output_size=1,
                     name="E_vote",
                     name_internal_layers=True,
                     kernel_initializer=tf.contrib.layers.xavier_initializer(),
                     bias_initializer=tf.zeros_initializer())

    # Compute the number of variables
    m = tf.shape(gnn.matrix_placeholders["Mw"])[0]
    # Compute number of problems
    p = tf.shape(instance_val)[0]

    # Get the last embeddings
    E_n = gnn.last_states["E"].h
    E_vote_logits = E_vote_MLP(E_n)
    E_vote = tf.nn.sigmoid(E_vote_logits)
    E_objective = tf.sparse_tensor_dense_matmul(gnn.matrix_placeholders["Mw"],
                                                E_vote)

    # Reorganize votes' result to obtain a prediction for each problem instance
    def _vote_while_cond(i, m_acc, predicted_val):
        return tf.less(i, p)

    #end _vote_while_cond

    def _vote_while_body(i, m_acc, predicted_val):
        # Helper for the amount of edges in this problem
        i_m = instance_m_list[i]
        # Gather the edges of that problem
        obj_vals = tf.gather(E_objective, tf.range(m_acc, tf.add(m_acc, i_m)))
        problem_predicted_val = tf.reduce_sum(obj_vals)
        # Update TensorArray
        predicted_val = predicted_val.write(i, problem_predicted_val)
        return tf.add(i, tf.constant(1)), tf.add(m_acc, i_m), predicted_val

    #end _vote_while_body

    predicted_val = tf.TensorArray(size=p, dtype=tf.float32)
    _, _, predicted_val = tf.while_loop(_vote_while_cond, _vote_while_body, [
        tf.constant(0, dtype=tf.int32),
        tf.constant(0, dtype=tf.int32), predicted_val
    ])
    predicted_val = predicted_val.stack()

    # Define loss and %error
    predict_costs = tf.losses.mean_squared_error(labels=instance_val,
                                                 predictions=predicted_val)
    predict_cost = tf.reduce_mean(predict_costs)
    # %Error
    abserror = tf.reduce_mean(
        tf.divide(tf.abs(tf.subtract(instance_val, predicted_val)),
                  instance_val))
    error = tf.reduce_mean(
        tf.divide(tf.subtract(instance_val, predicted_val), instance_val))
    vars_cost = tf.zeros([])
    tvars = tf.trainable_variables()
    for var in tvars:
        vars_cost = tf.add(vars_cost, tf.nn.l2_loss(var))
    #end for
    loss = tf.add(predict_cost, tf.multiply(vars_cost,
                                            parameter_l2norm_scaling))
    optimizer = tf.train.AdamOptimizer(name="Adam",
                                       learning_rate=learning_rate)
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                      global_norm_gradient_clipping_ratio)
    train_step = optimizer.apply_gradients(zip(grads, tvars))

    GNN["gnn"] = gnn
    GNN["instance_val"] = instance_val
    GNN["instance_target"] = instance_target
    GNN["instance_m"] = instance_m_list
    GNN["predicted_val"] = predicted_val
    GNN["loss"] = loss
    GNN["%error"] = error
    GNN["%abserror"] = abserror
    GNN["train_step"] = train_step
    GNN["nop"] = tf.no_op()
    return GNN
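build_network clips the gradients by their global norm before applying them. As a quick reference for what tf.clip_by_global_norm computes, here is a pure-NumPy sketch of the same scaling rule with made-up gradient values:

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # Scale every gradient by clip_norm / max(global_norm, clip_norm),
    # so nothing changes when the global norm is already small enough.
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]  # global norm = 13.0
clipped, norm = clip_by_global_norm(grads, 0.65)
print(norm, clipped)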
Exemple #59
0
 def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                               iteration_step, summary, previous_ensemble):
   return tf.no_op()
Exemple #60
0
def main(_):

    pp.pprint(flags.FLAGS.__flags)

    order = []
    with open('imagenet_64x64_dogs_%s.txt' % FLAGS.order_file) as file_in:
        for line in file_in.readlines():
            order.append(int(line))
    order = np.array(order)

    assert FLAGS.mode == 'wgan-gp'

    NUM_CLASSES = 120
    NUM_TEST_SAMPLES_PER_CLASS = 50
    NUM_TRAIN_SAMPLES_PER_CLASS = 1300  # around 1300

    if not FLAGS.only_gen_no_cls:

        def build_cnn(inputs, is_training):
            train_or_test = {True: 'train', False: 'test'}
            if FLAGS.network_arch == 'resnet':
                logits, end_points = utils_resnet_64x64.ResNet(
                    inputs,
                    train_or_test[is_training],
                    num_outputs=NUM_CLASSES,
                    alpha=0.0,
                    scope=('ResNet-' + train_or_test[is_training]))
            else:
                raise Exception()
            return logits, end_points

        # save all intermediate results in result_folder
        method_name = '_'.join(
            os.path.basename(__file__).split('.')[0].split('_')[4:])
        method_name += '_gen_%d_and_select' % FLAGS.gen_how_many if FLAGS.gen_more_and_select else ''
        method_name += '_auto-%.1f-%.1f' % (FLAGS.auto_param1, FLAGS.auto_param2) \
            if FLAGS.auto_choose_num_exemplars else ('_%d' % FLAGS.num_exemplars_per_class if not FLAGS.memory_constrained else '')
        method_name += '_%s' % FLAGS.exemplar_select_criterion
        method_name += '_%.1f-%.1f' % (FLAGS.proto_weight, FLAGS.gen_weight)
        method_name += '_icarl_%d' % FLAGS.memory_upperbound if FLAGS.memory_constrained else ''
        method_name += '_reorder' if FLAGS.reorder_exemplars else ''
        method_name += '_smoothing_%.1f' % FLAGS.label_smoothing

        cls_func = '' if FLAGS.use_softmax else '_sigmoid'
        result_folder = os.path.join(
            FLAGS.result_dir, FLAGS.dataset + ('_flip' if FLAGS.flip else '') +
            '_' + FLAGS.order_file, 'nb_cl_' + str(FLAGS.nb_cl),
            'non_truncated' if FLAGS.no_truncate else 'truncated',
            FLAGS.network_arch + cls_func + '_init_' + FLAGS.init_strategy,
            'weight_decay_' + str(FLAGS.weight_decay),
            'base_lr_' + str(FLAGS.base_lr), 'adam_lr_' + str(FLAGS.adam_lr),
            method_name)

        if os.path.exists(result_folder):
            temp_i = 2
            while True:
                result_folder_mod = result_folder + '_run-' + str(temp_i)
                if not os.path.exists(result_folder_mod):
                    result_folder = result_folder_mod
                    break
                temp_i += 1
        os.makedirs(result_folder)
        print('Result folder: %s' % result_folder)

        graph_cls = tf.Graph()
        with graph_cls.as_default():
            '''
            Define variables
            '''
            batch_images = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
            batch = tf.Variable(0, trainable=False)
            learning_rate = tf.placeholder(tf.float32, shape=[])
            '''
            Network output mask
            '''
            mask_output = tf.placeholder(tf.bool, shape=[NUM_CLASSES])
            '''
            Old and new ground truth
            '''
            one_hot_labels_truncated = tf.placeholder(tf.float32,
                                                      shape=[None, None])
            '''
            Define the training network
            '''
            train_logits, _ = build_cnn(batch_images, True)
            train_masked_logits = tf.gather(train_logits,
                                            tf.squeeze(tf.where(mask_output)),
                                            axis=1)  # masking operation
            train_masked_logits = tf.cond(
                tf.equal(tf.rank(train_masked_logits),
                         1), lambda: tf.expand_dims(train_masked_logits, 1),
                lambda: train_masked_logits
            )  # convert to (N, 1) if the shape is (N,), otherwise softmax would output wrong values
            # Train accuracy (not very meaningful: aside from the old recorded responses, only one class is present)
            train_pred = tf.argmax(train_masked_logits, 1)
            train_ground_truth = tf.argmax(one_hot_labels_truncated, 1)
            correct_prediction = tf.equal(train_pred, train_ground_truth)
            train_accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
            train_batch_weights = tf.placeholder(tf.float32, shape=[None])

            reg_weights = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            regularization_loss = FLAGS.weight_decay * tf.add_n(reg_weights)
            '''
            More Settings
            '''
            if FLAGS.use_softmax:
                empirical_loss = tf.losses.softmax_cross_entropy(
                    onehot_labels=one_hot_labels_truncated,
                    logits=train_masked_logits,
                    weights=train_batch_weights)
            else:
                empirical_loss = tf.losses.sigmoid_cross_entropy(
                    multi_class_labels=one_hot_labels_truncated,
                    logits=train_masked_logits,
                    weights=train_batch_weights)

            loss = empirical_loss + regularization_loss
            if FLAGS.use_momentum:
                opt = tf.train.MomentumOptimizer(
                    learning_rate, FLAGS.momentum).minimize(loss,
                                                            global_step=batch)
            else:
                opt = tf.train.GradientDescentOptimizer(
                    learning_rate).minimize(loss, global_step=batch)
            '''
            Define the testing network
            '''
            test_logits, _ = build_cnn(batch_images, False)
            test_masked_logits = tf.gather(test_logits,
                                           tf.squeeze(tf.where(mask_output)),
                                           axis=1)
            test_masked_logits = tf.cond(
                tf.equal(tf.rank(test_masked_logits),
                         1), lambda: tf.expand_dims(test_masked_logits, 1),
                lambda: test_masked_logits)
            test_masked_prob = tf.nn.softmax(test_masked_logits)
            test_pred = tf.argmax(test_masked_logits, 1)
            test_accuracy = tf.placeholder(tf.float32)
            '''
            Copy network (define the copying op)
            '''
            if FLAGS.network_arch == 'resnet':
                all_variables = tf.get_collection(tf.GraphKeys.WEIGHTS)
            else:
                raise Exception('Invalid network architecture')
            copy_ops = [
                all_variables[ix + len(all_variables) // 2].assign(var.value())
                for ix, var in enumerate(all_variables[0:len(all_variables) //
                                                       2])
            ]
            '''
            Init certain layers when new classes added
            '''
            init_ops = tf.no_op()
            if FLAGS.init_strategy == 'all':
                init_ops = tf.global_variables_initializer()
            elif FLAGS.init_strategy == 'last':
                if FLAGS.network_arch == 'resnet':
                    init_vars = [
                        var for var in tf.global_variables()
                        if 'fc' in var.name and 'train' in var.name
                    ]
                init_ops = tf.initialize_variables(init_vars)
            '''
            Create session
            '''
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config, graph=graph_cls)
            sess.run(tf.global_variables_initializer())

            saver = tf.train.Saver()
        '''
        Summary
        '''
        train_loss_summary = tf.summary.scalar('train_loss', loss)
        train_acc_summary = tf.summary.scalar('train_accuracy', train_accuracy)
        test_acc_summary = tf.summary.scalar('test_accuracy', test_accuracy)

        summary_dir = os.path.join(result_folder, 'summary')
        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)
        train_summary_writer = tf.summary.FileWriter(
            os.path.join(summary_dir, 'train'), sess.graph)
        test_summary_writer = tf.summary.FileWriter(
            os.path.join(summary_dir, 'test'))

        iteration = 0
        '''
        Declaration of other vars
        '''
        # Average accuracy on seen classes
        aver_acc_over_time = dict()
        aver_acc_per_class_over_time = dict()
        conf_mat_over_time = dict()

        # Network mask
        mask_output_val = np.zeros([NUM_CLASSES], dtype=bool)
        mask_output_test = np.zeros([NUM_CLASSES], dtype=bool)
        '''
        Exemplars(for ablation study and other purposes)
        '''
        exemplars_dir = os.path.join(result_folder, 'exemplars')
        if not os.path.exists(exemplars_dir):
            os.makedirs(exemplars_dir)
    '''
    Build the generative model (WGAN-GP)
    '''
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    run_config = tf.ConfigProto(gpu_options=gpu_options,
                                allow_soft_placement=True)
    run_config.gpu_options.allow_growth = True
    graph_gen = tf.Graph()
    sess_wgan = tf.Session(config=run_config, graph=graph_gen)

    acwgan_obj = WGAN64x64(sess_wgan,
                           graph_gen,
                           dataset_name=FLAGS.dataset + '_' + FLAGS.order_file,
                           mode=FLAGS.mode,
                           batch_size=FLAGS.batch_size,
                           dim=FLAGS.dim,
                           output_dim=FLAGS.output_dim,
                           lambda_param=FLAGS.lambda_param,
                           critic_iters=FLAGS.critic_iters,
                           iters=FLAGS.iters,
                           result_dir=FLAGS.result_dir_cwgan,
                           checkpoint_interval=FLAGS.gan_save_interval,
                           adam_lr=FLAGS.adam_lr,
                           use_decay=FLAGS.use_decay,
                           conditional=FLAGS.conditional,
                           acgan=FLAGS.acgan,
                           acgan_scale=FLAGS.acgan_scale,
                           acgan_scale_g=FLAGS.acgan_scale_g,
                           normalization_g=FLAGS.normalization_g,
                           normalization_d=FLAGS.normalization_d,
                           gen_bs_multiple=FLAGS.gen_bs_multiple,
                           nb_cl=FLAGS.nb_cl,
                           n_gpus=FLAGS.n_gpus)

    exemplars = []

    test_images, test_labels, test_one_hot_labels, raw_images_test = imagenet_64x64.load_test_data(
    )

    if not FLAGS.only_gen_no_cls:
        # train and test data of seen classes
        test_x = np.zeros([0, 64, 64, 3], dtype=np.float32)
        test_y = np.zeros([0], dtype=np.float32)
    '''
    Class Incremental Learning
    '''
    print('Starting from category ' + str(FLAGS.from_class_idx + 1) + ' to ' +
          str(FLAGS.to_class_idx + 1))
    print('Adding %d categories every time' % FLAGS.nb_cl)
    assert (FLAGS.from_class_idx % FLAGS.nb_cl == 0)
    for category_idx in range(FLAGS.from_class_idx, FLAGS.to_class_idx + 1,
                              FLAGS.nb_cl):

        to_category_idx = category_idx + FLAGS.nb_cl - 1
        if FLAGS.nb_cl == 1:
            print('Adding Category ' + str(category_idx + 1))
        else:
            print('Adding Category %d-%d' %
                  (category_idx + 1, to_category_idx + 1))

        train_x_gan = np.zeros([0, FLAGS.output_dim], dtype=np.uint8)
        train_y_gan = np.zeros([0], dtype=float)
        test_x_gan = np.zeros([0, FLAGS.output_dim], dtype=np.uint8)
        test_y_gan = np.zeros([0], dtype=float)

        if not FLAGS.only_gen_no_cls:
            # train and test data of seen classes
            train_y_one_hot = np.zeros([0, NUM_CLASSES], dtype=np.float32)

        for category_idx_in_group in range(category_idx, to_category_idx + 1):
            real_category_idx = order[category_idx_in_group]
            real_images_train_cur_cls, raw_images_train_cur_cls = imagenet_64x64.load_train_data(
                real_category_idx, flip=FLAGS.flip)

            # GAN
            train_x_gan = np.concatenate(
                (train_x_gan, raw_images_train_cur_cls))
            train_y_gan_cur_cls = np.ones([len(raw_images_train_cur_cls)]) * (
                category_idx_in_group % FLAGS.nb_cl)
            train_y_gan = np.concatenate((train_y_gan, train_y_gan_cur_cls))

            test_indices_cur_cls = [
                idx for idx in range(len(test_labels))
                if test_labels[idx] == real_category_idx
            ]
            test_x_gan_cur_cls = raw_images_test[test_indices_cur_cls, :]
            test_y_gan_cur_cls = np.ones([len(test_indices_cur_cls)]) * (
                category_idx_in_group % FLAGS.nb_cl)
            test_x_gan = np.concatenate((test_x_gan, test_x_gan_cur_cls))
            test_y_gan = np.concatenate((test_y_gan, test_y_gan_cur_cls))

            # Classification network
            if not FLAGS.only_gen_no_cls:
                train_y_one_hot_cur_cls = np.zeros(
                    [len(raw_images_train_cur_cls), NUM_CLASSES])
                train_y_one_hot_cur_cls[:, category_idx_in_group] = np.ones(
                    len(raw_images_train_cur_cls))

                test_indices_cur_cls = [
                    idx for idx in range(len(test_labels))
                    if test_labels[idx] == real_category_idx
                ]
                test_x_cur_cls = test_images[test_indices_cur_cls, :]
                test_y_cur_cls = np.ones([len(test_indices_cur_cls)
                                          ]) * category_idx_in_group

                test_x = np.concatenate((test_x, test_x_cur_cls))
                test_y = np.concatenate((test_y, test_y_cur_cls))
                train_y_one_hot = np.concatenate(
                    (train_y_one_hot, train_y_one_hot_cur_cls))
        '''
        Train classification model
        '''
        # No need to train the classifier if there is only one class
        if (to_category_idx > 0
                and not FLAGS.only_gen_no_cls) or not FLAGS.use_softmax:

            # init certain layers
            sess.run(init_ops)

            if FLAGS.no_truncate:
                mask_output_val[:] = True
            else:
                mask_output_val[:to_category_idx + 1] = True

            # Test on all seen classes
            mask_output_test[:to_category_idx + 1] = True
            '''
            Generate samples of old classes
            '''
            train_x = np.copy(train_x_gan)
            if FLAGS.no_truncate:
                train_y_truncated = train_y_one_hot[:, :]
            else:
                train_y_truncated = train_y_one_hot[:, :to_category_idx + 1]
            train_weights_val = np.ones(len(train_x))

            for old_category_idx in range(0, category_idx):
                if old_category_idx % FLAGS.nb_cl == 0:
                    # Load old class model
                    if not acwgan_obj.load(
                        (old_category_idx // FLAGS.nb_cl + 1) * FLAGS.nb_cl -
                            1)[0]:
                        raise Exception(
                            "[!] Train a model first, then run test mode")

                num_gen_samples_x_needed = NUM_TRAIN_SAMPLES_PER_CLASS - len(
                    exemplars[old_category_idx])
                if num_gen_samples_x_needed > 0:
                    if FLAGS.gen_more_and_select:
                        gen_samples_x_more, _, _ = acwgan_obj.test(
                            FLAGS.gen_how_many, old_category_idx % FLAGS.nb_cl)
                        gen_samples_x_more_real = imagenet_64x64.convert_images(
                            gen_samples_x_more)
                        gen_samples_prob = sess.run(
                            test_masked_prob,
                            feed_dict={
                                batch_images: gen_samples_x_more_real,
                                mask_output: mask_output_val
                            })
                        gen_samples_scores_cur_cls = gen_samples_prob[
                            :, old_category_idx]
                        top_k_indices = np.argsort(
                            -gen_samples_scores_cur_cls)[:num_gen_samples_x_needed]
                        gen_samples_x = gen_samples_x_more[top_k_indices]
                    else:
                        gen_samples_x, _, _ = acwgan_obj.test(
                            num_gen_samples_x_needed,
                            old_category_idx % FLAGS.nb_cl)
                    # import wgan.tflib.save_images
                    # wgan.tflib.save_images.save_images(gen_samples_x[:128].reshape((128, 3, 64, 64)),
                    #                                    'test.jpg')
                    train_x = np.concatenate(
                        (train_x, gen_samples_x, exemplars[old_category_idx]))
                    train_weights_val = np.concatenate(
                        (train_weights_val,
                         np.ones(len(gen_samples_x)) * FLAGS.gen_weight,
                         np.ones(len(exemplars[old_category_idx])) *
                         FLAGS.proto_weight))

                    gen_samples_y = np.ones(
                        (len(gen_samples_x), to_category_idx + 1)) * (
                            (1 - FLAGS.label_smoothing) / to_category_idx)
                    gen_samples_y[:, old_category_idx] = np.ones(
                        (len(gen_samples_x))) * FLAGS.label_smoothing

                    exemplars_y = np.zeros((len(exemplars[old_category_idx]),
                                            to_category_idx + 1))
                    exemplars_y[:, old_category_idx] = np.ones(
                        (len(exemplars[old_category_idx])))

                    train_y_truncated = np.concatenate(
                        (train_y_truncated, gen_samples_y, exemplars_y))

                elif num_gen_samples_x_needed == 0:
                    train_x = np.concatenate(
                        (train_x, exemplars[old_category_idx]))
                    train_weights_val = np.concatenate(
                        (train_weights_val,
                         np.ones(len(exemplars[old_category_idx])) *
                         FLAGS.proto_weight))

                    exemplars_y = np.zeros((len(exemplars[old_category_idx]),
                                            to_category_idx + 1))
                    exemplars_y[:, old_category_idx] = np.ones(
                        (len(exemplars[old_category_idx])))

                    train_y_truncated = np.concatenate(
                        (train_y_truncated, exemplars_y))

            # # DEBUG:
            # train_indices = [idx for idx in range(NUM_SAMPLES_TOTAL) if train_labels[idx] <= category_idx]
            # train_x = raw_images_train[train_indices, :]
            # # Record the response of the new data using the old model(category_idx is consistent with the number of True in mask_output_val_prev)
            # train_y_truncated = train_one_hot_labels[train_indices, :category_idx + 1]

            # Training set
            # Convert the raw images from the data-files to floating-points.
            train_x = imagenet_64x64.convert_images(train_x)

            # Shuffle the indices and create mini-batch
            batch_indices_perm = []

            epoch_idx = 0
            lr = FLAGS.base_lr
            '''
            Training with mixed data
            '''
            while True:
                # Generate mini-batch
                if len(batch_indices_perm) == 0:
                    if epoch_idx >= FLAGS.epochs_per_category:
                        break
                    if epoch_idx in lr_strat:
                        lr /= FLAGS.lr_factor
                        print("NEW LEARNING RATE: %f" % lr)
                    epoch_idx = epoch_idx + 1

                    shuffled_indices = list(range(train_x.shape[0]))
                    np.random.shuffle(shuffled_indices)
                    for i in range(0, len(shuffled_indices),
                                   FLAGS.train_batch_size):
                        batch_indices_perm.append(
                            shuffled_indices[i:i + FLAGS.train_batch_size])
                    batch_indices_perm.reverse()

                popped_batch_idx = batch_indices_perm.pop()

                # Use the random index to select random images and labels.
                train_weights_batch_val = train_weights_val[popped_batch_idx]
                train_x_batch = train_x[popped_batch_idx, :, :, :]
                train_y_batch = [
                    train_y_truncated[k] for k in popped_batch_idx
                ]

                # Train
                train_loss_summary_str, train_acc_summary_str, train_accuracy_val, \
                train_loss_val, train_empirical_loss_val, train_reg_loss_val, _ = sess.run(
                    [train_loss_summary, train_acc_summary, train_accuracy, loss, empirical_loss,
                     regularization_loss, opt], feed_dict={batch_images: train_x_batch,
                                                           one_hot_labels_truncated: train_y_batch,
                                                           mask_output: mask_output_val,
                                                           learning_rate: lr,
                                                           train_batch_weights: train_weights_batch_val})

                # Test
                if iteration % FLAGS.test_interval == 0:
                    sess.run(copy_ops)

                    # Divide and conquer: to avoid allocating too much GPU memory
                    test_pred_val = []
                    for i in range(0, len(test_x), FLAGS.test_batch_size):
                        test_x_batch = test_x[i:i + FLAGS.test_batch_size]
                        test_pred_val_batch = sess.run(test_pred,
                                                       feed_dict={
                                                           batch_images:
                                                           test_x_batch,
                                                           mask_output:
                                                           mask_output_test
                                                       })
                        test_pred_val.extend(test_pred_val_batch)

                    test_accuracy_val = 1. * np.sum(
                        np.equal(test_pred_val, test_y)) / (len(test_pred_val))
                    test_per_class_accuracy_val = np.diag(
                        confusion_matrix(test_y, test_pred_val)) * 2
                    # Each class has 50 test samples, so correct * 2 gives the per-class accuracy in percent.

                    test_acc_summary_str = sess.run(
                        test_acc_summary,
                        feed_dict={test_accuracy: test_accuracy_val})

                    test_summary_writer.add_summary(test_acc_summary_str,
                                                    iteration)

                    print("TEST: step %d, lr %.4f, accuracy %g" %
                          (iteration, lr, test_accuracy_val))
                    print("PER CLASS ACCURACY: " + " | ".join(
                        str(o) + '%' for o in test_per_class_accuracy_val))

                # Print the training logs
                if iteration % FLAGS.display_interval == 0:
                    train_summary_writer.add_summary(train_loss_summary_str,
                                                     iteration)
                    train_summary_writer.add_summary(train_acc_summary_str,
                                                     iteration)
                    print(
                        "TRAIN: epoch %d, step %d, lr %.4f, accuracy %g, loss %g, empirical %g, reg %g"
                        % (epoch_idx, iteration, lr, train_accuracy_val,
                           train_loss_val, train_empirical_loss_val,
                           train_reg_loss_val))

                iteration = iteration + 1
            '''
            Final test(before the next class is added)
            '''
            sess.run(copy_ops)
            # Divide and conquer: to avoid allocating too much GPU memory
            test_pred_val = []
            for i in range(0, len(test_x), FLAGS.test_batch_size):
                test_x_batch = test_x[i:i + FLAGS.test_batch_size]
                test_pred_val_batch = sess.run(test_pred,
                                               feed_dict={
                                                   batch_images: test_x_batch,
                                                   mask_output:
                                                   mask_output_test
                                               })
                test_pred_val.extend(test_pred_val_batch)

            test_accuracy_val = 1. * np.sum(np.equal(
                test_pred_val, test_y)) / (len(test_pred_val))
            conf_mat = confusion_matrix(test_y, test_pred_val)
            test_per_class_accuracy_val = np.diag(conf_mat)

            # Record and save the cumulative accuracy
            aver_acc_over_time[to_category_idx] = test_accuracy_val
            aver_acc_per_class_over_time[
                to_category_idx] = test_per_class_accuracy_val
            conf_mat_over_time[to_category_idx] = conf_mat

            dump_obj = dict()
            dump_obj['flags'] = flags.FLAGS.__flags
            dump_obj['aver_acc_over_time'] = aver_acc_over_time
            dump_obj[
                'aver_acc_per_class_over_time'] = aver_acc_per_class_over_time
            dump_obj['conf_mat_over_time'] = conf_mat_over_time

            np_file_result = os.path.join(result_folder, 'acc_over_time.pkl')
            with open(np_file_result, 'wb') as file:
                pickle.dump(dump_obj, file)

            visualize_result.vis(np_file_result, 'ImageNetDogs')

        # reorder the exemplars
        if FLAGS.reorder_exemplars:
            for old_category_idx in range(category_idx):

                sess.run(copy_ops)
                # Divide and conquer: to avoid allocating too much GPU memory
                train_prob_cur_cls_exemplars_val = sess.run(
                    test_masked_prob,
                    feed_dict={
                        batch_images:
                        imagenet_64x64.convert_images(
                            exemplars[old_category_idx]),
                        mask_output:
                        mask_output_val
                    })
                train_prob_cur_cls_exemplars_val = train_prob_cur_cls_exemplars_val[
                    :, old_category_idx]
                reorder_indices = np.argsort(-train_prob_cur_cls_exemplars_val)
                exemplars[old_category_idx] = exemplars[old_category_idx][
                    reorder_indices]

        # select the exemplars
        for category_idx_in_group in range(category_idx, to_category_idx + 1):
            train_indices_cur_cls = [
                idx for idx in range(len(train_y_gan))
                if train_y_gan[idx] == category_idx_in_group % FLAGS.nb_cl
            ]
            train_x_cur_cls = train_x_gan[train_indices_cur_cls]
            train_x_cur_cls_normalized = imagenet_64x64.convert_images(
                train_x_cur_cls)
            sess.run(copy_ops)
            # Divide and conquer: to avoid allocating too much GPU memory
            train_prob_cur_cls_val = sess.run(test_masked_prob,
                                              feed_dict={
                                                  batch_images:
                                                  train_x_cur_cls_normalized,
                                                  mask_output: mask_output_val
                                              })
            train_prob_cur_cls_val = train_prob_cur_cls_val[
                :, category_idx_in_group]

            # use iCaRL-like memory mechanism to save exemplars or not
            if FLAGS.memory_constrained:

                if FLAGS.auto_choose_num_exemplars:  # auto or fixed number of exemplars
                    # check if we can save all new samples as exemplars
                    if NUM_TRAIN_SAMPLES_PER_CLASS > FLAGS.memory_upperbound - sum(
                        [len(exemplars[i]) for i in range(len(exemplars))]):
                        # load inception scores of all classes
                        save_exemplars_ratios = []
                        for i in range(category_idx_in_group + 1):
                            inception_score = acwgan_obj.load_inception_score(
                                i)
                            save_exemplars_ratio = FLAGS.auto_param1 - FLAGS.auto_param2 * inception_score
                            save_exemplars_ratios.append(save_exemplars_ratio)

                        save_exemplars_ratios = np.array(save_exemplars_ratios)
                        keep_exemplars_num = np.floor(
                            save_exemplars_ratios * FLAGS.memory_upperbound /
                            sum(save_exemplars_ratios)).astype(int)
                        for old_category_idx in range(category_idx_in_group):
                            exemplars[old_category_idx] = exemplars[
                                old_category_idx][:keep_exemplars_num[
                                    old_category_idx]]
                        num_exemplars_cur_cls = keep_exemplars_num[-1]
                    else:
                        num_exemplars_cur_cls = NUM_TRAIN_SAMPLES_PER_CLASS

                else:
                    num_exemplars_per_cls = int(FLAGS.memory_upperbound //
                                                (category_idx_in_group + 1))
                    num_exemplars_per_cls = min(num_exemplars_per_cls,
                                                NUM_TRAIN_SAMPLES_PER_CLASS)
                    # remove redundant elements in the memory for previous classes
                    if category_idx_in_group > 0 and len(
                            exemplars[0]) > num_exemplars_per_cls:
                        for old_category_idx in range(category_idx_in_group):
                            exemplars[old_category_idx] = exemplars[
                                old_category_idx][:num_exemplars_per_cls]

                    # add how many new elements in the memory for the current class
                    num_exemplars_cur_cls = num_exemplars_per_cls
                    print(' [*] Store %d exemplars for each class' %
                          num_exemplars_cur_cls)

            else:
                if FLAGS.auto_choose_num_exemplars:  # auto or fixed number of exemplars
                    inception_score = acwgan_obj.load_inception_score(
                        category_idx_in_group)
                    num_exemplars_cur_cls = int(
                        np.floor(FLAGS.auto_param1 -
                                 FLAGS.auto_param2 * inception_score))
                    print(' [*] Inception score %f, store %d exemplars' %
                          (inception_score, num_exemplars_cur_cls))
                else:
                    num_exemplars_cur_cls = FLAGS.num_exemplars_per_class

            selected_indices = np.array(range(len(train_prob_cur_cls_val)))
            if FLAGS.exemplar_select_criterion == 'high':
                selected_indices = train_prob_cur_cls_val.argsort()[:-(
                    num_exemplars_cur_cls + 1):-1]  # highest-scoring samples
            elif FLAGS.exemplar_select_criterion == 'low':
                selected_indices = train_prob_cur_cls_val.argsort(
                )[:num_exemplars_cur_cls]  # lowest-scoring samples
            elif FLAGS.exemplar_select_criterion == 'random':
                random_idx = list(range(len(train_prob_cur_cls_val)))
                np.random.shuffle(random_idx)
                selected_indices = random_idx[:num_exemplars_cur_cls]

            exemplars.append(train_x_cur_cls[selected_indices])

            np_file_exemplars = os.path.join(
                exemplars_dir, 'exemplars_%d' % (category_idx_in_group + 1))
            np.save(np_file_exemplars, exemplars)
        '''
        Train the generative model (WGAN-GP)
        '''
        if acwgan_obj.check_model(to_category_idx):
            print(
                " [*] Model of Class %d-%d exists. Skip the training process" %
                (category_idx + 1, to_category_idx + 1))
        else:
            print(
                " [*] Model of Class %d-%d does not exist. Start the training process"
                % (category_idx + 1, to_category_idx + 1))
            acwgan_obj.train(train_x_gan, train_y_gan, test_x_gan, test_y_gan,
                             to_category_idx)

    # Save the final model
    if not FLAGS.only_gen_no_cls:
        checkpoint_dir = os.path.join(result_folder, 'checkpoints')
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, 'model.ckpt'))
        sess.close()
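One detail of the last example that is easy to miss: copy_ops pairs the second half of the tf.GraphKeys.WEIGHTS collection with the first half, which only works because the train-scope and test-scope ResNets create their weights in the same order. A minimal sketch of that copy pattern with hypothetical variable lists:

import tensorflow as tf  # TensorFlow 1.x, as in the snippets above

def make_copy_ops(train_vars, test_vars):
    """Assign each training variable's current value to its test-scope twin.

    Assumes both lists contain matching variables in the same order."""
    return [dst.assign(src.value()) for src, dst in zip(train_vars, test_vars)]

# Usage sketch: sess.run(make_copy_ops(train_vars, test_vars)) before testing.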