Example #1
    def make_model(self):
        with tf.device(self.default_gpu):
            pr = self.pr

            if self.is_training:
                self.make_train_ops()
            else:
                self.make_test_ops(reuse=False)

            self.coord = tf.train.Coordinator()
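            # two savers: frequent checkpoints (saver_fast, default
            # max_to_keep=5) and long-term snapshots (saver_slow)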
            self.saver_fast = tf.train.Saver()
            self.saver_slow = tf.train.Saver(max_to_keep=1000)

            self.init_op = tf.group(tf.global_variables_initializer(),
                                    tf.local_variables_initializer())
            self.sess.run(self.init_op)
            tf.train.start_queue_runners(sess=self.sess, coord=self.coord)
            print 'Initializing'

            self.merged_summary = tf.summary.merge_all()
            print 'Tensorboard command:'
            summary_dir = ut.mkdir(pj(pr.summary_dir, ut.simple_timestamp()))
            print 'tensorboard --logdir=%s' % summary_dir
            self.sum_writer = tf.summary.FileWriter(summary_dir,
                                                    self.sess.graph)

            if self.profile:
                self.profiler = tf.profiler.Profiler(self.sess.graph)
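A note on the pattern above: make_model follows the standard TF 1.x bootstrap
sequence: build ops, create a Coordinator and Savers, run the initializers,
start the queue runners, then wire up summaries. A minimal self-contained
sketch of the same sequence (the toy variable and the logdir path are
placeholders, not part of the original code):

import tensorflow as tf

x = tf.get_variable('x', [], initializer=tf.zeros_initializer())
tf.summary.scalar('x', x)

sess = tf.Session()
coord = tf.train.Coordinator()
saver = tf.train.Saver()

init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
sess.run(init_op)
tf.train.start_queue_runners(sess=sess, coord=coord)

merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('/tmp/logdir', sess.graph)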
Example #2
  def make_train_model(self):
    with tf.device(self.default_gpu):
      pr = self.pr
      # steps
      self.step = tf.get_variable(
        'global_step', [], trainable = False,
        initializer = tf.constant_initializer(0), dtype = tf.int64)
      self.lr = tf.constant(pr.base_lr)

      # model
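      # step decay: lr = base_lr * gamma ** floor(step / step_size)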
      scale = pr.gamma ** tf.floor(cast_float(self.step) / float(pr.step_size))
      self.lr_step = pr.base_lr * scale
      opt = shift.make_opt(pr.opt_method, self.lr_step, pr)
      self.inputs = read_data(pr, self.gpus)

      gpu_grads, gpu_losses = {}, {}
      # one tower per GPU; variables are shared after the first tower
      for i, gpu in enumerate(self.gpus):
        with tf.device(gpu):
          reuse = (i > 0)
          ims = self.inputs[i]['ims']
          samples = self.inputs[i]['samples']
          labels = self.inputs[i]['label']

          net = make_net(ims, samples, pr, reuse = reuse, train = self.is_training)
          self.loss = tfu.Loss('loss')
          self.loss.add_loss(shift.slim_losses_with_prefix(None), 'reg')
          self.loss.add_loss_acc(label_loss(net.logits, labels), 'label')
          grads = opt.compute_gradients(self.loss.total_loss())

          ut.add_dict_list(gpu_grads, self.loss.name, grads)
          ut.add_dict_list(gpu_losses, self.loss.name, self.loss)

          if i == 0:
            self.net = net
        
      # average gradients across towers, then optionally clip by global norm
      (gs, vs) = zip(*tfu.average_grads(gpu_grads['loss']))
      if pr.grad_clip is not None:
        gs, _ = tf.clip_by_global_norm(gs, pr.grad_clip)
      # log the global gradient norm every 100 steps
      gs = [tfu.print_every(gs[0], 100, ['grad norm:', tf.global_norm(gs)])] + list(gs[1:])
      gvs = zip(gs, vs)
      
      # run batch-norm moving-average updates before the gradient step
      bn_ups = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      with tf.control_dependencies(bn_ups):
        self.train_op = opt.apply_gradients(gvs, global_step = self.step)
      
      self.coord = tf.train.Coordinator()
      self.saver_fast = tf.train.Saver()
      self.saver_slow = tf.train.Saver(max_to_keep = 1000)

      if self.is_training:
        self.init_op = tf.group(
          tf.global_variables_initializer(),
          tf.local_variables_initializer())
        self.sess.run(self.init_op)

      tf.train.start_queue_runners(sess = self.sess, coord = self.coord)

      self.merged_summary = tf.summary.merge_all()
      print 'Tensorboard command:'
      summary_dir = ut.mkdir(pj(pr.summary_dir, ut.simple_timestamp()))
      print 'tensorboard --logdir=%s' % summary_dir
      self.sum_writer = tf.summary.FileWriter(summary_dir, self.sess.graph)

      if self.profile:
        self.profiler = tf.profiler.Profiler(self.sess.graph)
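The hand-rolled schedule at the start of make_train_model (base_lr scaled by
gamma ** floor(step / step_size)) matches TF 1.x's built-in staircase decay.
A sketch of the equivalent call, with placeholder values standing in for
pr.base_lr, pr.step_size, and pr.gamma:

import tensorflow as tf

step = tf.train.get_or_create_global_step()
lr = tf.train.exponential_decay(
    learning_rate=0.001,  # pr.base_lr (placeholder)
    global_step=step,
    decay_steps=1000,     # pr.step_size (placeholder)
    decay_rate=0.1,       # pr.gamma (placeholder)
    staircase=True)       # floor(step / step_size), as in the snippet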
Example #3
    def make_model(self):
        with tf.device(self.default_gpu):
            pr = self.pr
            # steps
            self.step = tf.get_variable('global_step', [],
                                        trainable=False,
                                        initializer=tf.constant_initializer(0),
                                        dtype=tf.int64)
            self.lr = tf.constant(pr.base_lr)

            # model
            opt = make_opt(pr.opt_method, pr.base_lr, pr)
            self.inputs = read_data(pr, self.gpus)

            gpu_grads, gpu_losses = {}, {}
            for i, gpu in enumerate(self.gpus):
                with tf.device(gpu):
                    reuse = (i > 0)
                    with tf.device('/cpu:0'):
                        ims = self.inputs[i]['ims']
                        samples_ex = self.inputs[i]['samples']
                        assert pr.both_examples
                        assert not pr.small_augment
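                        # per-example coin flip: net0 sees one of the two
                        # sample streams, net1 sees the complementary one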
                        labels = tf.random_uniform([shape(ims, 0)],
                                                   0,
                                                   2,
                                                   dtype=tf.int64,
                                                   name='labels_sample')
                        samples0 = tf.where(tf.equal(labels, 1),
                                            samples_ex[:, 1], samples_ex[:, 0])
                        samples1 = tf.where(tf.equal(labels, 0),
                                            samples_ex[:, 1], samples_ex[:, 0])
                        labels1 = 1 - labels

                    net0 = make_net(ims,
                                    samples0,
                                    pr,
                                    reuse=reuse,
                                    train=self.is_training)
                    net1 = make_net(None,
                                    samples1,
                                    pr,
                                    im_net=net0.im_net,
                                    reuse=True,
                                    train=self.is_training)
                    labels = tf.concat([labels, labels1], 0)
                    net = ut.Struct(
                        logits=tf.concat([net0.logits, net1.logits], 0),
                        cam=tf.concat([net0.cam, net1.cam], 0),
                        last_conv=tf.concat([net0.last_conv, net1.last_conv],
                                            0))

                    loss = mu.Loss('loss')
                    loss.add_loss(slim_losses_with_prefix(None), 'reg')
                    loss.add_loss_acc(sigmoid_loss(net.logits, labels),
                                      'label')
                    grads = opt.compute_gradients(loss.total_loss())

                    ut.add_dict_list(gpu_grads, loss.name, grads)
                    ut.add_dict_list(gpu_losses, loss.name, loss)

                    if i == 0:
                        self.net = net

            self.loss = mu.merge_losses(gpu_losses['loss'])
            for name, val in zip(self.loss.get_loss_names(),
                                 self.loss.get_losses()):
                tf.summary.scalar(name, val)

            if not self.is_training:
                pr_test = self.pr_test.copy()
                pr_test.augment_ims = False
                print 'pr_test ='
                print pr_test

                self.test_ims, self.test_samples, self.test_ytids = mu.on_cpu(
                    lambda: shift_dset.make_db_reader(
                        pr_test.test_list,
                        pr_test,
                        pr.test_batch, ['im', 'samples', 'ytid'],
                        one_pass=True))

                if pr_test.do_shift:
                    self.test_labels = tf.random_uniform(
                        [shape(self.test_ims, 0)], 0, 2, dtype=tf.int64)
                    self.test_samples = tf.where(tf.equal(self.test_labels, 1),
                                                 self.test_samples[:, 1],
                                                 self.test_samples[:, 0])
                else:
                    self.test_labels = tf.ones(shape(self.test_ims, 0),
                                               dtype=tf.int64)
                    print 'sample shape:', shape(self.test_samples)

                self.test_net = make_net(self.test_ims,
                                         self.test_samples,
                                         pr_test,
                                         reuse=True,
                                         train=self.is_training)

            (gs, vs) = zip(*mu.average_grads(gpu_grads['loss']))
            if pr.grad_clip is not None:
                gs, _ = tf.clip_by_global_norm(gs, pr.grad_clip)
            gs = [
                mu.print_every(gs[0], 100,
                               ['grad norm:', tf.global_norm(gs)])
            ] + list(gs[1:])
            gvs = zip(gs, vs)

            bn_ups = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if pr.multipass:
                # pre-build several apply_gradients ops and cycle through
                # them; count=[0] is a mutable default used as a call counter
                ops = [
                    opt.apply_gradients(gvs, global_step=self.step)
                    for i in xrange(pr.multipass_count)
                ]

                def op_helper(count=[0]):
                    op = ops[count[0] % len(ops)]
                    count[0] += 1
                    return op

                self.train_op = op_helper
            else:
                # single pass: group the gradient step with the batch-norm
                # moving-average updates
                op = tf.group(opt.apply_gradients(gvs, global_step=self.step),
                              *bn_ups)
                self.train_op = lambda: op

            self.coord = tf.train.Coordinator()
            self.saver = tf.train.Saver()

            self.init_op = tf.group(tf.global_variables_initializer(),
                                    tf.local_variables_initializer())
            self.sess.run(self.init_op)
            tf.train.start_queue_runners(sess=self.sess, coord=self.coord)

            self.merged_summary = tf.summary.merge_all()
            print 'Tensorboard command:'
            summary_dir = ut.mkdir(pj(pr.summary_dir, ut.simple_timestamp()))
            print 'tensorboard --logdir=%s' % summary_dir
            self.sum_writer = tf.summary.FileWriter(summary_dir,
                                                    self.sess.graph)
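mu.average_grads / tfu.average_grads is not shown in these examples. Under the
usual multi-GPU tower pattern (as in the classic TF CIFAR-10 multi-GPU
tutorial), it averages each variable's gradient across the per-GPU
(grad, var) lists; a plausible minimal implementation, assuming that behavior:

import tensorflow as tf

def average_grads(tower_grads):
    # tower_grads: one [(grad, var), ...] list per GPU, all lists in the
    # same variable order
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grad_and_vars if g is not None]
        var = grad_and_vars[0][1]
        if not grads:
            averaged.append((None, var))
        else:
            averaged.append((tf.reduce_mean(tf.stack(grads), 0), var))
    return averaged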