Code Example #1
def main(_):
    #  if FLAGS.num_gpus == 0:
    #    dev = '/cpu:0'
    #  elif FLAGS.num_gpus == 1:
    #    dev = '/gpu:0'
    #  else:
    #    raise ValueError('Only support 0 or 1 gpu.')

    if FLAGS.mode == 'train':
        batch_size = 25
    elif FLAGS.mode == 'eval':
        batch_size = 1


    #  if FLAGS.dataset == 'cifar10':
    #    num_classes = 10
    #  elif FLAGS.dataset == 'cifar100':
    #    num_classes = 100

    hps = resnet_model.HParams(batch_size=batch_size,
                               num_classes=3006,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.01,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1,
                               optimizer='adam')

    with tf.device('/gpu:0'):
        if FLAGS.mode == 'train':
            train(hps)
        elif FLAGS.mode == 'eval':
            runTest(hps)
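A note on context: every main(_) in this listing reads module-level FLAGS that the excerpts omit. Below is a minimal sketch of how such flags are typically declared in TF1; the flag names are inferred from usage above, and the defaults are hypothetical.

# Minimal sketch of the flags these mains assume (defaults are hypothetical).
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('mode', 'train', "Either 'train' or 'eval'.")
tf.app.flags.DEFINE_string('dataset', 'cifar10', "'cifar10' or 'cifar100'.")
tf.app.flags.DEFINE_integer('num_gpus', 1, 'Number of GPUs to use (0 or 1).')

def main(_):
    print(FLAGS.mode, FLAGS.dataset, FLAGS.num_gpus)

if __name__ == '__main__':
    tf.app.run()  # parses the flags, then calls main()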
Code Example #2
File: resnet_main.py Project: ywu40/fdc_resnet_v3
def main(_):
	
	if FLAGS.mode == 'train':
		batch_size = FLAGS.train_batch_size
	elif FLAGS.mode == 'eval':
		batch_size = FLAGS.eval_batch_size
	else:
		raise ValueError('Only support two modes: train or eval')
	
	if FLAGS.dataset == 'cifar10':
		num_classes = 10
	elif FLAGS.dataset == 'cifar100':
		num_classes = 100
	elif FLAGS.dataset == 'fdc':
		num_classes = FLAGS.target_classes
	else:
		raise ValueError(
			'Only support three datasets: cifar10, cifar100 or fdc')
	
	hps = resnet_model.HParams(dataset_name=FLAGS.dataset,
														 batch_size=batch_size,
														 num_classes=num_classes,
														 min_lrn_rate=0.0001,
														 lrn_rate=0.1,
														 num_residual_units=5,
														 use_bottleneck=False,
														 weight_decay_rate=0.0002,
														 relu_leakiness=0.1,
														 optimizer='mom')
	
	if FLAGS.mode == 'train':
		train(hps)
	elif FLAGS.mode == 'eval':
		evaluate(hps)
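Every example in this listing constructs resnet_model.HParams. In the upstream TensorFlow ResNet code this is a plain collections.namedtuple, so a sketch consistent with the fields used above (the exact field list varies per fork) would be:

import collections

# Sketch of the HParams container; upstream resnet_model.py defines it as a
# namedtuple, so fields are fixed at construction and read as attributes.
HParams = collections.namedtuple(
    'HParams',
    ['batch_size', 'num_classes', 'min_lrn_rate', 'lrn_rate',
     'num_residual_units', 'use_bottleneck', 'weight_decay_rate',
     'relu_leakiness', 'optimizer'])

hps = HParams(batch_size=128, num_classes=10, min_lrn_rate=0.0001,
              lrn_rate=0.1, num_residual_units=5, use_bottleneck=False,
              weight_decay_rate=0.0002, relu_leakiness=0.1, optimizer='mom')
print(hps.num_classes)  # -> 10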
Code Example #3
File: resnet_main.py Project: newadays/ray
 def __init__(self, data, eval_batch_count):
     hps = resnet_model.HParams(batch_size=100,
                                num_classes=10,
                                min_lrn_rate=0.0001,
                                lrn_rate=0.1,
                                num_residual_units=5,
                                use_bottleneck=False,
                                weight_decay_rate=0.0002,
                                relu_leakiness=0.1,
                                optimizer='mom',
                                num_gpus=0)
     data = ray.get(data)
     total_images = np.concatenate([data[0], data[1], data[2]])
     with tf.Graph().as_default():
         with tf.device('/cpu:0'):
             images, labels = cifar_input.build_input(
                 [total_images, data[3]], hps.batch_size, False)
             self.model = resnet_model.ResNet(hps, images, labels, 'eval')
             self.model.build_graph()
             config = tf.ConfigProto(allow_soft_placement=True)
             sess = tf.Session(config=config)
             self.model.variables.set_session(sess)
             self.coord = tf.train.Coordinator()
             tf.train.start_queue_runners(sess, coord=self.coord)
             init = tf.global_variables_initializer()
             sess.run(init)
             self.best_precision = 0.0
             self.eval_batch_count = eval_batch_count
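The constructor above starts queue-runner threads with a Coordinator but, being an excerpt, never stops them. A minimal, self-contained sketch of the full lifecycle these TF1 queue pipelines assume (the input op here is a stand-in):

import tensorflow as tf

with tf.Graph().as_default():
    # Any queue-backed input op will do for illustration.
    filename_queue = tf.train.string_input_producer(['dummy.txt'])
    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        pass  # ... run training / eval steps here ...
    finally:
        coord.request_stop()  # ask the runner threads to exit
        coord.join(threads)   # wait for them before closing the session
        sess.close()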
Code Example #4
File: resnet_main.py Project: xslidi/ResNet
def main(_):
    resnet_model.maybe_download_and_extract()
    if FLAGS.num_gpus == 0:
        dev = '/CPU:0'
    elif FLAGS.num_gpus == 1:
        dev = '/GPU:0'
    else:
        raise ValueError('Only support 0 or 1 gpu')

    # if FLAGS.mode == 'train':
    #     batch_size = 128
    # elif FLAGS.mode == 'eval':
    #     batch_size = FLAGS.eval_batch_size

    if FLAGS.dataset == 'cifar10':
        num_class = 10
    elif FLAGS.dataset == 'cifar100':
        num_class = 100

    hps = resnet_model.HParams(num_class=num_class,
                               lrn_rate=0.1,
                               num_residual_units=6,
                               use_bottleneck=False,
                               weight_decay_rate=0.0005,
                               dropout_rate=0.3,
                               relu_leakiness=0.1,
                               optimizer='mom',
                               width=10,
                               data_dir=FLAGS.train_data_path)

    with tf.device(dev):
        if FLAGS.mode == 'train':
            train(hps)
        elif FLAGS.mode == 'eval':
            test(hps)
Code Example #5
File: resnet_main.py Project: ray-project/sandbox
    def __init__(self, data, dataset, eval_batch_count, eval_dir):
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        hps = resnet_model.HParams(
            batch_size=100,
            num_classes=100 if dataset == "cifar100" else 10,
            min_lrn_rate=0.0001,
            lrn_rate=0.1,
            num_residual_units=5,
            use_bottleneck=False,
            weight_decay_rate=0.0002,
            relu_leakiness=0.1,
            optimizer="mom",
            num_gpus=0)
        with tf.device("/cpu:0"):
            # Builds the testing network.
            images, labels = cifar_input.build_input(data,
                                                     hps.batch_size, dataset,
                                                     False)
            self.model = resnet_model.ResNet(hps, images, labels, "eval")
            self.model.build_graph()
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config)
            self.model.variables.set_session(sess)
            init = tf.global_variables_initializer()
            sess.run(init)

            # Initializing parameters for tensorboard.
            self.best_precision = 0.0
            self.eval_batch_count = eval_batch_count
            self.summary_writer = tf.summary.FileWriter(eval_dir, sess.graph)
        # The IP address where tensorboard logs will be on.
        self.ip_addr = ray.services.get_node_ip_address()
Code Example #6
def main(_):
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    if FLAGS.mode == 'train':
        batch_size = 128
    elif FLAGS.mode == 'eval':
        batch_size = 100

    if FLAGS.dataset == 'cifar10':
        num_classes = 10
    elif FLAGS.dataset == 'cifar100':
        num_classes = 100

    hps = resnet_model.HParams(num_classes=num_classes,
                               lrn_rate=0.1,
                               weight_decay_rate=0.0002,
                               optimizer='mom')

    with tf.device(dev):
        if FLAGS.mode == 'train':
            train(hps)
        elif FLAGS.mode == 'eval':
            evaluate(hps)
Code Example #7
File: resnet_main.py Project: yulihui1993/models
def main(_):
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    if FLAGS.mode == 'train':
        batch_size = 128
    elif FLAGS.mode == 'eval':
        batch_size = 100

    if FLAGS.dataset == 'cifar10':
        num_classes = 10
    elif FLAGS.dataset == 'cifar100':
        num_classes = 100

    hps = resnet_model.HParams(batch_size=batch_size,
                               num_classes=num_classes,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1,
                               optimizer='mom')

    with tf.device(dev):
        if FLAGS.mode == 'train':
            train(hps)
        elif FLAGS.mode == 'eval':
            evaluate(hps)
Code Example #8
def main(_):
    # device selection
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    # HParams setup
    hps = resnet_model.HParams(batch_size=FLAGS.batch_size,
                               num_classes=FLAGS.num_classes,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1,
                               optimizer='mom')
    # train or evaluate the model
    with tf.device(dev):
        if FLAGS.mode == 'train':
            train(hps)
        elif FLAGS.mode == 'eval':
            evaluate(hps)
Code Example #9
def main(_):
    decay_steps = int(FLAGS.num_examples_train / FLAGS.batch_size *
                      FLAGS.lr_decay_epoches)
    hps = resnet_model.HParams(
        batch_size=FLAGS.batch_size,
        num_classes=FLAGS.num_classes,
        num_gpus=FLAGS.num_gpus,
        initial_learning_rate=FLAGS.initial_learning_rate,
        lr_decay_steps=decay_steps,
        lr_decay_factor=FLAGS.lr_decay_factor,
        optimizer=FLAGS.optimizer,
        num_layers=FLAGS.num_layers,
        prob_depth=0.5,
        use_bottleneck=True,
        weight_decay_rate=0.0001,
        relu_leakiness=0)

    if not tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.MakeDirs(FLAGS.train_dir)

    #with tf.device(dev):
    #  if FLAGS.mode == 'train':
    #    train(hps)
    #  elif FLAGS.mode == 'eval':
    #    evaluate(hps)
    train(hps)
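The decay_steps expression above converts "epochs between learning-rate decays" into a count of global steps. A quick numeric check, using illustrative values (50,000 training examples is an assumption, not a value from this project):

# decay_steps = examples / batch_size * epochs_per_decay
num_examples_train = 50000
batch_size = 128
lr_decay_epoches = 10

steps_per_epoch = num_examples_train / batch_size      # ~390.6 steps
decay_steps = int(steps_per_epoch * lr_decay_epoches)  # -> 3906
print(decay_steps)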
Code Example #10
File: resnet_main.py Project: newadays/ray
    def __init__(self, data, num_gpus):
        if num_gpus > 0:
            os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
                [str(i) for i in ray.get_gpu_ids()])
        hps = resnet_model.HParams(batch_size=128,
                                   num_classes=10,
                                   min_lrn_rate=0.0001,
                                   lrn_rate=0.1,
                                   num_residual_units=5,
                                   use_bottleneck=False,
                                   weight_decay_rate=0.0002,
                                   relu_leakiness=0.1,
                                   optimizer='mom',
                                   num_gpus=num_gpus)
        data = ray.get(data)
        total_images = np.concatenate([data[0], data[1], data[2]])
        with tf.Graph().as_default():
            if num_gpus > 0:
                tf.set_random_seed(ray.get_gpu_ids()[0] + 1)
            else:
                tf.set_random_seed(1)

            with tf.device('/gpu:0' if num_gpus > 0 else '/cpu:0'):
                images, labels = cifar_input.build_input(
                    [total_images, data[3]], hps.batch_size, True)
                self.model = resnet_model.ResNet(hps, images, labels, 'train')
                self.model.build_graph()
                config = tf.ConfigProto(allow_soft_placement=True)
                sess = tf.Session(config=config)
                self.model.variables.set_session(sess)
                self.coord = tf.train.Coordinator()
                tf.train.start_queue_runners(sess, coord=self.coord)
                init = tf.global_variables_initializer()
                sess.run(init)
Code Example #11
def main(_):
  if FLAGS.dataset == 'cifar10':
    num_classes = 10
  elif FLAGS.dataset == 'cifar100':
    num_classes = 100

  hps = resnet_model.HParams(num_classes=num_classes,
                             lrn_rate=0.1,
                             weight_decay_rate=0.002,
                             optimizer='mom')

  # add cluster information
  if FLAGS.job_name is None or FLAGS.job_name == "":
    raise ValueError("Must specify an explicit `job_name`")
  if FLAGS.task_index is None or FLAGS.task_index == "":
    raise ValueError("Must specify an explicit `task_index`")

  print("job name = %s" % FLAGS.job_name)
  print("task index = %d" % FLAGS.task_index)

  #Construct the cluster and start the server
  ps_spec = FLAGS.ps_hosts.split(",")
  worker_spec = FLAGS.worker_hosts.split(",")

  # Get the number of workers.
  num_workers = len(worker_spec)
  FLAGS.replicas_to_aggregate = num_workers

  cluster = tf.train.ClusterSpec({
      "ps": ps_spec,
      "worker": worker_spec})

  if not FLAGS.existing_servers:
    # Not using existing servers. Create an in-process server.
    server = tf.train.Server(
        cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
    if FLAGS.job_name == "ps":
      server.join()

  if FLAGS.num_gpus > 0:
    # Avoid gpu allocation conflict: now allocate task_num -> #gpu
    # for each worker in the corresponding machine
    gpu = (FLAGS.task_index % FLAGS.num_gpus)
    worker_device = "/job:worker/task:%d/gpu:%d" % (FLAGS.task_index, gpu)
  elif FLAGS.num_gpus == 0:
    # Just allocate the CPU to worker server
    cpu = 0
    worker_device = "/job:worker/task:%d/cpu:%d" % (FLAGS.task_index, cpu)

  with tf.device(
      tf.train.replica_device_setter(
          worker_device=worker_device,
          # ps_device="/job:ps/cpu:0",
          cluster=cluster)):

    if FLAGS.mode == 'train':
      train(hps, server)
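For reference, a self-contained sketch of the ClusterSpec/Server wiring used above, with placeholder localhost addresses standing in for FLAGS.ps_hosts and FLAGS.worker_hosts:

import tensorflow as tf

# Placeholder host:port strings; example #11 parses these from flags.
cluster = tf.train.ClusterSpec({
    'ps': ['localhost:2222'],
    'worker': ['localhost:2223', 'localhost:2224'],
})
# Start an in-process server for worker task 0.
server = tf.train.Server(cluster, job_name='worker', task_index=0)
print(server.target)  # grpc target to hand to a tf.Session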
Code Example #12
def main(_):
    if FLAGS.model == '':
        raise Exception('--model must be specified.')

    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    if FLAGS.batch_size == -1:
        if FLAGS.mode == 'train':
            batch_size = 128
        elif FLAGS.mode == 'eval':
            # SimonChange: default batch_size from 100 to FLAGS.batch_size
            # (note: FLAGS.batch_size is -1 when this branch is reached).
            batch_size = FLAGS.batch_size
    else:
        batch_size = FLAGS.batch_size

    if FLAGS.dataset == 'cifar10':
        num_classes = 10
    elif FLAGS.dataset == 'cifar100':
        num_classes = 100

    if FLAGS.model == 'resnet20':
        num_residual_units = 3
    elif FLAGS.model == 'resnet56':
        num_residual_units = 9
    elif FLAGS.model == 'resnet164' and FLAGS.use_bottleneck:
        num_residual_units = 18
    elif FLAGS.model == 'resnet164' and not FLAGS.use_bottleneck:
        num_residual_units = 27
    else:
        raise Exception(
            "Invalid model -- only resnet20, resnet56 and resnet164 supported")

    data_format = FLAGS.data_format

    hps = resnet_model.HParams(batch_size=batch_size,
                               num_classes=num_classes,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=num_residual_units,
                               use_bottleneck=FLAGS.use_bottleneck,
                               weight_decay_rate=0.0005,
                               relu_leakiness=0.1,
                               optimizer='mom',
                               data_format=data_format)

    with tf.device(dev):
        if FLAGS.mode == 'train':
            train(hps)
        elif FLAGS.mode == 'eval':
            evaluate(hps)
Code Example #13
File: main.py Project: liuyanqi/caffe
def main(_):
    # Note: num_classes is assumed to be defined at module scope in this
    # project, and batch_size is (unusually) taken from FLAGS.epoch.
    hps = resnet_model.HParams(batch_size=FLAGS.epoch,
                               num_classes=num_classes,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1,
                               optimizer='sgd')

    train(hps)
Code Example #14
def main(_):

    hps = resnet_model.HParams(batch_size=100,
                               num_classes=10,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1)

    evaluate(hps)
Code Example #15
def getQfeature(filepath):

    image = read(filepath)
    labels = [3]
    hps = resnet_model.HParams(batch_size=1,
                               num_classes=4,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1,
                               optimizer='mom')
    model = resnet_model.ResNet(hps, image, labels, FLAGS.mode)
    model.build_graph()

    logits = tf.get_default_graph().get_tensor_by_name("logit/xw_plus_b:0")
    print(logits)

    logits_norm = tf.nn.l2_normalize(logits, 1)

    # Run our model
    steps = 1  # There may be duplicate image features; a later dict op removes them.
    # Restore the moving average version of the learned variables for better effect.
    # for name in variables_to_restore:
    # 	print(name)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore model from checkpoint.
        # Note: a checkpoint is not a single file, so do not pass an index file:
        #   saver.restore(sess, '/path/to/model.ckpt-1000.index')  # wrong
        # Remember to launch the input queues; using a Coordinator avoids the
        # harmless 'Enqueue operation was cancelled' error on shutdown.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # ckpt correspond to 'checkpoint' file.
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        # model_checkpoint_path looks something like: /path/to/model.ckpt-1000
        print(ckpt.model_checkpoint_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Note: fc1_list = fc2_list = fc3_list = [] would alias a single list object.
        logits_list = []
        _logits = sess.run([logits_norm])  # return nd-array
        print('................')
        print(_logits)
        print('................')
        put_2darray(_logits, logits_list)

        return logits_list
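An aside on why getQfeature L2-normalizes the logits: with unit-norm feature vectors, a plain dot product equals cosine similarity, which is what downstream retrieval code usually wants. A tiny NumPy check:

import numpy as np

a = np.array([3.0, 4.0]); a /= np.linalg.norm(a)  # unit-norm feature
b = np.array([4.0, 3.0]); b /= np.linalg.norm(b)
print(np.dot(a, b))  # cosine similarity = 24/25 = 0.96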
Code Example #16
def main(_):
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    elif FLAGS.num_gpus > 1:
        # Placement in the multi-GPU case is handled by the per-GPU loop
        # below; this list is not otherwise used.
        devices = ['/gpu:%d' % i for i in range(FLAGS.num_gpus)]
    else:
        raise ValueError('num_gpus must be non-negative.')

    if FLAGS.mode == 'train':
        batch_size = 128
    elif FLAGS.mode == 'eval':
        batch_size = 100

    if FLAGS.dataset == 'cifar10':
        num_classes = 10
    elif FLAGS.dataset == 'cifar100':
        num_classes = 100
    elif FLAGS.dataset == 'amazon':
        num_classes = 33
    elif FLAGS.dataset == 'naver':
        num_classes = 27

    hps = resnet_model.HParams(batch_size=batch_size,
                               num_classes=num_classes,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1,
                               optimizer='mom')

    if FLAGS.num_gpus > 1:
        for i in range(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % ("gpu", i)) as scope:
                    if FLAGS.mode == 'train':
                        train(hps)
                    elif FLAGS.mode == 'eval':
                        evaluate(hps)
    else:
        with tf.device(dev):
            if FLAGS.mode == 'train':
                train(hps)
            elif FLAGS.mode == 'eval':
                evaluate(hps)
Code Example #17
File: resnet_main.py Project: watsonyanghx/ResNet
def main(_):
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    if FLAGS.mode == 'train':
        batch_size = 128
    elif FLAGS.mode == 'eval':
        batch_size = 100
    elif FLAGS.mode == 'infer':
        batch_size = 100

    # Change the values below based on your own setting.
    hps = resnet_model.HParams(batch_size=batch_size,
                               image_size=32,
                               depth=3,
                               num_classes=10,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.01,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.004,
                               relu_leakiness=0.1,
                               optimizer='mom',
                               fine_tune=False)

    with tf.device(dev):
        if FLAGS.mode == 'train':
            X_train = helper.load_data(FLAGS.train_data_path)
            y_train = helper.load_data(FLAGS.train_labels_path)
            y_train = y_train - 1  # shift 1-based labels to 0-based

            train(hps, X_train, y_train)
        elif FLAGS.mode == 'eval':
            X_val = helper.load_data(FLAGS.eval_data_path)
            y_val = helper.load_data(FLAGS.eval_labels_path)
            y_val = y_val - 1  # shift 1-based labels to 0-based

            evaluate(hps, X_val, y_val)
        elif FLAGS.mode == 'infer':
            X_infer = helper.load_data(FLAGS.infer_data_path)
            y_infer = np.ones((X_infer.shape[0], ))

            infer(hps, X_infer, y_infer)
Code Example #18
    def __init__(self, data, dataset, num_gpus):
        if num_gpus > 0:
            os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
                [str(i) for i in ray.get_gpu_ids()])
        hps = resnet_model.HParams(
            batch_size=128,
            num_classes=100 if dataset == "cifar100" else 10,
            min_lrn_rate=0.0001,
            lrn_rate=0.1,
            num_residual_units=5,
            use_bottleneck=False,
            weight_decay_rate=0.0002,
            relu_leakiness=0.1,
            optimizer="mom",
            num_gpus=num_gpus)

        # We seed each actor differently so that each actor operates on a
        # different subset of data.
        if num_gpus > 0:
            tf.set_random_seed(ray.get_gpu_ids()[0] + 1)
        else:
            # Only a single actor in this case.
            tf.set_random_seed(1)

        input_images = data[0]
        input_labels = data[1]
        with tf.device("/gpu:0" if num_gpus > 0 else "/cpu:0"):
            # Build the model.
            images, labels = cifar_input.build_input([input_images,
                                                      input_labels],
                                                     hps.batch_size, dataset,
                                                     False)
            self.model = resnet_model.ResNet(hps, images, labels, "train")
            self.model.build_graph()
            config = tf.ConfigProto(allow_soft_placement=True)
            sess = tf.Session(config=config)
            self.model.variables.set_session(sess)
            self.coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess, coord=self.coord)
            init = tf.global_variables_initializer()
            sess.run(init)
            self.steps = 10
Code Example #19
def main(_):
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    if FLAGS.mode == 'train':
        batch_size = 128
    elif FLAGS.mode == 'eval':
        batch_size = 100

    if FLAGS.dataset == 'cifar10':
        num_classes = 10
    elif FLAGS.dataset == 'cifar100':
        num_classes = 100

    # print('log_root', FLAGS.log_root)
    # print('train_dir', FLAGS.train_dir)

    hps = resnet_model.HParams(
        batch_size=batch_size,
        num_classes=num_classes,
        min_lrn_rate=0.0001,
        lrn_rate=0.1,
        # num_residual_units=5,
        num_residual_units=3,
        use_bottleneck=False,
        weight_decay_rate=0.0002,
        relu_leakiness=0.1,
        optimizer='mom')

    with tf.device(dev):
        if FLAGS.mode == 'train':
            train_start = time.time()
            train(hps)
            train_duration = time.time() - train_start
            print('train=%.4fh' % (train_duration / 3600))
        elif FLAGS.mode == 'eval':
            evaluate(hps)
Code Example #20
def _my_model_fn(features, labels, mode, params):
    del params  # unused, but needed for TPU training

    #
    # Model - Here we use pre-built 'resnet_model'
    #
    model_params = resnet_model.HParams(
        batch_size=int(
            batch_size /
            FLAGS.num_replica),  # because batch is divided by TPU replicas
        num_classes=10,
        min_lrn_rate=0.0001,
        lrn_rate=0.1,
        num_residual_units=5,  # 5 x (3 x sub 2) + 2 = 32 layers
        use_bottleneck=False,
        weight_decay_rate=0.0002,
        relu_leakiness=0.1,
        optimizer='mom')
    train_model = resnet_model.ResNet(model_params, features, labels, 'train')
    train_model.build_graph(tpu_opt=True)

    # create evaluation metrics
    #truth = tf.argmax(train_model.labels, axis=1)
    #predictions = tf.argmax(train_model.predictions, axis=1)
    #precision = tf.reduce_mean(
    #    tf.to_float(tf.equal(predictions, truth)),
    #    name="precision")
    #accuracy = tf.metrics.accuracy(truth, predictions)
    #tf.summary.scalar('accuracy', accuracy[1]) # output to TensorBoard

    # define operations (here we assume the training operation only)
    #prediction_outputs = {
    #    "precision": precision
    #}
    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=train_model.cost,
        train_op=train_model.train_op,
        #predictions=prediction_outputs,
        eval_metrics=(metric_fn, [train_model.labels,
                                  train_model.predictions]))
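Example #20 returns a TPUEstimatorSpec; on CPU/GPU the same model_fn pattern returns a plain tf.estimator.EstimatorSpec. A minimal, self-contained sketch of that wiring, with a stand-in linear model and input function (all names here are illustrative, not from this project):

import tensorflow as tf

def _toy_model_fn(features, labels, mode):
    # Tiny linear model standing in for ResNet.
    logits = tf.layers.dense(tf.layers.flatten(features), 10)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
    train_op = tf.train.MomentumOptimizer(0.1, 0.9).minimize(
        loss, global_step=tf.train.get_or_create_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

def _toy_input_fn():
    # Random images/labels standing in for a real input pipeline.
    features = tf.random_uniform([16, 32, 32, 3])
    labels = tf.one_hot(tf.random_uniform([16], maxval=10, dtype=tf.int32), 10)
    return features, labels

estimator = tf.estimator.Estimator(model_fn=_toy_model_fn,
                                   model_dir='/tmp/toy_resnet')
estimator.train(input_fn=_toy_input_fn, steps=10)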
Code Example #21
File: resnet_main.py Project: zchen0211/tf-tutorial
def main(_):
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    # if FLAGS.mode == 'train':
    #   batch_size = 128
    # elif FLAGS.mode == 'eval':
    #    batch_size = 100
    batch_size = FLAGS.batch_size

    # if FLAGS.dataset == 'cifar10':
    num_classes = 10
    # elif FLAGS.dataset == 'cifar100':
    #  num_classes = 100

    hps = resnet_model.HParams(batch_size=batch_size,
                               num_classes=num_classes,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=0.0002,
                               relu_leakiness=0.1,
                               optimizer='mom')
    # Disabled debugging snippet:
    # sess = tf.Session()
    # tf.train.start_queue_runners(sess=sess)
    # custom_runner.start_threads(sess)
    # images_batch, labels_batch = sess.run([images, labels])
    # print(images_batch.shape)
    # print(labels_batch.shape)

    with tf.device(dev):
        if FLAGS.mode == 'train':
            train(hps)
        elif FLAGS.mode == 'eval':
            evaluate(hps)
Code Example #22
File: resnet_main.py Project: zbxzc35/sortpool2d
def main(_):
    if FLAGS.num_gpus == 0:
        dev = '/cpu:0'
    elif FLAGS.num_gpus == 1:
        dev = '/gpu:0'
    else:
        raise ValueError('Only support 0 or 1 gpu.')

    batch_size = 128

    if FLAGS.dataset == 'cifar10':
        num_classes = 10
    elif FLAGS.dataset == 'cifar100':
        num_classes = 100

    weight_decay_rate = FLAGS.weight_decay
    pool_type = FLAGS.pool_type
    hps = resnet_model.HParams(batch_size=batch_size,
                               num_classes=num_classes,
                               min_lrn_rate=0.0001,
                               lrn_rate=0.1,
                               num_residual_units=5,
                               use_bottleneck=False,
                               weight_decay_rate=weight_decay_rate,
                               relu_leakiness=0.1,
                               optimizer='mom',
                               pool_type=pool_type)
    if not os.path.exists(FLAGS.result_path):
        os.makedirs(FLAGS.result_path)
    config_str = json.dumps(hps._asdict())
    config_file = os.path.join(FLAGS.result_path, 'config')
    config_file_object = open(config_file, 'w')
    config_file_object.write(config_str)
    config_file_object.close()

    with tf.device(dev):
        train(hps)
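Because HParams is a namedtuple, the hps._asdict() dump above round-trips cleanly through JSON. A small self-contained sketch (the dict literal stands in for hps._asdict()):

import json
import os
import tempfile

config = {'batch_size': 128, 'num_classes': 10, 'optimizer': 'mom'}
path = os.path.join(tempfile.mkdtemp(), 'config')
with open(path, 'w') as f:
    json.dump(config, f)              # what example #22 writes
with open(path) as f:
    print(json.load(f)['optimizer'])  # -> mom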
Code Example #23
def train_resnet_baseline(max_step_run):
    """Trains the resnet baseline model.

  Args:
    max_step_run: The maximum number of gradient steps.
  """
    if not os.path.exists(FLAGS.train_log_dir):
        os.makedirs(FLAGS.train_log_dir)
    g = tf.Graph()

    with g.as_default():
        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            tf_global_step = tf.train.get_or_create_global_step()

            # pylint: disable=line-too-long
            images, one_hot_labels, num_samples_per_epoch, num_of_classes = cifar_data_provider.provide_resnet_data(
                FLAGS.dataset_name,
                'train',
                FLAGS.batch_size,
                dataset_dir=FLAGS.data_dir)

            hps = resnet_model.HParams(batch_size=FLAGS.batch_size,
                                       num_classes=num_of_classes,
                                       min_lrn_rate=0.0001,
                                       lrn_rate=FLAGS.learning_rate,
                                       num_residual_units=9,
                                       use_bottleneck=False,
                                       weight_decay_rate=0.0002,
                                       relu_leakiness=0.1,
                                       optimizer='mom')

            images.set_shape([FLAGS.batch_size, 32, 32, 3])
            tf.logging.info('num_of_example={}'.format(num_samples_per_epoch))

            # Define the model:
            resnet = resnet_model.ResNet(hps,
                                         images,
                                         one_hot_labels,
                                         mode='train')
            logits = resnet.build_model()

            # Specify the loss function:
            total_loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=one_hot_labels, logits=logits)
            total_loss = tf.reduce_mean(total_loss, name='xent')
            total_loss += resnet.decay()  # decay
            tf.add_to_collection('total_loss', total_loss)

            decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *
                              FLAGS.num_epochs_per_decay)

            boundaries = [19531, 25000, 30000]
            values = [FLAGS.learning_rate * t for t in [1, 0.1, 0.01, 0.001]]
            lr = tf.train.piecewise_constant(tf_global_step, boundaries,
                                             values)
            slim.summaries.add_scalar_summary(lr,
                                              'learning_rate',
                                              print_summary=True)

            # Note: this exponential-decay schedule overwrites the piecewise
            # schedule assigned to lr above.
            lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                            tf_global_step,
                                            decay_steps,
                                            FLAGS.learning_rate_decay_factor,
                                            staircase=True)

            slim.summaries.add_scalar_summary(total_loss,
                                              'total_loss',
                                              print_summary=True)

            # Set up training.
            trainable_variables = tf.trainable_variables()

            grads = tf.gradients(total_loss, trainable_variables)

            optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9)

            apply_op = optimizer.apply_gradients(zip(grads,
                                                     trainable_variables),
                                                 global_step=tf_global_step,
                                                 name='train_step')

            train_ops = [apply_op] + resnet.extra_train_ops
            train_op = tf.group(*train_ops)

            saver = tf.train.Saver(max_to_keep=10,
                                   keep_checkpoint_every_n_hours=24)

            # Run training.
            slim.learning.train(train_op=train_op,
                                train_step_fn=resnet_train_step,
                                logdir=FLAGS.train_log_dir,
                                master=FLAGS.master,
                                saver=saver,
                                is_chief=FLAGS.task == 0,
                                number_of_steps=max_step_run,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
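The piecewise schedule built above decays the learning rate at fixed step boundaries (though in this example the exponential-decay assignment that follows overwrites it). A minimal sketch of tf.train.piecewise_constant semantics:

import tensorflow as tf

# boundaries [19531, 25000, 30000] with values [lr, lr*0.1, lr*0.01, lr*0.001]
step = tf.placeholder(tf.int32, [])
lr = tf.train.piecewise_constant(step, [19531, 25000, 30000],
                                 [0.1, 0.01, 0.001, 0.0001])
with tf.Session() as sess:
    for s in [0, 20000, 26000, 40000]:
        print(s, sess.run(lr, {step: s}))  # 0.1, 0.01, 0.001, 0.0001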
Code Example #24
def train_resnet_mentormix(max_step_run):
    """Trains the mentornet with the student resnet model.

  Args:
    max_step_run: The maximum number of gradient steps.
  """
    if not os.path.exists(FLAGS.train_log_dir):
        os.makedirs(FLAGS.train_log_dir)
    g = tf.Graph()

    with g.as_default():
        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            tf_global_step = tf.train.get_or_create_global_step()

            (images, one_hot_labels, num_samples_per_epoch,
             num_of_classes) = cifar_data_provider.provide_resnet_data(
                 FLAGS.dataset_name,
                 'train',
                 FLAGS.batch_size,
                 dataset_dir=FLAGS.data_dir)

            hps = resnet_model.HParams(batch_size=FLAGS.batch_size,
                                       num_classes=num_of_classes,
                                       min_lrn_rate=0.0001,
                                       lrn_rate=FLAGS.learning_rate,
                                       num_residual_units=5,
                                       use_bottleneck=False,
                                       weight_decay_rate=0.0002,
                                       relu_leakiness=0.1,
                                       optimizer='mom')

            images.set_shape([FLAGS.batch_size, 32, 32, 3])

            # Define the model:
            resnet = resnet_model.ResNet(hps,
                                         images,
                                         one_hot_labels,
                                         mode='train')
            with tf.variable_scope('ResNet32'):
                logits = resnet.build_model()

            # Specify the loss function:
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=one_hot_labels, logits=logits)

            dropout_rates = utils.parse_dropout_rate_list(
                FLAGS.example_dropout_rates)
            example_dropout_rates = tf.convert_to_tensor(
                dropout_rates, np.float32, name='example_dropout_rates')

            loss_p_percentile = tf.convert_to_tensor(np.array(
                [FLAGS.loss_p_percentile] * 100),
                                                     np.float32,
                                                     name='loss_p_percentile')

            loss = tf.reshape(loss, [-1, 1])

            epoch_step = tf.to_int32(
                tf.floor(tf.divide(tf_global_step, max_step_run) * 100))

            zero_labels = tf.zeros([tf.shape(loss)[0], 1], tf.float32)

            mentornet_net_hparams = utils.get_mentornet_network_hyperparameter(
                FLAGS.trained_mentornet_dir)

            # In the simplest case, this function can be replaced with a thresholding
            # function. See loss_thresholding_function in utils.py.
            v = utils.mentornet(epoch_step,
                                loss,
                                zero_labels,
                                loss_p_percentile,
                                example_dropout_rates,
                                burn_in_epoch=FLAGS.burn_in_epoch,
                                mentornet_net_hparams=mentornet_net_hparams,
                                avg_name='individual')

            v = tf.stop_gradient(v)
            loss = tf.stop_gradient(tf.identity(loss))
            logits = tf.stop_gradient(tf.identity(logits))

            # Perform MentorMix
            images_mix, labels_mix = utils.mentor_mix_up(
                images, one_hot_labels, v, FLAGS.mixup_alpha)
            resnet = resnet_model.ResNet(hps,
                                         images_mix,
                                         labels_mix,
                                         mode='train')
            with tf.variable_scope('ResNet32', reuse=True):
                logits_mix = resnet.build_model()

            loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_mix,
                                                           logits=logits_mix)
            decay_loss = resnet.decay()

            # second weighting
            if FLAGS.second_reweight:
                loss = tf.reshape(loss, [-1, 1])
                v = utils.mentornet(
                    epoch_step,
                    loss,
                    zero_labels,
                    loss_p_percentile,
                    example_dropout_rates,
                    burn_in_epoch=FLAGS.burn_in_epoch,
                    mentornet_net_hparams=mentornet_net_hparams,
                    avg_name='mixed')
                v = tf.stop_gradient(v)
                weighted_loss_vector = tf.multiply(loss, v)
                loss = tf.reduce_mean(weighted_loss_vector)
                # reproduced with the following decay loss which should be 0.
                decay_loss = tf.losses.get_regularization_loss()
                decay_loss = decay_loss * (tf.reduce_sum(v) / FLAGS.batch_size)

            # Log data utilization
            data_util = utils.summarize_data_utilization(
                v, tf_global_step, FLAGS.batch_size)

            loss = tf.reduce_mean(loss)
            slim.summaries.add_scalar_summary(tf.reduce_mean(loss),
                                              'mentormix/mix_loss')

            weighted_total_loss = loss + decay_loss

            slim.summaries.add_scalar_summary(weighted_total_loss,
                                              'total_loss')
            tf.add_to_collection('total_loss', weighted_total_loss)

            # Set up the moving averages:
            moving_average_variables = tf.trainable_variables()
            moving_average_variables = tf.contrib.framework.filter_variables(
                moving_average_variables, exclude_patterns=['mentornet'])

            variable_averages = tf.train.ExponentialMovingAverage(
                0.9999, tf_global_step)
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variable_averages.apply(moving_average_variables))

            decay_steps = FLAGS.num_epochs_per_decay * num_samples_per_epoch / FLAGS.batch_size
            lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                            tf_global_step,
                                            decay_steps,
                                            FLAGS.learning_rate_decay_factor,
                                            staircase=True)
            lr = tf.squeeze(lr)
            slim.summaries.add_scalar_summary(lr, 'learning_rate')

            # Specify the optimization scheme:
            with tf.control_dependencies([weighted_total_loss, data_util]):
                # Set up training.
                trainable_variables = tf.trainable_variables()
                trainable_variables = tf.contrib.framework.filter_variables(
                    trainable_variables, exclude_patterns=['mentornet'])

                grads = tf.gradients(weighted_total_loss, trainable_variables)
                optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9)

                apply_op = optimizer.apply_gradients(
                    zip(grads, trainable_variables),
                    global_step=tf_global_step,
                    name='train_step')

                train_ops = [apply_op
                             ] + resnet.extra_train_ops + tf.get_collection(
                                 tf.GraphKeys.UPDATE_OPS)
                train_op = tf.group(*train_ops)

            # Parameter restore setup
            if FLAGS.trained_mentornet_dir is not None:
                ckpt_model = FLAGS.trained_mentornet_dir
                if os.path.isdir(FLAGS.trained_mentornet_dir):
                    ckpt_model = tf.train.latest_checkpoint(ckpt_model)

                # Fix the mentornet parameters
                variables_to_restore = slim.get_variables_to_restore(
                    include=['mentornet', 'mentornet_inputs'])
                iassign_op1, ifeed_dict1 = tf.contrib.framework.assign_from_checkpoint(
                    ckpt_model, variables_to_restore)

                # Create an initial assignment function.
                def init_assign_fn(sess):
                    tf.logging.info('Restore using customer initializer %s',
                                    '.' * 10)
                    sess.run(iassign_op1, ifeed_dict1)
            else:
                init_assign_fn = None

            tf.logging.info('-' * 20 + 'MentorMix' + '-' * 20)
            tf.logging.info('loss_p_percentile=%3f', FLAGS.loss_p_percentile)
            tf.logging.info('mixup_alpha=%d', FLAGS.mixup_alpha)
            tf.logging.info('-' * 20)

            saver = tf.train.Saver(max_to_keep=10,
                                   keep_checkpoint_every_n_hours=24)

            # Run training.
            slim.learning.train(train_op=train_op,
                                train_step_fn=resnet_train_step,
                                logdir=FLAGS.train_log_dir,
                                master=FLAGS.master,
                                is_chief=FLAGS.task == 0,
                                saver=saver,
                                number_of_steps=max_step_run,
                                init_fn=init_assign_fn,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
Code Example #25
        for batch in tl.iterate.minibatches(inputs=self.x_test,
                                            targets=self.y_test,
                                            batch_size=50,
                                            shuffle=False):
            feed_dict_eval = {img: batch[0]}
            preds = sess.run(model.predict, feed_dict=feed_dict_eval)
            for pred in preds:
                test_predicted.append(pred)

        csv_content = [["ID", "Label"]]
        for ind, data in enumerate(test_predicted):
            csv_content.append([ind + 1, data + 1])
        with open("cifar_prediction.csv", "w") as f:
            writer = csv.writer(f)
            writer.writerows(csv_content)


run = CNNEnv()

hps = resnet_model.HParams(batch_size=run.batch_num,
                           num_classes=run.nb_classes,
                           min_lrn_rate=0.0001,
                           lrn_rate=args.lr,
                           num_residual_units=args.n_resid_units,
                           use_bottleneck=False,
                           weight_decay_rate=0.0002,
                           relu_leakiness=0.1,
                           optimizer='mom')

run.train(hps)
Code Example #26
def _my_model_fn(features, labels, mode):
    """ device is automatically detected and assigned """
    #device = '/job:localhost/replica:0/task:0/device:GPU:0'
    #with tf.device(device):

    #
    # Model - Here we use pre-built 'resnet_model'
    #
    params = resnet_model.HParams(
        batch_size=batch_size,
        num_classes=10,
        min_lrn_rate=0.0001,
        lrn_rate=0.1,
        num_residual_units=5, # 5 x (3 x sub 2) + 2 = 32 layers
        use_bottleneck=False,
        weight_decay_rate=0.0002,
        relu_leakiness=0.1,
        optimizer='mom')
    train_model = resnet_model.ResNet(
        params,
        features,
        labels,
        'train')
    train_model.build_graph()

    # create evaluation metrics
    """ Please uncomment """
    """ when you output precision and accuracy to TensorBoard or use INFER """
    #truth = tf.argmax(train_model.labels, axis=1)
    #predictions = tf.argmax(train_model.predictions, axis=1)
    #precision = tf.reduce_mean(tf.to_float(tf.equal(predictions, truth)))
    #accuracy = tf.metrics.accuracy(truth, predictions)
    #tf.summary.scalar('precision', precision) # output to TensorBoard
    #tf.summary.scalar('accuracy', accuracy[1]) # output to TensorBoard

    # define operations
    if mode == tf.estimator.ModeKeys.TRAIN:
        """ We don't use tf.train.LoggingTensorHook because it doesn't work with distributed TensorFlow. """
        #logging_hook = tf.train.LoggingTensorHook(
        #    tensors={
        #        'step': train_model.global_step,
        #        'loss': train_model.cost,
        #        'lrn_rate': train_model.lrn_rate,
        #        'precision': precision
        #    },
        #    every_n_iter=10) # log output every 10 steps
        class _CustomLogHook(tf.train.SessionRunHook):
            def before_run(self, run_context):
                return tf.train.SessionRunArgs(
                    fetches = [train_model.global_step, train_model.cost])
            def after_run(self, run_context, run_values):
                if run_values.results[0] % 10 == 0: # log output every 10 steps
                    print('step:%d  loss:%.2f' % (run_values.results[0], run_values.results[1]))
        return tf.estimator.EstimatorSpec(
            mode,
            loss=train_model.cost,
            train_op=train_model.train_op,
            #training_chief_hooks=[logging_hook])
            training_chief_hooks=[_CustomLogHook()])
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
#            'accuracy': accuracy
        }
        return tf.estimator.EstimatorSpec(
            mode,
            loss=train_model.cost,
            eval_metric_ops=eval_metric_ops)
    """ Please uncomment when you use INFER """
Code Example #27
    def fit(self, X_train, Y_train):
        x = tf.placeholder(tf.float32,
                           [None, self.image_size, self.image_size, 1],
                           name='input')
        y = tf.placeholder(tf.float32, [None, self.num_classes])

        hps = resnet_model.HParams(batch_size=self.batch_size,
                                   num_classes=self.num_classes,
                                   num_residual_units=self.num_residual_units,
                                   use_bottleneck=self.is_bottlneck,
                                   relu_leakiness=self.relu_leakiness,
                                   weight_decay_rate=self.weight_decay)
        model = resnet_model.ResNet(hps, x, y)
        predict = model.out
        output = tf.nn.softmax(predict, name='output')

        with tf.name_scope("cross_ent"):
            # Pass the pre-softmax logits here; feeding `output` (already
            # softmaxed) would apply softmax twice and distort the loss.
            cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=predict,
                                                        labels=y))
            cost += model._decay()

        var_list = [v for v in tf.trainable_variables()]

        with tf.name_scope("train"):
            gradients = tf.gradients(cost, var_list)
            gradients = list(zip(gradients, var_list))
            optimizer = tf.train.MomentumOptimizer(self.learning_rate, 0.9)
            train_op = optimizer.apply_gradients(grads_and_vars=gradients)

        with tf.name_scope("accuracy"):
            prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))

        saver = tf.train.Saver()
        # Initialize the data generator separately for the training set; the validation set is not initialized here.
        train_generator = ImageDataGenerator(X_train,
                                             Y_train,
                                             shuffle=True,
                                             scale_size=(self.image_size,
                                                         self.image_size),
                                             nb_classes=self.num_classes)
        # Get the number of training steps per epoch
        train_batches_per_epoch = np.floor(self.data_size /
                                           self.batch_size).astype(np.int16)

        # Start Tensorflow session
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            #writer.add_graph(sess.graph)

            if not self.restore_checkpoint == '':
                saver.restore(sess, self.restore_checkpoint)

            print("{} Start training...".format(datetime.now()))
            #print("{} Open Tensorboard :tensorboard --logdir {} --host localhost --port 6006".format(datetime.now(),self.filewriter_path))
            for epoch in range(self.num_epochs):
                step = 1
                while step < train_batches_per_epoch:
                    # Get a batch of images and labels
                    batch_xs, batch_ys = train_generator.next_batch(
                        self.batch_size)
                    # And run the training op
                    feed_dict = {x: batch_xs, y: batch_ys}
                    sess.run(train_op, feed_dict=feed_dict)

                    # Generate summary with the current batch of data and write to file
                    if step % self.display_step == 0:
                        # loss, acc, s = sess.run([cost, accuracy, merged_summary], feed_dict=feed_dict)
                        loss, acc = sess.run([cost, accuracy],
                                             feed_dict=feed_dict)
                        #writer.add_summary(s, epoch * train_batches_per_epoch + step)
                        print(
                            "Iter {}/{}, training mini-batch loss = {:.5f}, training accuracy = {:.5f}"
                            .format(step * self.batch_size,
                                    train_batches_per_epoch * self.batch_size,
                                    loss, acc))
                    step += 1
                train_generator.reset_pointer()
Code Example #28
# mode = 'train' or 'eval'
dataset = 'cifar10'
mode = 'train'

if mode == 'train':
    num_iterations = 56000
    batch_size = 128
else:
    num_iterations = 100
    batch_size = 100

if dataset == 'cifar10':
    num_classes = 10
else:
    num_classes = 100

# set hyperparameters of resnet
hps = resnet_model.HParams(batch_size=batch_size,
                           num_classes=num_classes,
                           init_lr=0.1,
                           num_residual_units=5,
                           use_bottleneck=False,
                           weight_decay_rate=0.0001,
                           relu_leakiness=0.0,
                           optimizer='momentum')

if mode == 'train':
    train(hps, num_iterations, dataset)
else:
    evaluate(hps, num_iterations, dataset)
Code Example #29
def train_resnet_mentornet(max_step_run):
  """Trains the mentornet with the student resnet model.

  Args:
    max_step_run: The maximum number of gradient steps.
  """
  if not os.path.exists(FLAGS.train_log_dir):
    os.makedirs(FLAGS.train_log_dir)
  g = tf.Graph()

  with g.as_default():
    with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
      tf_global_step = tf.train.get_or_create_global_step()

      # pylint: disable=line-too-long
      images, one_hot_labels, clean_images, clean_one_hot_labels, num_samples_per_epoch, num_of_classes = cifar_data_provider.my_provide_resnet_data(
          FLAGS.dataset_name,
          'train',
          FLAGS.batch_size,
          dataset_dir=FLAGS.data_dir)

      hps = resnet_model.HParams(
          batch_size=FLAGS.batch_size,
          num_classes=num_of_classes,
          min_lrn_rate=0.0001,
          lrn_rate=FLAGS.learning_rate,
          num_residual_units=9,
          use_bottleneck=False,
          weight_decay_rate=0.0002,
          relu_leakiness=0.1,
          optimizer='mom')

      images.set_shape([FLAGS.batch_size, 32, 32, 3])
      tf.logging.info('num_of_example=%s', num_samples_per_epoch)

      # Define the model:
      resnet = resnet_model.ResNet(hps, images, one_hot_labels, mode='train')
      logits = resnet.build_model()

      # Specify the loss function:
      loss = tf.nn.softmax_cross_entropy_with_logits(
          labels=one_hot_labels, logits=logits)

      dropout_rates = utils.parse_dropout_rate_list(FLAGS.example_dropout_rates)
      example_dropout_rates = tf.convert_to_tensor(
          dropout_rates, np.float32, name='example_dropout_rates')

      loss_p_percentile = tf.convert_to_tensor(
          np.array([FLAGS.loss_p_percentile] * 100),
          np.float32,
          name='loss_p_percentile')

      loss = tf.reshape(loss, [-1, 1])

      epoch_step = tf.to_int32(
          tf.floor(tf.divide(tf_global_step, max_step_run) * 100))

      zero_labels = tf.zeros([tf.shape(loss)[0], 1], tf.float32)

      v = utils.mentornet(
          epoch_step,
          loss,
          zero_labels,
          loss_p_percentile,
          example_dropout_rates,
          burn_in_epoch=FLAGS.burn_in_epoch,
          fixed_epoch_after_burn_in=FLAGS.fixed_epoch_after_burn_in,
          loss_moving_average_decay=FLAGS.loss_moving_average_decay)

      v = tf.stop_gradient(v)  # stop_gradient returns a new tensor; keep the result

      # Split v into clean data & noise data part
      is_clean = tf.reshape(tf.reduce_all(tf.equal(one_hot_labels, clean_one_hot_labels), axis=1), [-1,1])
      clean_v = tf.boolean_mask(v, is_clean)
      noise_v = tf.boolean_mask(v, ~is_clean)
      tf.add_to_collection('v', v)
      tf.add_to_collection('v', clean_v)
      tf.add_to_collection('v', noise_v)

      slim.summaries.add_histogram_summary(tf.boolean_mask(v, is_clean), 'clean_v')
      slim.summaries.add_histogram_summary(tf.boolean_mask(v, ~is_clean), 'noisy_v')

      # Log data utilization
      data_util = utils.summarize_data_utilization(v, tf_global_step,
                                                   FLAGS.batch_size)
      decay_loss = resnet.decay()
      weighted_loss_vector = tf.multiply(loss, v)

      weighted_loss = tf.reduce_mean(weighted_loss_vector)

      slim.summaries.add_scalar_summary(
          tf.reduce_mean(loss), 'mentornet/orig_loss')
      slim.summaries.add_scalar_summary(weighted_loss,
                                        'mentornet/weighted_loss')

      # Normalize the decay loss based on v
      weighed_decay_loss = decay_loss * (tf.reduce_sum(v) / FLAGS.batch_size)

      weighted_total_loss = weighted_loss + weighed_decay_loss

      slim.summaries.add_scalar_summary(weighted_total_loss,
                                        'mentornet/total_loss')

      slim.summaries.add_scalar_summary(weighted_total_loss, 'total_loss')
      tf.add_to_collection('total_loss', weighted_total_loss)

      boundaries = [19531, 25000, 30000]
      values = [FLAGS.learning_rate * t for t in [1, 0.1, 0.01, 0.001]]
      lr = tf.train.piecewise_constant(tf_global_step, boundaries, values)
      slim.summaries.add_scalar_summary(lr, 'learning_rate')

      # Specify the optimization scheme:
      with tf.control_dependencies([weighted_total_loss, data_util]):
        # Set up training.
        trainable_variables = tf.trainable_variables()
        trainable_variables = tf.contrib.framework.filter_variables(
            trainable_variables, exclude_patterns=['mentornet'])

        grads = tf.gradients(weighted_total_loss, trainable_variables)
        optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9)

        apply_op = optimizer.apply_gradients(
            zip(grads, trainable_variables),
            global_step=tf_global_step,
            name='train_step')

        train_ops = [apply_op] + resnet.extra_train_ops
        train_op = tf.group(*train_ops)

      # Parameter restore setup
      if FLAGS.trained_mentornet_dir is not None:
        ckpt_model = FLAGS.trained_mentornet_dir
        if os.path.isdir(FLAGS.trained_mentornet_dir):
          ckpt_model = tf.train.latest_checkpoint(ckpt_model)

        # Fix the mentornet parameters
        variables_to_restore = slim.get_variables_to_restore(
            # TODO(lujiang): mentornet_inputs or mentor_inputs?
            include=['mentornet', 'mentornet_inputs'])
        iassign_op1, ifeed_dict1 = tf.contrib.framework.assign_from_checkpoint(
            ckpt_model, variables_to_restore)

        # Create an initial assignment function.
        def init_assign_fn(sess):
          tf.logging.info('Restore using customer initializer %s', '.' * 10)
          sess.run(iassign_op1, ifeed_dict1)
      else:
        init_assign_fn = None

      tf.logging.info('-' * 20 + 'MentorNet' + '-' * 20)
      if FLAGS.trained_mentornet_dir is not None:
        tf.logging.info('loaded pretrained mentornet from %s', ckpt_model)
      tf.logging.info('loss_p_percentile=%3f', FLAGS.loss_p_percentile)
      tf.logging.info('burn_in_epoch=%d', FLAGS.burn_in_epoch)
      tf.logging.info('fixed_epoch_after_burn_in=%s',
                      FLAGS.fixed_epoch_after_burn_in)
      tf.logging.info('loss_moving_average_decay=%3f',
                      FLAGS.loss_moving_average_decay)
      tf.logging.info('example_dropout_rates %s', ','.join(
          str(t) for t in dropout_rates))
      tf.logging.info('-' * 20)

      saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=24)

      # Run training.
      slim.learning.train(
          train_op=train_op,
          train_step_fn=resnet_train_step,
          logdir=FLAGS.train_log_dir,
          master=FLAGS.master,
          is_chief=FLAGS.task == 0,
          saver=saver,
          number_of_steps=max_step_run,
          init_fn=init_assign_fn,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
Code Example #30
MODE = 'train'
LOG_ROOT='../results/resnet_model'
DATASET='cifar100'
DEV = '/gpu:0'
tf.reset_default_graph()


# construct train model and session
batch_size_train = 128
batch_size_test = 100
hps_train = resnet_model.HParams(batch_size=batch_size_train,
                                num_classes=NUM_CLASSES,
                                min_lrn_rate=0.0001,
                                lrn_rate=0.1,
                                mom=0.9,
                                clip_norm_base=10.0,
                                num_residual_units=5,
                                use_bottleneck=True,
                                weight_decay_rate=0.0002,
                                relu_leakiness=0.1,
                                optimizer='YF', model_scope='train')
# specify how much memory to use on each GPU
gpu_mem_portion=0.5
n_core = 16
with tf.variable_scope("train"), tf.device(DEV):
  model_train = get_model(hps_train, DATASET, TRAIN_DATA_PATH, mode='train')
init_op = tf.global_variables_initializer()
sess = GetTrainingSession(model_train, gpu_mem_portion=gpu_mem_portion)


# run steps