def test_inception_v1(img_dir):
    """
    Test Inception-V1 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224)) / 255
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(inception_v1_arg_scope()):
        _, _ = inception_v1(inputs, 1001, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/inception_v1.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'InceptionV1/Logits/SpatialSqueeze:0')
        probs = tf.nn.softmax(outputs)
        pred = tf.argmax(probs, axis=1)[0]
        prob = tf.reduce_max(probs, axis=1)[0]

        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred]

    print('Result of Inception-V1:', name, prob)
    return name, prob
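A minimal usage sketch, not part of the original; it assumes label_dict maps
class indices to human-readable labels and that the checkpoint path above
exists:

if __name__ == '__main__':
    # './images/example.jpg' is an illustrative path, not from the original.
    test_inception_v1('./images/example.jpg')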
Example #2
def make_inceptionv1bn_multi_embeddings(batch_imgs,
                                        embedding_dims,
                                        n_heads,
                                        phase_is_train,
                                        uniform_bias=False,
                                        weight_decay=0.00004,
                                        pooling='avg'):
    # Candidate slim endpoint names for the embedding heads:
    emb_info = [
        'Mixed_3b', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
        'Mixed_4e', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'
    ]
    if n_heads == 1:
        emb_info = ['Mixed_5c']

    left_embedding = embedding_dims
    with slim.arg_scope(
            inception_v1.inception_v1_arg_scope(weight_decay=weight_decay)):
        net, endpoints = inception_v1.inception_v1(
            batch_imgs,
            num_classes=0,
            # with num_classes=0, the output before the dropout layer is returned
            dropout_keep_prob=1.0,
            is_training=phase_is_train)
        for i in range(n_heads):
            # put the residual in the preceding (earlier) embeddings
            emb_dim = int(math.ceil(left_embedding / float(n_heads - i)))
            left_embedding -= emb_dim
            with tf.variable_scope('loss%d' % i) as scope:
                emb1 = tf.reduce_mean(endpoints[emb_info[i]], [1, 2])
                final_emb = emb1
                if pooling == 'avgnmax':
                    emb2 = tf.reduce_max(endpoints[emb_info[i]], [1, 2])
                    final_emb = tf.concat([emb1, emb2], 1)
                endpoints['emb_%d' % i] = slim.fully_connected(
                    final_emb, emb_dim, activation_fn=None)
                endpoints['embedding%d' % i] = tf.nn.l2_normalize(
                    endpoints['emb_%d' % i], dim=1)

        with tf.variable_scope('fc_embedding') as scope:
            embs = [endpoints['embedding%d' % i] for i in range(n_heads)]
            endpoints['fc_embedding'] = tf.concat(embs, 1) / np.sqrt(n_heads)

#    print('Endpoints')
#    for k,v in endpoints.items():
#        print((k,v))
    return endpoints, None
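The per-head dimension split above gives any remainder of embedding_dims to
the earlier heads. A standalone sketch of just that arithmetic:

import math

def split_embedding_dims(embedding_dims, n_heads):
    # Mirrors the loop above: ceil(left / remaining_heads) per head.
    left, dims = embedding_dims, []
    for i in range(n_heads):
        d = int(math.ceil(left / float(n_heads - i)))
        dims.append(d)
        left -= d
    return dims

# split_embedding_dims(512, 9) -> [57, 57, 57, 57, 57, 57, 57, 57, 56]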
Example #3
def run_benchmark():
    """Run the benchmark on Inception_V1."""
    with tf.Graph().as_default():
        # Generate some dummy images in NHWC layout, matching the
        # 224x224 RGB input that Inception-V1 expects.
        image_size = 224
        images = tf.Variable(tf.random_normal(
            [FLAGS.batch_size, image_size, image_size, 3],
            dtype=tf.float32,
            stddev=1e-1),
                             trainable=False)

        labels = tf.Variable(tf.ones([FLAGS.batch_size], dtype=tf.int32),
                             trainable=False)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, end_points = inception_v1.inception_v1(images)
        # Build an initialization operation.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto()
        # config.gpu_options.allocator_type = 'BFC'
        sess = tf.Session(config=config)
        sess.run(init)

        # Run the forward benchmark.
        time_tensorflow_run(sess, logits, "Forward")

        # Add a simple objective so we can calculate the backward pass.
        objective = loss(logits, labels)

        # Variables to train.
        variables_to_train = tf.trainable_variables()

        # Compute the gradient with respect to all the parameters.
        grad = tf.gradients(objective, variables_to_train)
        # Run the backward benchmark.
        time_tensorflow_run(sess, grad, "Forward-backward")
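Neither time_tensorflow_run nor loss is defined in this snippet. Minimal
sketches in the spirit of the classic TensorFlow benchmark scripts, with
assumed warm-up and iteration counts:

import time

def time_tensorflow_run(session, target, info_string,
                        num_burn_in=10, num_batches=100):
    # Warm-up runs are excluded from the timing.
    for _ in range(num_burn_in):
        session.run(target)
    start = time.time()
    for _ in range(num_batches):
        session.run(target)
    duration = time.time() - start
    print('%s: %.3f sec/batch over %d batches' %
          (info_string, duration / num_batches, num_batches))

def loss(logits, labels):
    # Assumed objective: sparse softmax cross-entropy averaged over the batch.
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                       logits=logits))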
Example #4
def train(dataset_dir,
          base_lr=0.01,
          max_steps=30000,
          train_log_dir='./logs/inception_v1_Momentum_0.01',
          preprocessing_name='inception_v1'):
    """
    :param dataset_dir: 存放数据的根目录
    :param base_lr: 学习率
    :param max_steps: 最大迭代次数
    :param train_log_dir: 模型文件的存放位置
    :param preprocessing_name: 所使用的预处理名称
    :return:
    """
    dataset = convert_flower_to_tfrecord.read_tfrecords(
        split_name='train', dataset_dir=dataset_dir)

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=4,  # number of parallel readers reading from the dataset
        common_queue_capacity=20 * batch_size,
        common_queue_min=10 * batch_size)

    [images, labels] = provider.get(['image', 'label'])

    images_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=True)
    images = images_preprocessing_fn(images, resize_height, resize_width)

    # Get batch_size-sized batches of images and labels
    train_batch_images, train_batch_labels = tf.train.batch(
        [images, labels],
        batch_size=batch_size,
        num_threads=4,  # The number of threads used to create the batches.
        capacity=5 * batch_size)

    # One-hot encode the labels
    train_batch_labels = tf.one_hot(train_batch_labels,
                                    num_classes,
                                    on_value=1,
                                    off_value=0)

    # Note: the parentheses on inception_v1_arg_scope() must not be dropped;
    # it is a function call that returns the arg scope.
    with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
        out, end_points = inception_v1.inception_v1(
            inputs=input_images,
            num_classes=num_classes,
            is_training=is_training,
            dropout_keep_prob=keep_prob)

    # Compute loss and accuracy, and choose an optimizer
    loss = tf.losses.softmax_cross_entropy(onehot_labels=input_labels,
                                           logits=out)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)),
                tf.float32)) * 100.0

    optimizer = tf.train.MomentumOptimizer(learning_rate=base_lr,
                                           momentum=0.9)  # other optimizers could be used here

    # When `batch_norm` layers are used, their moving `average` and `variance`
    # statistics must be updated during training. This does not happen
    # automatically, so we make the train op depend on the update ops below.
    with tf.control_dependencies(tf.get_collection(
            tf.GraphKeys.UPDATE_OPS)):  # run the update ops before the train op
        train_op = slim.learning.create_train_op(total_loss=loss,
                                                 optimizer=optimizer)

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for steps in np.arange(max_steps):
            input_batch_images, input_batch_labels = sess.run(
                [train_batch_images, train_batch_labels])
            _, train_loss = sess.run(
                [train_op, loss],
                feed_dict={
                    input_images: input_batch_images,
                    input_labels: input_batch_labels,
                    keep_prob: 0.8,
                    is_training: True
                })
            # Log loss and accuracy during training
            if steps % 50 == 0 or (steps + 1) == max_steps:
                train_acc = sess.run(accuracy,
                                     feed_dict={
                                         input_images: input_batch_images,
                                         input_labels: input_batch_labels,
                                         keep_prob: 1.0,
                                         is_training: False
                                     })
                print('Step: %d, loss: %.4f, accuracy: %.4f' %
                      (steps, train_loss, train_acc))

            # Save a checkpoint every 2000 steps
            if steps % 2000 == 0 or (steps + 1) == max_steps:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=steps)

        coord.request_stop()
        coord.join(threads)
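train() feeds module-level placeholders that this snippet does not define. A
sketch of what they would have to look like, assuming resize_height,
resize_width and num_classes are the same globals used above:

# Assumed module-level definitions (shapes inferred from the training loop).
input_images = tf.placeholder(tf.float32,
                              [None, resize_height, resize_width, 3],
                              name='input_images')
input_labels = tf.placeholder(tf.int32, [None, num_classes],
                              name='input_labels')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
is_training = tf.placeholder(tf.bool, name='is_training')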
Example #5
def compute_feature_of_batch_ts_with_cnn(file_path_of_ts, file_path_of_feature,
                                         cnn_model_name,
                                         file_path_of_pretrained_model):
    r'''
    Compute features of some time series with a pretrained CNN.
    :param file_path_of_ts: file path of the time series
    :param file_path_of_feature: file path for saving the features
    :param cnn_model_name: name of the CNN model
    :param file_path_of_pretrained_model: file path of the pretrained CNN
    :return: None
    '''
    #tf.reset_default_graph()
    #read data
    data = pd.read_csv(file_path_of_ts)
    #data=data.sample(20)
    #change dataframe to list
    id_list = data.iloc[:, 0].tolist()
    data_list = change_dataframe_to_dict_(data)

    model = cnn_model_name
    checkpoint_file = file_path_of_pretrained_model

    # Default to 224x224 inputs; some models (handled below) take 299x299 instead.
    if 'inception' in model: height, width, channels = 224, 224, 3
    if 'resnet' in model: height, width, channels = 224, 224, 3
    if 'vgg' in model: height, width, channels = 224, 224, 3

    if model == 'inception_resnet_v2': height, width, channels = 299, 299, 3

    x = tf.placeholder(tf.float32, shape=(1, height, width, channels))

    # load up model specific stuff
    if model == 'inception_v1':
        #from inception_v1 import *
        from nets import inception_v1

        arg_scope = inception_v1.inception_v1_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v1.inception_v1(x,
                                                           is_training=False,
                                                           num_classes=None)
            features = end_points['AvgPool_0a_7x7']
            # print('logits')
            # print(logits.shape)
            # print('features')
            # print(features.shape)
    elif model == 'inception_v2':
        #from inception_v2 import *
        from nets import inception_v2

        arg_scope = inception_v2.inception_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v2.inception_v2(x,
                                                           is_training=False,
                                                           num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_v3':
        #from inception_v3 import *
        from nets import inception_v3

        arg_scope = inception_v3.inception_v3_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v3.inception_v3(x,
                                                           is_training=False,
                                                           num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_resnet_v2':
        #from inception_resnet_v2 import *
        from nets import inception_resnet_v2

        arg_scope = inception_resnet_v2.inception_resnet_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_resnet_v2.inception_resnet_v2(
                x, is_training=False, num_classes=1001)
            features = end_points['PreLogitsFlatten']
    elif model == 'resnet_v1_50':
        #from resnet_v1 import *

        from nets import resnet_v1

        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_50(x,
                                                        is_training=False,
                                                        num_classes=1000)
            features = end_points['global_pool']
    elif model == 'resnet_v1_101':
        #from resnet_v1 import *
        from nets import resnet_v1

        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_101(x,
                                                         is_training=False,
                                                         num_classes=1000)
            features = end_points['global_pool']
    elif model == 'vgg_16':
        #from vgg import *
        from nets import vgg

        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_16(x, is_training=False)
            features = end_points['vgg_16/fc8']
    elif model == 'vgg_19':
        #from vgg import *
        from nets import vgg

        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_19(x, is_training=False)
            features = end_points['vgg_19/fc8']
    #cpu_config = tf.ConfigProto(intra_op_parallelism_threads = 8, inter_op_parallelism_threads = 8, device_count = {'CPU': 3})
    #sess = tf.Session(config = cpu_config)
    sess = tf.Session()
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_file)
    feature_list = []
    count_temp = 0

    for i in range(len(data_list)):
        count_temp = count_temp + 1
        # convert the time series into a recurrence-plot image
        ts_dict = data_list[i]
        ts = ts_dict['ts']
        id = ts_dict['id']
        new_ts = min_max_transform(ts)
        normalized = np.array(new_ts)
        fig, ax = plt.subplots()
        #plt.imshow(recurrence_plot.rec_plot(normalized), cmap=plt.cm.gray)
        plt.imshow(recurrence_plot.rec_plot(normalized))
        ax.set_xticks([])
        ax.set_yticks([])
        #print(id)
        path = "inception-v1/" + id + ".jpg"
        plt.savefig(path)
        plt.close(fig)
        # compute features of the saved image
        image = misc.imread(path)
        #from matplotlib.pyplot import imread
        #image=imread(path)
        # print('image')
        # print(image.size)
        image = misc.imresize(image, (height, width))
        image = np.expand_dims(image, 0)
        feature = np.squeeze(sess.run(features, feed_dict={x: image}))
        feature_list.append(feature)
        # print('feature-test')
        # print(feature)
        os.remove(path)
        if count_temp % 100 == 0:
            print(count_temp)
        # process the collected results and write them to csv
    feature_array = np.array(feature_list)

    feature_df = pd.DataFrame(feature_array)
    # print(feature_df.shape)
    # print(len(id_list))
    #add id
    feature_df.insert(loc=0, column='id', value=id_list)
    # print(feature_final_df.shape)
    # print(feature_final_df.head())
    feature_df.to_csv(file_path_of_feature, index=False)
    gc.collect()
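An illustrative call, with placeholder paths in the same <...> convention used
elsewhere in these examples:

compute_feature_of_batch_ts_with_cnn(
    file_path_of_ts='<time_series.csv>',
    file_path_of_feature='<features.csv>',
    cnn_model_name='inception_v1',
    file_path_of_pretrained_model='<inception_v1.ckpt>')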
Example #6
print('constructing model')
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

#build the graph
img_place_holder = tf.placeholder(
    tf.float32, [None, default_image_size, default_image_size, 3])
label_place_holder = tf.placeholder(tf.float32, [None, num_training_category])
alpha_place_holder = tf.placeholder(tf.float32, shape=())
lr_place_holder = tf.placeholder(tf.float32, shape=())

#build backbone, InceptionV1
if args.base_network == 'InceptionV1':
    with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
        net_output, _ = inception_v1.inception_v1(
            img_place_holder, embedding_dim=args.embedding_dim, use_bn=args.bn)
        test_net_output, _ = inception_v1.inception_v1(
            img_place_holder,
            embedding_dim=args.embedding_dim,
            reuse=True,
            is_training=False,
            use_bn=args.bn)  #
else:
    print('Unknown network.')
    quit()

#build final classifier
with tf.variable_scope('retrieval'):
    retrieval_layer = layers.retrieval_layer_2(embedding_dim,
                                               num_training_category)
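Not part of the original: assuming test_net_output yields L2-normalized
embeddings, retrieval at test time reduces to a dot-product ranking, e.g.:

import numpy as np

def rank_by_cosine(query_emb, gallery_embs):
    # Cosine similarity equals the dot product for unit-norm vectors.
    sims = gallery_embs.dot(query_emb)
    return np.argsort(-sims)  # indices of best matches first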
Example #7
def main():
    data_path = '<train-CARLA-VP.tfrecords>'

    model_type = 'vgg-16'
    train_dir = '<saved_model_path>'
    est_label = 'horvpz'

    num_bins = 500

    sphere_params = np.load('<carlavp_label_to_horvpz_fov_pitch.npz>')
    all_bins = sphere_params['all_bins']
    all_sphere_centres = sphere_params['all_sphere_centres']
    all_sphere_radii = sphere_params['all_sphere_radii']

    if est_label == 'horfov':
        fov_bins = np.arange(15, 115, 100 / num_bins)
        half_fov_bin_size = (fov_bins[1] - fov_bins[0]) / 2

    if model_type == 'inceptionv4':
        net_width = 299
        net_height = 299
    else:
        net_width = 224
        net_height = 224
    if model_type == 'vgg-m':
        model = pickle.load(open("<vggm-tf.p>", "rb"))
        average_image = np.load('<vgg_average_image.npy>')
    elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
        _R_MEAN = 123.68
        _G_MEAN = 116.78
        _B_MEAN = 103.94
        resnet_average_channels = np.array(np.concatenate(
            (np.tile(_R_MEAN, (net_height, net_width, 1)),
             np.tile(_G_MEAN, (net_height, net_width, 1)),
             np.tile(_B_MEAN, (net_height, net_width, 1))),
            axis=2),
                                           dtype=np.float32)
    elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
        print("Nothing needs to be initialized for this cnn model")
    else:
        print("ERROR: No such CNN exists")
    if est_label == 'horfov':
        no_params_model = 3
    elif est_label == 'horvpz':
        no_params_model = 4
    else:
        print("ERROR: No such 'est_label'")

    max_batch_size = 60

    total_examples = sum(1 for _ in tf.python_io.tf_record_iterator(data_path))
    print("Total examples: ", total_examples)

    divs = np.array(list(factors(total_examples)))
    sorted_divs = divs[divs.argsort()]
    batch_size = sorted_divs[sorted_divs < max_batch_size][-1]
    print("Batch Size:", batch_size)

    ct = np.arange(11, 12, 4)

    best_avg_man_loss = np.inf

    for en, consider_top in enumerate(ct):

        total_manhattan_loss = np.zeros(5)

        with tf.Graph().as_default():
            tf.logging.set_verbosity(tf.logging.INFO)
            filename_queue = tf.train.string_input_producer([data_path])
            image, label, carla_width, carla_height = util_tfio.general_read_and_decode(
                filename_queue, num_classes=8, dtype=tf.float64)

            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)

            if model_type == 'vgg-m':
                image = image - average_image
            elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
                image = image - resnet_average_channels
            elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
                image = tf.cast(image, tf.float32) * (1. / 255)
                image = (image - 0.5) * 2
            else:
                print("ERROR: No such CNN exists")

            images, labels, carla_widths, carla_heights = tf.train.batch(
                [image, label, carla_width, carla_height],
                batch_size=batch_size,
                num_threads=1,
                capacity=5 * batch_size)

            print(images)

            if model_type == 'vgg-m':
                logits = vgg_m.cnn_vggm(images,
                                        num_classes=num_bins * no_params_model,
                                        model=model)
            elif model_type == 'resnet50':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_50(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False,
                        global_pool=True)  # , reuse=True)#
            elif model_type == 'resnet101':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_101(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False,
                        global_pool=True)  # , reuse=True)#
            elif model_type == 'vgg-16':
                with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
                    logits, _ = vgg.vgg_16(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False
                    )  # , global_pool=False)#, reuse=True)#
            elif model_type == 'inceptionv1':
                with slim.arg_scope(
                        inception_v1.inception_v1_arg_scope()) as scope:
                    logits, _ = inception_v1.inception_v1(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False
                    )  # , global_pool=False)#, reuse=True)#
            elif model_type == 'inceptionv4':
                with slim.arg_scope(
                        inception_v4.inception_v4_arg_scope()) as scope:
                    logits, _ = inception_v4.inception_v4(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False
                    )  # , global_pool=False)#, reuse=True)#
            else:
                print("ERROR: No such CNN exists")

            checkpoint_path = train_dir
            init_fn = slim.assign_from_checkpoint_fn(
                checkpoint_path, slim.get_variables_to_restore())

            print("--------------------------------------------------------")
            print("No. of examples not evaluated because of batch size:",
                  np.mod(total_examples, batch_size))
            print("--------------------------------------------------------")

            with tf.Session() as sess:
                with slim.queues.QueueRunners(sess):
                    sess.run(tf.initialize_local_variables())
                    init_fn(sess)

                    for loop_no in range(
                            int(np.floor(total_examples / batch_size))):
                        np_rawpreds, np_images_raw, np_labels, np_width, np_height = sess.run(
                            [
                                logits, images, labels, carla_widths,
                                carla_heights
                            ])

                        for i in range(batch_size):
                            pred_indices = np.zeros(no_params_model,
                                                    dtype=np.int32)
                            output_vals = np_rawpreds[i, :].squeeze().reshape(
                                no_params_model, -1)

                            for ln in range(no_params_model):
                                predsoft = my_softmax(
                                    output_vals[ln, :][np.newaxis]).squeeze()

                                topindices = predsoft.argsort()[::-1][:consider_top]
                                probsindices = predsoft[topindices] / np.sum(
                                    predsoft[topindices])
                                pred_indices[ln] = np.abs(int(np.round(
                                    np.sum(probsindices * topindices))))

                            if est_label == 'horfov':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    np.hstack(
                                        (pred_indices[:2], 0, 0)), all_bins,
                                    all_sphere_centres, all_sphere_radii)
                                my_fov = fov_bins[
                                    pred_indices[2]] + half_fov_bin_size
                                fx, fy, roll_from_horizon, my_tilt = get_intrinisic_extrinsic_params_from_horfov(
                                    img_dims=(np_width[i], np_height[i]),
                                    horizonvector=estimated_input_points,
                                    fov=my_fov,
                                    net_dims=(net_width, net_height))

                            elif est_label == 'horvpz':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    pred_indices[:4], all_bins,
                                    all_sphere_centres, all_sphere_radii)
                                fx, fy, roll_from_horizon, my_tilt = \
                                    get_intrinisic_extrinsic_params_from_horizonvector_vpz(
                                        img_dims=(np_width[i], np_height[i]),
                                        horizonvector_vpz=estimated_input_points,
                                        net_dims=(net_width, net_height))

                            my_fov_fx = degrees(
                                np.arctan(np_width[i] / (2 * fx)) * 2)
                            my_fov_fy = degrees(
                                np.arctan(np_width[i] / (2 * fy)) * 2)
                            my_tilt = -degrees(my_tilt)

                            gt_label = np_labels[i, :].reshape(4, -1)
                            gt_fov = gt_label[3, 0]
                            gt_pitch = gt_label[3, 1]
                            gt_roll = degrees(
                                atan((gt_label[1, 1] - gt_label[0, 1]) /
                                     (gt_label[1, 0] - gt_label[0, 0])))

                            manhattan_loss = [
                                np.abs(my_fov_fx - gt_fov),
                                np.abs(my_fov_fy - gt_fov),
                                np.abs(((my_fov_fx + my_fov_fy) / 2) - gt_fov),
                                np.abs(my_tilt - gt_pitch),
                                np.abs(roll_from_horizon - gt_roll)
                            ]

                            total_manhattan_loss += manhattan_loss

        avg_manhattan_loss = total_manhattan_loss / total_examples

        print("ct:", consider_top, "Average manhattan loss per scalar: ",
              avg_manhattan_loss)
        print(
            "-------------------------------------------------------------------"
        )

        this_loss = np.mean(
            np.hstack((avg_manhattan_loss[1], avg_manhattan_loss[3:])))
        if this_loss < best_avg_man_loss:
            best_avg_man_loss = this_loss
            display_loss = [
                consider_top, -1, avg_manhattan_loss[1], avg_manhattan_loss[3],
                avg_manhattan_loss[4]
            ]

    print("Best loss:", display_loss)
Example #8
def network_fn(images, **kwargs):
    with slim.arg_scope(arg_scope):
        return inception_v1.inception_v1(images, num_classes,
                                         is_training=is_training)
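This closure matches the pattern of slim's nets_factory; a hedged sketch of
the surrounding factory that would produce it (the factory name and defaults
are assumptions):

def get_network_fn(num_classes, is_training=False, weight_decay=0.0):
    arg_scope = inception_v1.inception_v1_arg_scope(weight_decay=weight_decay)

    def network_fn(images, **kwargs):
        with slim.arg_scope(arg_scope):
            return inception_v1.inception_v1(images, num_classes,
                                             is_training=is_training)

    return network_fn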
Example #9
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    dropout_val = 0.8

    is_flip = True

    is_smoothing = True

    maintain_aspect_ratio = True

    min_perc = 0.90
    is_random_crops = False

    max_rotation = 0

    num_bins = 500
    no_output_params = 4
    num_classes = no_output_params * num_bins

    eval_num_classes = 7 * num_bins

    num_samples = sum(
        1 for _ in tf.python_io.tf_record_iterator(FLAGS.dataset_dir))
    print("No. of training examples: ", num_samples)

    assert max_rotation >= 0

    print('---------------------------------------------------------')
    print('Make sure that no. of training samples is actually ' +
          str(num_samples))
    print('---------------------------------------------------------')

    if FLAGS.model_name == 'inception-v4':
        net_width = 299
        net_height = 299
    else:
        net_width = 224
        net_height = 224

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        global_step = slim.create_global_step()

        data_path = FLAGS.dataset_dir
        filename_queue = tf.train.string_input_producer([data_path])
        image, label, carla_width, carla_height = util_tfio.general_read_and_decode(
            filename_queue, num_classes=8, dtype=tf.float64)
        print(image)
        print(label)

        # --------------------------------------------------------------------------------------------------------------------
        degree_angle = tf.random_uniform([],
                                         minval=-max_rotation,
                                         maxval=max_rotation,
                                         dtype=tf.float32)
        radian_angle = util_tfgeometry.tf_deg2rad(degree_angle)

        label = tf.reshape(label, (4, 2))
        # my_fov = label[3, 0]
        # my_pitch = label[3, 1]

        label = label[:3, :]

        if is_flip:
            image, bool_flip = util_tfimage.random_flip_left_right(image)

            def flip_gt():
                return tf.stack(
                    [[tf.cast(carla_width, label.dtype) - label[1, 0], label[1, 1]],
                     [tf.cast(carla_width, label.dtype) - label[0, 0], label[0, 1]],
                     [tf.cast(carla_width, label.dtype) - label[2, 0], label[2, 1]]])

            def gt():
                return label

            label = tf.cond(bool_flip, flip_gt, gt)

        if max_rotation > 0:
            # image rotation is buggy on GPU
            with tf.device('/cpu:0'):
                image = tf.contrib.image.rotate(image,
                                                radian_angle,
                                                interpolation='BILINEAR')
            max_width, max_height = util_tfgeometry.rotatedRectWithMaxArea_tf(
                carla_width, carla_height, radian_angle)
            max_height = tf.cast(tf.floor(max_height), tf.int32)
            max_width = tf.cast(tf.floor(max_width), tf.int32)
            print("max_width, height", max_width, max_height)
            image = tf.image.resize_image_with_crop_or_pad(
                image, target_height=max_height, target_width=max_width)

            rot_vps = util_tfgeometry.rotate_vps(
                (carla_width / 2, carla_height / 2), label,
                tf.cast(radian_angle, dtype=tf.float64))
            crop_rot_vps = util_tfgeometry.center_crop_vps(
                rot_vps,
                orig_dims=(carla_width, carla_height),
                crop_dims=(max_width, max_height))
        else:
            max_width = carla_width
            max_height = carla_height
            crop_rot_vps = label

        if maintain_aspect_ratio:
            image, max_width, max_height = util_tfimage.square_random_crop(
                image, max_width, max_height)

        if not is_random_crops:
            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)

            float_max_height = tf.cast(max_height, tf.float64)
            float_max_width = tf.cast(max_width, tf.float64)
            final_vps = util_tfgeometry.resize_vps(
                crop_rot_vps,
                orig_dims=(float_max_width, float_max_height),
                resize_dims=(net_width, net_height))
        else:
            rand_perc = tf.random_uniform([], minval=min_perc, maxval=1.0)
            crop_height = tf.maximum(
                net_height,
                tf.cast(tf.floor(rand_perc * tf.cast(max_height, tf.float32)),
                        dtype=tf.int32))
            crop_width = tf.maximum(
                net_width,
                tf.cast(tf.floor(rand_perc * tf.cast(max_width, tf.float32)),
                        dtype=tf.int32))
            image, off_height, off_width = vgg_preprocessing._custom_random_crop(
                [image], crop_height, crop_width)[0]
            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)

            temp_final_vps = util_tfgeometry.offset_vps(
                crop_rot_vps, off_height, off_width)
            float_crop_height = tf.cast(crop_height, tf.float64)
            float_crop_width = tf.cast(crop_width, tf.float64)
            final_vps = util_tfgeometry.resize_vps(
                temp_final_vps,
                orig_dims=(float_crop_width, float_crop_height),
                resize_dims=(net_width, net_height))

        image = util_tfimage.distort_color(image,
                                           color_ordering=tf.random_uniform(
                                               [],
                                               minval=0,
                                               maxval=4,
                                               dtype=tf.int32),
                                           fast_mode=False)

        # Pixel values here, before the pre-processing below, are in [0, 255]
        if FLAGS.model_name == 'vgg-m':
            model = pickle.load(open("<vggm-tf.p>", "rb"))
            average_image = np.load('<vgg_average_image.npy>')
            image = image - average_image
        elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16':
            image = vgg_preprocessing.my_preprocess_image(image)
        elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \
                FLAGS.model_name == 'inception-v4':
            image = tf.cast(image, tf.float32) * (1. / 255)
            image = (image - 0.5) * 2
        else:
            sys.exit("Invalid value for model name!")

        label = tf.reshape(final_vps, (3, 2))
        all_label = tf.concat([label, [[0], [0], [0]]], axis=1)

        output_label, output_indices = util_tfprojection.get_all_projected_from_3vps_modified_tf(
            all_label,
            no_bins=num_bins,
            img_dims=(net_width, net_height),
            verbose=False)

        if is_smoothing:
            stddev = 0.5

            max_indices = tf.argmax(output_label, axis=1)

            normalized = tf.distributions.Normal(
                loc=tf.reshape(tf.cast(max_indices, dtype=tf.float64),
                               (no_output_params, 1)),
                scale=tf.constant(stddev, dtype=tf.float64))

            probs = normalized.prob(
                tf.tile(
                    tf.reshape(
                        tf.cast(tf.range(output_label.shape[1]),
                                dtype=tf.float64), (1, -1)),
                    (no_output_params, 1)))

            act_normalized = probs / tf.reduce_sum(
                probs, axis=1, keepdims=True)
            label = tf.reshape(act_normalized, [-1])
        else:
            label = tf.reshape(output_label, [-1])

        print("SHAPE AT END:", image, label)
        # --------------------------------------------------------------------------------------------------------------------

        # shuffle requires 'min_after_dequeue' parameter (min to keep in queue)
        images, labels = tf.train.shuffle_batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=6 * FLAGS.batch_size,
            min_after_dequeue=4 * FLAGS.batch_size)

        labels = tf.stop_gradient(labels)

        ###########################
        # Reading evaluation data #
        ###########################
        if FLAGS.model_name == 'inception-v4':
            eval_path = ''
        else:
            eval_path = '<eval-CARLA-VP.tfrecords>'

        eval_max_batch_size = min(50, FLAGS.batch_size)
        no_eval_examples = sum(
            1 for _ in tf.python_io.tf_record_iterator(eval_path))
        divs = np.array(list(factors(no_eval_examples)))
        sorted_divs = divs[divs.argsort()]
        eval_batch_size = sorted_divs[sorted_divs < eval_max_batch_size][-1]
        print("EVALUATION BATCH SIZE:", eval_batch_size)
        print("Number of examples in evaluation dataset: ", no_eval_examples)
        eval_filename_queue = tf.train.string_input_producer(
            [eval_path])  # , num_epochs=2)

        e_image, e_label = util_tfio.read_and_decode_evaluation(
            eval_filename_queue, eval_num_classes, net_height, net_width)
        print("eval_num_classes:", eval_num_classes)

        # Pixel values here, before the pre-processing below, are in [0, 255]
        if FLAGS.model_name == 'vgg-m':
            e_image = e_image - average_image
        elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16':
            e_image = vgg_preprocessing.my_preprocess_image(e_image)
        elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \
                FLAGS.model_name == 'inception-v4':
            e_image = tf.cast(e_image, tf.float32) * (1. / 255)
            e_image = (e_image - 0.5) * 2
        else:
            sys.exit("Invalid value for model name!")

        e_images, e_labels = tf.train.batch(
            [e_image, e_label],
            batch_size=eval_batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * eval_batch_size)
        # --------------------------

        print("PREFETCH_QUEUE, CAPACITY:", FLAGS.batch_size, ", NUM_THREADS:",
              FLAGS.num_preprocessing_threads)
        batch_queue = slim.prefetch_queue.prefetch_queue(
            [images, labels],
            capacity=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads)

        images, labels = batch_queue.dequeue()

        if FLAGS.model_name == 'vgg-m':
            logits = vgg_m.cnn_vggm(images,
                                    num_classes=num_classes,
                                    model=model)

            eval_logits = vgg_m.cnn_vggm(e_images,
                                         num_classes=num_classes,
                                         model=model,
                                         reuse=True)
        elif FLAGS.model_name == 'vgg-16':
            with slim.arg_scope(vgg.vgg_arg_scope()):
                logits, end_points = vgg.vgg_16(images,
                                                num_classes=num_classes,
                                                is_training=True,
                                                dropout_keep_prob=dropout_val)

                eval_logits, _ = vgg.vgg_16(e_images,
                                            num_classes=num_classes,
                                            is_training=False,
                                            reuse=True)
        elif FLAGS.model_name == 'resnet-50':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                logits, end_points = resnet_v1.resnet_v1_50(
                    images, num_classes=num_classes, is_training=True)

                eval_logits, _ = resnet_v1.resnet_v1_50(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'resnet-101':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                logits, end_points = resnet_v1.resnet_v1_101(
                    images, num_classes=num_classes, is_training=True)

                eval_logits, _ = resnet_v1.resnet_v1_101(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'inception-v1':
            with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
                logits, end_points = inception_v1.inception_v1(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)

                eval_logits, _ = inception_v1.inception_v1(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'inception-v4':
            with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
                logits, end_points = inception_v4.inception_v4(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)

                eval_logits, _ = inception_v4.inception_v4(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'mobilenet-v1':
            with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
                logits, end_points = mobilenet_v1.mobilenet_v1(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)

                eval_logits, _ = mobilenet_v1.mobilenet_v1(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        else:
            sys.exit("Invalid value for model name!")

        jumps = int(num_classes / no_output_params)
        classification_loss_1 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels[:, :jumps],
                                                    logits=logits[:, :jumps]))
        classification_loss_2 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, jumps:2 * jumps],
                logits=logits[:, jumps:2 * jumps]))
        classification_loss_3 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, 2 * jumps:3 * jumps],
                logits=logits[:, 2 * jumps:3 * jumps]))
        classification_loss_4 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, 3 * jumps:4 * jumps],
                logits=logits[:, 3 * jumps:4 * jumps]))

        ##############################################################################################
        # L1 loss between the predicted and ground-truth bin indices, to ease
        # comparison with the validation loss

        logits_ind = tf.argmax(tf.reshape(logits,
                                          (-1, no_output_params, num_bins)),
                               axis=2)
        labels_ind = tf.argmax(tf.reshape(labels,
                                          (-1, no_output_params, num_bins)),
                               axis=2)
        print("Logits_ind shape:", logits_ind.shape)

        train_l1_loss = tf.reduce_sum(tf.abs(logits_ind - labels_ind))

        regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
        total_loss = (classification_loss_1 + classification_loss_2 +
                      classification_loss_3 + classification_loss_4 +
                      regularization_loss)

        print("After classification loss:")
        print(logits.shape)
        print(labels.shape)
        print("---------------------------------------")

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for losses.
        # for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #########################################
        # Configure the optimization procedure. #
        #########################################
        learning_rate = tf.placeholder(tf.float32,
                                       shape=[],
                                       name="learning_rate")
        optimizer = util_tftraining.configure_optimizer(learning_rate,
                                                        FLAGS=FLAGS)

        print("learning rate tensor:", learning_rate)

        # Variables to train.
        variables_to_train = util_tftraining.get_variables_to_train(
            FLAGS=FLAGS)

        print("-----------------------------------------")
        print("variables to train: ", variables_to_train)
        print("-----------------------------------------")

        train_op = slim.learning.create_train_op(
            total_loss=total_loss,
            optimizer=optimizer,
            variables_to_train=variables_to_train,
            global_step=global_step)

        if classification_loss_1 is not None:
            tf.summary.scalar('Losses/classification_loss_1',
                              classification_loss_1)
        if classification_loss_2 is not None:
            tf.summary.scalar('Losses/classification_loss_2',
                              classification_loss_2)
        if classification_loss_3 is not None:
            tf.summary.scalar('Losses/classification_loss_3',
                              classification_loss_3)
        if classification_loss_4 is not None:
            tf.summary.scalar('Losses/classification_loss_4',
                              classification_loss_4)

        if regularization_loss is not None:
            tf.summary.scalar('Losses/regularization_loss',
                              regularization_loss)

        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Merge all summaries together.
        tf.summary.merge(list(summaries), name='summary_op')

        session_config = tf.ConfigProto()
        session_config.allow_soft_placement = True
        session_config.gpu_options.allow_growth = True

        init_fn = util_tftraining.get_init_fn(FLAGS=FLAGS)

        print("Before learning.train", flush=True)
        print("---------------------------------------------------")
        print("---------------------------------------------------")

        early_stop_epochs = 10
        no_steps_in_epoch = int(np.ceil(num_samples / FLAGS.batch_size))
        scaffold = tf.train.Scaffold(saver=tf.train.Saver(
            max_to_keep=early_stop_epochs + 3))

        # Use integer step intervals so the modulo checks below fire reliably.
        show_eval_loss_every_steps = max(1, no_steps_in_epoch // 5)
        save_checkpoint_every_steps = max(1, no_steps_in_epoch // 5)

        with tf.train.MonitoredTrainingSession(
                master='',
                is_chief=True,
                checkpoint_dir=FLAGS.train_dir,
                scaffold=scaffold,
                hooks=None,
                chief_only_hooks=None,
                save_checkpoint_steps=save_checkpoint_every_steps,
                save_summaries_secs=FLAGS.save_summaries_secs,
                config=session_config,
                stop_grace_period_secs=120,
                log_step_count_steps=0,
                max_wait_secs=10) as mon_sess:

            print("-----------------------------------------")
            if init_fn is not None:
                init_fn(mon_sess)
                print("Succesfully loaded model")
            else:
                print("A model already exists in the 'train_dir' path")
            print("-----------------------------------------")

            last_sum_train_loss = 0
            last_sum_tl1_loss = 0
            best_sum_train_loss = np.inf
            step_no = 0
            current_lr = FLAGS.learning_rate

            no_params = 7
            consider_params = 4

            consider_top = 11

            best_eval_wa = np.inf
            best_eval_epoch = 0

            while True:
                _, train_loss, tl1_loss = mon_sess.run(
                    [train_op, total_loss, train_l1_loss],
                    feed_dict={learning_rate: current_lr})
                last_sum_train_loss += train_loss
                last_sum_tl1_loss += tl1_loss

                epoch_no = int(
                    np.floor((step_no * FLAGS.batch_size) / num_samples))

                if np.mod(step_no, FLAGS.log_every_n_steps) == 0:
                    print("Epoch {}, Step {}, lr={:0.5f}, Loss: {}".format(
                        epoch_no, step_no, current_lr, train_loss),
                          flush=True)

                # calculating evaluation loss alongside as well
                if np.mod(step_no, show_eval_loss_every_steps) == 0:
                    print("--In eval block--")

                    total_l1_loss = 0
                    total_wa_loss = 0

                    for loop_no in range(
                            int(np.floor(no_eval_examples / eval_batch_size))):
                        np_rawpreds, np_labels = mon_sess.run(
                            [eval_logits, e_labels])

                        for i in range(eval_batch_size):

                            predicted_label = np.argmax(
                                np_rawpreds[i, :].reshape(consider_params, -1),
                                axis=1)
                            gt_label = np.argmax(np_labels[i, :].reshape(
                                no_params, -1)[:consider_params, :],
                                                 axis=1)

                            l1_loss = np.sum(np.abs(predicted_label -
                                                    gt_label))

                            wa = 0
                            for ln in range(consider_params):
                                predsoft = my_softmax(
                                    np_rawpreds[i, :].reshape(
                                        consider_params,
                                        -1)[ln, :][np.newaxis])
                                predsoft = predsoft.squeeze()
                                labsoft = np_labels[i, :].reshape(
                                    no_params, -1)[ln, :]
                                topindices = predsoft.argsort()[::-1][:consider_top]
                                probsindices = predsoft[topindices] / np.sum(
                                    predsoft[topindices])
                                wa += np.abs(
                                    int(np.round(np.sum(probsindices * topindices)))
                                    - labsoft.argmax())

                            total_l1_loss += l1_loss
                            total_wa_loss += wa

                    avg_manhattan_loss = total_l1_loss / no_eval_examples
                    avg_wa_loss = total_wa_loss / no_eval_examples

                    print(
                        "-------------------------------------------------------------------"
                    )
                    print("Average manhattan loss per scalar:",
                          avg_manhattan_loss / consider_params)
                    print(
                        "Average manhattan loss (weighted avg. of top bins) per scalar:",
                        avg_wa_loss / consider_params)
                    print(
                        "-------------------------------------------------------------------",
                        flush=True)

                    if avg_wa_loss < best_eval_wa:
                        best_eval_wa = avg_wa_loss
                        best_eval_epoch = epoch_no

                    if avg_wa_loss > best_eval_wa and (
                            epoch_no - best_eval_epoch
                    ) > early_stop_epochs and current_lr < 1e-3 and epoch_no > 10:
                        print("STOPPING TRAINING at epoch: ", epoch_no,
                              ", best epoch was:", best_eval_epoch, "(step: ",
                              best_eval_epoch * num_samples / FLAGS.batch_size,
                              ")")
                        print("Current eval_wa:", avg_wa_loss,
                              ", best eval_wa:", best_eval_wa)
                        break

                    if step_no > 0:
                        last_sum_train_loss /= show_eval_loss_every_steps
                        last_sum_tl1_loss /= (no_steps_in_epoch *
                                              FLAGS.batch_size *
                                              no_output_params)
                        if last_sum_train_loss > best_sum_train_loss:
                            if current_lr > FLAGS.end_learning_rate:
                                print("Dividing learning rate by 10.0")
                                current_lr /= 10.0
                                best_sum_train_loss = last_sum_train_loss
                            else:
                                print(
                                    "Already reached lowest possible lr i.e. ",
                                    current_lr)
                        else:
                            best_sum_train_loss = last_sum_train_loss

                        print("last_sum_train_loss:", last_sum_train_loss)
                        print("L1_train_loss:", last_sum_tl1_loss)
                        last_sum_train_loss = 0
                        last_sum_tl1_loss = 0
                #########################################################################################

                step_no += 1

                if FLAGS.max_number_of_steps is not None:
                    if step_no >= FLAGS.max_number_of_steps:
                        break

            print("Final Step {}, Loss: {}".format(step_no, train_loss))

        print("---------------------The End-----------------------")
        print("---------------------------------------------------")
        print("---------------------------------------------------")
Example #10
    if not os.path.isfile(tfmodel + '.meta'):
        raise IOError(
            ('{:s} not found.\nDid you run ./data/script/'
             'fetch_faster_rcnn_models.sh?').format(tfmodel + '.meta'))

    # set config
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True

    # init session
    sess = tf.Session(config=tfconfig)
    # load network
    if demonet == 'vgg16':
        net = vgg16(batch_size=1)
    elif demonet == 'inception_v1':
        net = inception_v1(batch_size=1)
    else:
        raise NotImplementedError

    net.create_architecture(sess,
                            "TEST",
                            8,
                            tag='default',
                            anchor_scales=[1, 2, 3])
    saver = tf.train.Saver()
    saver.restore(sess, tfmodel)

    print('Loaded network {:s}'.format(tfmodel))

    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), [
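    # The call above is truncated at the list of output node names. A minimal
    # sketch of freezing the restored graph to a .pb file, with a hypothetical
    # output node name ('cls_prob' is illustrative, not taken from this script):
    #
    # output_graph_def = tf.graph_util.convert_variables_to_constants(
    #     sess, sess.graph.as_graph_def(), ['cls_prob'])
    # with tf.gfile.GFile('frozen_model.pb', 'wb') as f:
    #     f.write(output_graph_def.SerializeToString())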
Example #11
0
                                         features=(tf.float32, [224, 224, 3]),
                                         labels=(tf.int32, [1]),
                                         batch_size=options.batchSize,
                                         validation_dataset=val_data)

    images, labels = dataset.tensors

    # The sequence file's labels are one-based, so subtract 1 to make them zero-based.
    zero_based_label = labels - 1

    is_training = tf.placeholder(dtype=tf.bool, shape=())

    with slim.arg_scope(inception_v1.inception_v1_arg_scope(weight_decay=0.0,
                                                            use_batch_norm=False)):
        logits, end_points = inception_v1.inception_v1(images,
                                                       dropout_keep_prob=0.6,
                                                       num_classes=1000,
                                                       is_training=is_training)

    loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                                 labels=zero_based_label))

    # 1281167 is the number of images in the ImageNet-1k training set.
    iterationPerEpoch = int(ceil(float(1281167) / options.batchSize))
    if options.maxEpoch:
        maxIteration = iterationPerEpoch * options.maxEpoch
    else:
        maxIteration = options.maxIteration
    warmup_iteration = options.warmupEpoch * iterationPerEpoch

    if warmup_iteration == 0:
        warmupDelta = 0.0
    else:
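    # The nonzero-warmup branch is truncated above. A typical linear warmup
    # (an assumption; maxLr is a hypothetical option name):
    #
    #     warmupDelta = (options.maxLr - options.learningRate) / warmup_iteration
    #
    # so the LR ramps linearly from options.learningRate to options.maxLr over
    # the first warmup_iteration steps.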
Example #12
0
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    tf.logging.set_verbosity(tf.logging.INFO)

    full_start = timer()
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    all_images_target_class, all_images_true_label = load_target_class(
        FLAGS.input_dir)

    if not os.path.exists(FLAGS.output_dir):
        os.mkdir(FLAGS.output_dir)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        target_class_input = tf.placeholder(tf.int32, shape=[FLAGS.batch_size])
        momentum = FLAGS.momentum
        eps = 2.0 * FLAGS.max_epsilon / 255.0
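        # e.g. FLAGS.max_epsilon = 16 gives eps = 2 * 16 / 255, about 0.125 in
        # the [-1, 1] pixel space.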
        alpha = 0.2
        num_classes = 1000
        num_classes_a = 1001
        # image = x_input

        image = input_diversity(x_input)
        # image = batch_dct2d(image)
        """
        224 input
        """

        processed_imgs_res_v1_50 = preprocess_for_model(image, 'resnet_v1_50')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
                processed_imgs_res_v1_50,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)

        processed_imgs_res_v1_101 = preprocess_for_model(
            image, 'resnet_v1_101')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_101, end_points_res_v1_101 = resnet_v1.resnet_v1_101(
                processed_imgs_res_v1_101,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)

        processed_res_v1 = preprocess_for_model(image, 'resnet_v1_152')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_152, end_points_res_v1 = resnet_v1.resnet_v1_152(
                processed_res_v1,
                num_classes=num_classes,
                is_training=False,
                scope='resnet_v1_152',
                reuse=tf.AUTO_REUSE)

        processed_imgs_vgg_16 = preprocess_for_model(image, 'vgg_16')
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(
                processed_imgs_vgg_16,
                num_classes=num_classes,
                is_training=False,
                scope='vgg_16')

        processed_imgs_vgg_19 = preprocess_for_model(image, 'vgg_19')
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits_vgg_19, end_points_vgg_19 = vgg.vgg_19(
                processed_imgs_vgg_19,
                num_classes=num_classes,
                is_training=False,
                scope='vgg_19')

        logits_clean_a = (logits_res_v1_50 + logits_res_v1_101 +
                          logits_res_v1_152 + logits_vgg_16 +
                          logits_vgg_19) / 5.0

        processed_imgs_inception_v1 = preprocess_for_model(
            image, 'inception_v1')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            logits_inception_v1, end_points_inception_v1 = inception_v1.inception_v1(
                processed_imgs_inception_v1,
                num_classes=num_classes_a,
                is_training=False,
                reuse=tf.AUTO_REUSE)
        """
        299 input
        """

        x_div = preprocess_for_model(image, 'inception_v3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_inc_v3, end_points_inc_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionV3')

        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits_inc_v4, end_points_inc_v4 = inception_v4.inception_v4(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionV4')

        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_inc_res_v2, end_points_inc_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionResnetV2')

        logits_clean_b = (logits_inc_v3 + logits_inc_v4 + logits_inc_res_v2 +
                          logits_inception_v1) / 4.0
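        # Average of the four 1001-class Inception-family models; the extra
        # background class is stripped later.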
        """
        add adv model
        """
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='AdvInceptionV3')

        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='Ens3AdvInceptionV3')

        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='Ens4AdvInceptionV3')

        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='EnsAdvInceptionResnetV2')

        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_adv_res_v2, end_points_adv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='AdvInceptionResnetV2')

        logits_ens_a = (logits_adv_v3 + logits_ens3_adv_v3 + logits_ens4_adv_v3
                        + logits_ensadv_res_v2 + logits_adv_res_v2) / 5.0
        logits_ens_aux = (end_points_adv_v3['AuxLogits'] +
                          end_points_ens3_adv_v3['AuxLogits'] +
                          end_points_ens4_adv_v3['AuxLogits'] +
                          end_points_adv_res_v2['AuxLogits'] +
                          end_points_ensadv_res_v2['AuxLogits']) / 5.0
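        # logits_ens_aux averages the auxiliary-head logits of the same five
        # adversarially trained models.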

        label_test = tf.argmax(logits_adv_v3, axis=1)
        """
        ensemble model loss
        """
        clean_logits = (logits_clean_a + logits_clean_b[:, 1:1001]) / 2.0
        adv_logits = logits_ens_a[:, 1:1001] + logits_ens_aux[:, 1:1001]
        logits = (clean_logits + adv_logits) / 2.0

        ens_labels = tf.argmax(logits, axis=1)

        one_hot = tf.one_hot(target_class_input, num_classes)

        loss_adv_v3 = tf.losses.softmax_cross_entropy(one_hot,
                                                      logits_adv_v3[:, 1:1001],
                                                      label_smoothing=0.0,
                                                      weights=1.0)
        loss_ens3_adv_v3 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ens3_adv_v3[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_ens4_adv_v3 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ens4_adv_v3[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_ensadv_res_v2 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ensadv_res_v2[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_adv_res_v2 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_adv_res_v2[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)

        loss_res_v1_101 = tf.losses.softmax_cross_entropy(one_hot,
                                                          logits_res_v1_101,
                                                          label_smoothing=0.0,
                                                          weights=1.0)
        loss_res_v1_50 = tf.losses.softmax_cross_entropy(one_hot,
                                                         logits_res_v1_50,
                                                         label_smoothing=0.0,
                                                         weights=1.0)
        loss_vgg_16 = tf.losses.softmax_cross_entropy(one_hot,
                                                      logits_vgg_16,
                                                      label_smoothing=0.0,
                                                      weights=1.0)
        loss_res_v1_152 = tf.losses.softmax_cross_entropy(one_hot,
                                                          logits_res_v1_152,
                                                          label_smoothing=0.0,
                                                          weights=1.0)

        total_loss = tf.losses.softmax_cross_entropy(one_hot,
                                                     logits,
                                                     label_smoothing=0.0,
                                                     weights=1.0)
        noise = tf.gradients(total_loss, x_input)[0]

        kernel = gkern(15, FLAGS.sig).astype(np.float32)
        stack_kernel = np.stack([kernel, kernel, kernel]).swapaxes(2, 0)
        stack_kernel = np.expand_dims(stack_kernel, 3)
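        # stack_kernel has shape [15, 15, 3, 1]: one Gaussian filter per input
        # channel with channel_multiplier 1, as depthwise_conv2d expects.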

        noise = tf.nn.depthwise_conv2d(noise,
                                       stack_kernel,
                                       strides=[1, 1, 1, 1],
                                       padding='SAME')
        # [batch, out_height, out_width, in_channels * channel_multiplier]

        noise = noise / tf.reshape(
            tf.contrib.keras.backend.std(tf.reshape(noise,
                                                    [FLAGS.batch_size, -1]),
                                         axis=1), [FLAGS.batch_size, 1, 1, 1])
        # noise = momentum * grad + noise
        noise = noise / tf.reshape(
            tf.contrib.keras.backend.std(tf.reshape(noise,
                                                    [FLAGS.batch_size, -1]),
                                         axis=1), [FLAGS.batch_size, 1, 1, 1])
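        # With the momentum update above commented out, the second
        # std-normalization is effectively a no-op (the tensor already has
        # unit per-sample std); it looks like a leftover of the MI-FGSM
        # momentum formulation.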

        s1 = tf.train.Saver(slim.get_model_variables(scope='InceptionV1'))
        s2 = tf.train.Saver(slim.get_model_variables(scope='InceptionV3'))
        s3 = tf.train.Saver(slim.get_model_variables(scope='InceptionV4'))

        s4 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_50'))
        s5 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_101'))
        s6 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_152'))

        s7 = tf.train.Saver(slim.get_model_variables(scope='vgg_16'))
        s8 = tf.train.Saver(slim.get_model_variables(scope='vgg_19'))
        s9 = tf.train.Saver(
            slim.get_model_variables(scope='InceptionResnetV2'))

        s10 = tf.train.Saver(
            slim.get_model_variables(scope='AdvInceptionResnetV2'))
        s11 = tf.train.Saver(
            slim.get_model_variables(scope='Ens3AdvInceptionV3'))
        s12 = tf.train.Saver(
            slim.get_model_variables(scope='Ens4AdvInceptionV3'))
        s13 = tf.train.Saver(
            slim.get_model_variables(scope='EnsAdvInceptionResnetV2'))
        s14 = tf.train.Saver(slim.get_model_variables(scope='AdvInceptionV3'))
        print('Created Graph')

        with tf.Session() as sess:
            s1.restore(sess, FLAGS.checkpoint_path_inception_v1)
            s2.restore(sess, FLAGS.checkpoint_path_inception_v3)
            s3.restore(sess, FLAGS.checkpoint_path_inception_v4)

            s4.restore(sess, FLAGS.checkpoint_path_resnet_v1_50)
            s5.restore(sess, FLAGS.checkpoint_path_resnet_v1_101)
            s6.restore(sess, FLAGS.checkpoint_path_resnet_v1_152)

            s7.restore(sess, FLAGS.checkpoint_path_vgg_16)
            s8.restore(sess, FLAGS.checkpoint_path_vgg_19)
            s9.restore(sess, FLAGS.checkpoint_path_inception_resnet_v2)

            s10.restore(sess, FLAGS.checkpoint_path_adv_inception_resnet_v2)
            s11.restore(sess, FLAGS.checkpoint_path_ens3_adv_inception_v3)
            s12.restore(sess, FLAGS.checkpoint_path_ens4_adv_inception_v3)
            s13.restore(sess,
                        FLAGS.checkpoint_path_ens_adv_inception_resnet_v2)
            s14.restore(sess, FLAGS.checkpoint_path_adv_inception_v3)

            print('Initialized Models')
            processed = 0.0
            defense, tgt, untgt, final = 0.0, 0.0, 0.0, 0.0
            idx = 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                target_class_for_batch = (
                    [all_images_target_class[n[:-4]] for n in filenames] + [0] *
                    (FLAGS.batch_size - len(filenames)))
                true_label_for_batch = (
                    [all_images_true_label[n[:-4]] for n in filenames] + [0] *
                    (FLAGS.batch_size - len(filenames)))

                x_max = np.clip(images + eps, -1.0, 1.0)
                x_min = np.clip(images - eps, -1.0, 1.0)
                adv_img = np.copy(images)

                for i in range(FLAGS.iterations):
                    # loss_set = sess.run(
                    #     [loss_adv_v3, loss_ens3_adv_v3, loss_ens4_adv_v3,
                    #      loss_ensadv_res_v2, loss_adv_res_v2, loss_res_v1_101,
                    #      loss_res_v1_50, loss_vgg_16, loss_res_v1_152],
                    #     feed_dict={x_input: batch_NLM(adv_img),
                    #                target_class_input: target_class_for_batch})
                    # print("loss:", loss_set)

                    # Every 5 iterations, denoise the running adversarial batch
                    # (batch_NLM, defined elsewhere in the script, presumably a
                    # non-local-means filter).
                    adv_img = batch_NLM(adv_img) if i % 5 == 0 else adv_img

                    ens_loss, pred, grad, pred_adv_v3 = sess.run(
                        [total_loss, ens_labels, noise, label_test],
                        feed_dict={
                            x_input: adv_img,
                            target_class_input: target_class_for_batch
                        })
                    adv_img = adv_img - alpha * np.clip(np.round(grad), -2, 2)
                    adv_img = np.clip(adv_img, x_min, x_max)
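                    # Targeted step: descend the target-class loss. The smoothed
                    # gradient was std-normalized, so round/clip yields small
                    # integer steps in {-2, ..., 2}, scaled by alpha.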

                    print("{} \t total_loss {}".format(i, ens_loss))
                    print('prediction   :', pred)
                    print('target_label :', target_class_for_batch)
                    print('true_label   :', true_label_for_batch)

                    # print ("{} \t total_loss {} predction {} \t  target class {} \t true label  {} \t ".format(i,ens_loss,pred,target_class_for_batch,true_label_for_batch))

                    # print ("model predction {} \t  target class {} \t true label  {} \t ".format(pred,target_class_for_batch,true_label_for_batch))

                print("final prediction {} \t target class {} \t true label {}"
                      .format(pred, target_class_for_batch,
                              true_label_for_batch))

                processed += FLAGS.batch_size
                tgt += sum(
                    np.equal(np.array(pred), np.array(target_class_for_batch)))
                defense += sum(
                    np.equal(np.array(pred), np.array(true_label_for_batch)))
                untgt = processed - tgt - defense
                print("processed {} \t acc {} {} \t tgt {} {} \t untgt {} {} ".
                      format(processed, defense, defense / processed, tgt,
                             tgt / processed, untgt, untgt / processed))

                full_end = timer()
                print("DONE: Processed {} images in {} sec".format(
                    processed, full_end - full_start))

                save_images(adv_img, filenames, FLAGS.output_dir)
            print("DONE: Processed {} images in {} sec".format(
                processed, full_end - full_start))
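
# gkern and input_diversity are referenced above but not defined in this
# snippet. Minimal sketches consistent with how they are called, following
# the usual TI-FGSM / DI-FGSM formulations -- assumptions, not code recovered
# from the original script:

import numpy as np
import scipy.stats as st
import tensorflow as tf


def gkern(kernlen=15, nsig=3):
    """2-D Gaussian kernel of size kernlen x kernlen with std nsig."""
    x = np.linspace(-nsig, nsig, kernlen)
    kern1d = st.norm.pdf(x)
    kernel_raw = np.outer(kern1d, kern1d)
    return kernel_raw / kernel_raw.sum()


def input_diversity(x, image_size=224, resize_size=256, prob=0.7):
    """With probability `prob`, randomly resize x and zero-pad it back to
    resize_size x resize_size (DI-FGSM-style input diversity)."""
    rnd = tf.random_uniform((), image_size, resize_size, dtype=tf.int32)
    rescaled = tf.image.resize_images(
        x, [rnd, rnd], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    rem = resize_size - rnd
    pad_top = tf.random_uniform((), 0, rem, dtype=tf.int32)
    pad_left = tf.random_uniform((), 0, rem, dtype=tf.int32)
    padded = tf.pad(rescaled, [[0, 0], [pad_top, rem - pad_top],
                               [pad_left, rem - pad_left], [0, 0]])
    return tf.cond(tf.random_uniform(()) < prob, lambda: padded, lambda: x)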