def augmentation_demo(filename, it=20, mean_RGB=None):
    """
    Little demo to show how data augmentation is performed on a single image.

    The original image is drawn in the left panel. The right panel then
    cycles through the augmented versions, moving on after a key/mouse press
    or after one second, whichever comes first.

    Parameters
    ----------
    filename : str
        Path of the image
    it : int
        Number of examples of data augmentation
    mean_RGB : array-like of 3 floats, None, optional
        Per-channel mean passed to `data_augmentation` and added to every
        returned image before it is displayed. A built-in triple is used
        when None.

    """
    if mean_RGB is None:
        mean_RGB = np.array([107.59348955, 112.1047813, 80.9982362])
    else:
        mean_RGB = np.array(mean_RGB)
    batch = data_augmentation([filename] * it, mean_RGB=mean_RGB)

    plt.ion()
    fig, [ax1, ax2] = plt.subplots(1, 2, num=1)
    ax1.set_title('Original image')
    ax2.set_title('Transformed image')
    image = Image.open(filename)
    ax1.imshow(np.asarray(image))

    mean_RGB = mean_RGB.astype(np.float32)
    for im in batch:
        im = im[::-1, :, :]  # reverse the order of the first (channel) axis
        im = np.transpose(im, (1, 2, 0))  # channels last, as imshow expects
        im = im + mean_RGB[None, None, :]
        # Clip before the cast: a float outside [0, 255] would otherwise wrap
        # around when converted to uint8 and show up as a wrong colour.
        ax2.imshow(np.clip(im, 0, 255).astype(np.uint8))
        plt.waitforbuttonpress(1)
def batch_fetching(image_list, image_labels, output):
    """
    Build a test-mode batch for every image and collect the successful ones.

    Parameters
    ----------
    image_list : list
        Image locations; each one is handed to `data_augmentation` with
        ``filemode='url'``.
    image_labels : list
        Labels, index-aligned with `image_list`.
    output : dict
        Must hold lists under the keys 'batch' and 'labels'. Both lists are
        appended to in place; nothing is returned.

    """
    for i, im in enumerate(image_list):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Image number: {}'.format(i))
        try:
            batch_tmp = data_augmentation([im] * 10, mode='test', filemode='url')
        except Exception:
            # Best effort: an image that cannot be processed is reported and
            # skipped so that one bad entry does not abort the whole run.
            print('Error at Image {}'.format(i))
            continue
        output['batch'].append(batch_tmp)
        output['labels'].append(image_labels[i])
def batch_fetching(image_list, image_labels, output):
    """
    Build test-mode batches for observations made of several images.

    Parameters
    ----------
    image_list : list of str
        One entry per observation; each entry holds one or more image
        locations separated by single spaces.
    image_labels : list
        Labels, index-aligned with `image_list`.
    output : dict
        Must hold lists under the keys 'batch', 'labels' and 'im_per_obs'.
        The lists are appended to in place; nothing is returned.

    """
    for i, im in enumerate(image_list):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Image number: {}'.format(i))
        batch_tmp = []
        for subim in im.split(' '):
            try:
                subbatch_tmp = data_augmentation([subim] * 10, mode='test', filemode='url')
            except Exception:
                # Best effort: report the failing image and try the next one.
                print('Error at Image {}'.format(i))
                continue
            batch_tmp.append(subbatch_tmp)
        # Keep the observation only if more than one image was processed,
        # i.e. enough queries succeeded to allow a multi-image prediction.
        if len(batch_tmp) > 1:
            output['batch'].append(batch_tmp)
            output['labels'].append(image_labels[i])
            output['im_per_obs'].append(len(batch_tmp))
def single_prediction(test_func, im_list, aug_params=None, crop_mode='random'):
    """
    Function for identifying a SINGLE plant with one or more images.
    It combines the predictions for all the images to output the best possible
    labels overall.

    Parameters
    ----------
    test_func : theano function
        Function to make predictions
    im_list : list
        List of image filepaths or urls.
    aug_params : dict, None, optional
        Parameters for data augmentation. A 'mode' entry is ignored because
        the mode is always forced to 'test'. The dict is not modified.
    crop_mode : {'random','standard'}
        Modality of cropping. Random usually works better.

    Returns
    -------
    Arrays with top 5 predicted labels numbers and their corresponding probabilities.

    Raises
    ------
    ValueError
        If `crop_mode` is not one of the supported values, or if no image in
        `im_list` could be processed (including an empty `im_list`).

    """
    if crop_mode not in ('random', 'standard'):
        raise ValueError(
            "crop_mode must be 'random' or 'standard', got {!r}".format(crop_mode))
    # Work on a copy so the caller's dict does not silently lose its 'mode' key.
    aug_params = dict(aug_params or {})
    aug_params.pop('mode', None)

    pred = []
    for i, im in enumerate(im_list):
        print('Image number: {}'.format(i))
        try:
            if crop_mode == 'random':
                batch = data_augmentation([im] * 10, mode='test', **aug_params)
            else:
                batch = standard_tencrop_batch(im, **aug_params)
        except Exception:
            # Best effort: an unreadable image is reported and left out.
            print('Error at Image {}'.format(i))
            continue
        pred_raw = test_func(batch)  # probabilities for all labels for all 10 crops
        pred.append(np.sum(pred_raw, axis=0) / 10.)  # mean probabilities across crops

    if not pred:
        raise ValueError('None of the images could be processed')

    # Average over the images that actually produced a prediction; dividing
    # by len(im_list) would scale the probabilities down for every failure.
    pred_prob = np.sum(pred, axis=0) / len(pred)
    args = pred_prob.argsort()[-5:][::-1]  # top5 predicted labels
    return np.array(args), np.array(pred_prob[args])
# Example #5
# 0
def test_predictions(test_func, im_list, aug_params=None, crop_mode='random'):
    """
    Function for testing single images with random ten crop.

    Parameters
    ----------
    test_func : theano function
        Function to make predictions.
    im_list : list
        List of image filepaths or urls.
    aug_params : dict, None, optional
        Parameters for data augmentation. A 'mode' entry is ignored because
        the mode is always forced to 'test'. The dict is not modified.
    crop_mode : {'random','standard'}
        Modality of cropping. Random usually works better.

    Returns
    -------
    Arrays with top 5 predicted labels numbers and their corresponding probabilities.
    There is one row per entry of `im_list`; an image that could not be
    processed yields a row of five zeros in both arrays.

    Raises
    ------
    ValueError
        If `crop_mode` is not one of the supported values.

    """
    if crop_mode not in ('random', 'standard'):
        raise ValueError(
            "crop_mode must be 'random' or 'standard', got {!r}".format(crop_mode))
    # Work on a copy so the caller's dict does not silently lose its 'mode' key.
    aug_params = dict(aug_params or {})
    aug_params.pop('mode', None)

    pred_lab, pred_prob = [], []
    for i, im in enumerate(im_list):
        print('Image number: {}'.format(i))
        try:
            if crop_mode == 'random':
                batch = data_augmentation([im] * 10, mode='test', **aug_params)
            else:
                batch = standard_tencrop_batch(im, **aug_params)
        except Exception:
            print('Error at Image {}'.format(i))
            # Placeholder rows keep the output aligned with im_list.
            pred_lab.append([0] * 5)
            pred_prob.append([0] * 5)
            continue
        pred_raw = test_func(batch)  # probabilities for all labels for all 10 crops
        pred_tmp = np.sum(pred_raw, axis=0) / 10.  # mean probabilities across crops
        args = pred_tmp.argsort()[-5:][::-1]  # top5 predicted labels
        pred_lab.append(args)
        pred_prob.append(pred_tmp[args])
    return np.array(pred_lab), np.array(pred_prob)
    def train_and_save(self, X_train, y_train, num_epochs=420, lamda=1e-4):
        """
        Build the capsule-network graph, train it and export the result.

        The graph is: one convolution, a primary-capsule layer, a fully
        connected capsule layer with dynamic routing, then two dense layers
        feeding a softmax. After training, a checkpoint and a frozen ``.pb``
        graph are written to the ``model`` directory.

        Parameters
        ----------
        X_train : array
            Training images, fed to the ``input_x`` placeholder.
        y_train : array
            Training targets, fed to the ``input_y`` placeholder.
        num_epochs : int
            Number of passes of the training loop.
        lamda : float
            Scale of the L2 penalty on the two dense weight matrices.

        """
        # The graph-level seed must be set before any op is created: random
        # ops pick up their seed at construction time, so seeding afterwards
        # leaves the weight initialisation non-reproducible.
        tf.set_random_seed(2018)

        img_size = global_vals.resized_image_size
        num_classes = global_vals.num_classes
        X = tf.placeholder(tf.float32, [None, img_size, img_size, 3],
                           name='input_x')
        y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
        lam = tf.placeholder(tf.float32, name='lambda')
        with tf.variable_scope('conv1_layer'):
            conv1 = tf.contrib.layers.conv2d(X,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')

        with tf.variable_scope('primary_layer'):
            primary_caps, activation = capslayer.layers.primaryCaps(
                conv1,
                filters=32,
                kernel_size=9,
                strides=2,
                out_caps_shape=[8, 1],
                method='logistic')
        with tf.variable_scope('digit_layer'):
            # The reshape pins the leading dimension to self.batch_size.
            primary_caps = tf.reshape(primary_caps,
                                      shape=[self.batch_size, -1, 8, 1])
            self.digit_caps, self.activation = capslayer.layers.fully_connected(
                primary_caps,
                activation,
                num_outputs=self.num_classes,
                out_caps_shape=[16, 1],
                routing_method='DynamicRouting')

        # input: [None,-1]
        # output: [None,global_vals.output_dim_vectors]
        dim_vectors = global_vals.output_dim_vectors
        W_fc = tf.Variable(
            tf.truncated_normal(
                shape=[self.activation.get_shape().as_list()[1], dim_vectors],
                stddev=0.1))
        b_fc = tf.Variable(tf.constant(0.0, shape=[dim_vectors]))
        z_fc = tf.nn.relu(tf.matmul(self.activation, W_fc) + b_fc,
                          name='output_vector')

        # softmax layer
        # output: [None,num_classes]
        W_fc2 = tf.Variable(
            tf.truncated_normal(shape=[dim_vectors, self.num_classes],
                                stddev=0.1))
        b_fc2 = tf.Variable(tf.constant(0.0, shape=[self.num_classes]))
        # NOTE(review): the logits pass through a ReLU before softmax and the
        # cross-entropy loss; confirm this is intended.
        z_fc2 = tf.nn.relu(tf.matmul(z_fc, W_fc2) + b_fc2, name='output_layer')
        prob = tf.nn.softmax(z_fc2, name='probability')

        # cost function
        # Use the fed `lam` placeholder as the L2 scale so that the `lamda`
        # argument actually takes effect (it was hard-coded to 1e-4 before).
        regularizer = tf.contrib.layers.l2_regularizer(lam)
        regularization = regularizer(W_fc) + regularizer(W_fc2)
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=y, logits=z_fc2)) + regularization

        train = tf.train.AdadeltaOptimizer().minimize(cost)
        # The node named 'predict' is the output kept in the frozen graph.
        pred = tf.argmax(prob, axis=1, output_type='int32', name='predict')
        correct_prediction = tf.equal(
            pred, tf.argmax(y, axis=1, output_type='int32'))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            for epoch in range(num_epochs):
                train_generator = data_utils.data_augmentation()
                minibatch_cost = None
                batches = 0
                for X_data, y_data in train_generator.flow(
                        X_train, y_train, batch_size=self.batch_size):
                    _, minibatch_cost = sess.run([train, cost],
                                                 feed_dict={
                                                     X: X_data,
                                                     y: y_data,
                                                     lam: lamda
                                                 })
                    batches += 1
                    # Leave the generator loop by hand once as many
                    # minibatches as there are training samples have run.
                    if batches >= X_train.shape[0]:
                        break
                if epoch % 10 == 0:
                    print(str((time.strftime('%Y-%m-%d %H:%M:%S'))))
                    print('cost after epoch {}:{}'.format(
                        epoch, minibatch_cost))

            # This is the accuracy tensor defined above; tensor.eval() and
            # Session.run() differ very little.
            train_acc = accuracy.eval(feed_dict={
                X: X_train[:100],
                y: y_train[:100],
                lam: lamda
            })
            print('train accuracy', train_acc)

            # save model
            # NOTE(review): only the two dense layers are listed here, so the
            # checkpoint does not contain the conv/capsule variables.
            saver = tf.train.Saver({
                'W_fc': W_fc,
                'b_fc': b_fc,
                'W_fc2': W_fc2,
                'b_fc2': b_fc2
            })
            if not os.path.exists('model'):
                os.mkdir('model')
            saver.save(sess, os.path.join('model', 'caps_model.ckpt'))
            # Save the trained model as a .pb file so that it can be used
            # from Android Studio.
            output_graph_def = graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names=['predict'])
            with tf.gfile.FastGFile(
                    os.path.join('model', 'gesture_caps.pb'),
                    mode='wb') as f:  # 'wb': w = write, b = binary mode
                f.write(output_graph_def.SerializeToString())
def cnn_model(X_train, y_train, keep_prob=0.8, lamda=1e-4, num_epochs=450):
    """
    Build a two-convolution CNN, train it and export the result.

    The graph is: two conv + max-pool stages, a dense layer with dropout and
    a softmax output. After training, a checkpoint and a frozen ``.pb`` graph
    are written to the ``model`` directory.

    Parameters
    ----------
    X_train : array
        Training images, fed to the ``input_x`` placeholder
        (declared as [None, 64, 64, 3]).
    y_train : array
        Training targets, fed to the ``input_y`` placeholder.
    keep_prob : float
        Dropout keep probability used during training.
    lamda : float
        Scale of the L2 penalty on the two dense weight matrices.
    num_epochs : int
        Number of passes of the training loop.

    """
    # The graph-level seed must be set before any op is created: random ops
    # pick up their seed at construction time, so seeding afterwards does not
    # give consistent results.
    tf.set_random_seed(2018)

    print('X_train shape:', X_train.shape)
    print('y_train shape:', y_train.shape)
    X = tf.placeholder(tf.float32, [None, 64, 64, 3], name='input_x')
    y = tf.placeholder(tf.float32, [None, global_vals.num_classes],
                       name='input_y')
    # Defaults to 1.0 (no dropout) whenever it is not fed.
    kp = tf.placeholder_with_default(1.0, shape=(), name='keep_prob')
    lam = tf.placeholder(tf.float32, name='lambda')

    # conv1
    # input: [None,64,64,3]
    # output: [None,32,32,32]
    W_conv1 = weight_variable([5, 5, 3, 32])
    b_conv1 = bias_variable([32])
    z1 = tf.nn.relu(conv2d(X, W_conv1) + b_conv1)
    maxpool1 = max_pool_2x2(z1)

    # conv2
    # output: [None,16,16,64]
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    z2 = tf.nn.relu(conv2d(maxpool1, W_conv2) + b_conv2)
    maxpool2 = max_pool_2x2(z2)

    # full connection1
    # output: [None,global_vals.output_dim_vectors]
    W_fc1 = weight_variable([16 * 16 * 64, global_vals.output_dim_vectors])
    b_fc1 = bias_variable([global_vals.output_dim_vectors])
    maxpool2_flat = tf.reshape(maxpool2, [-1, 16 * 16 * 64])
    z_fc1 = tf.nn.relu(tf.matmul(maxpool2_flat, W_fc1) + b_fc1,
                       name='output_vector')
    z_fc1_drop = tf.nn.dropout(z_fc1, keep_prob=kp)

    # softmax layer
    # output: [None,num_classes]
    W_fc2 = weight_variable(
        [global_vals.output_dim_vectors, global_vals.num_classes])
    b_fc2 = bias_variable([global_vals.num_classes])
    z_fc2 = tf.add(tf.matmul(z_fc1_drop, W_fc2), b_fc2, name='outlayer')
    prob = tf.nn.softmax(z_fc2, name='probability')

    # cost function
    regularizer = tf.contrib.layers.l2_regularizer(lam)
    regularization = regularizer(W_fc1) + regularizer(W_fc2)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                logits=z_fc2)) + regularization

    train = tf.train.AdamOptimizer().minimize(cost)
    # output_type='int32', name="predict"
    # The output node named 'predict' which can be saved as a .pb file
    pred = tf.argmax(prob, 1, output_type='int32', name='predict')
    correct_prediction = tf.equal(pred, tf.argmax(y, 1, output_type='int32'))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_epochs):
            train_generator = data_utils.data_augmentation()
            minibatch_cost = None
            batches = 0
            for X_data, y_data in train_generator.flow(X_train, y_train):
                _, minibatch_cost = sess.run([train, cost],
                                             feed_dict={
                                                 X: X_data,
                                                 y: y_data,
                                                 kp: keep_prob,
                                                 lam: lamda
                                             })
                batches += 1
                # Leave the generator loop by hand once as many minibatches
                # as there are training samples have run (original note:
                # "that is 32 duplicates per image").
                if batches >= X_train.shape[0]:
                    break
            if epoch % 10 == 0:
                print(str((time.strftime('%Y-%m-%d %H:%M:%S'))))
                print('cost after epoch {} :{}'.format(epoch, minibatch_cost))

        # Evaluate with dropout switched off (keep probability 1.0); feeding
        # 0.8 here would randomly drop units while measuring accuracy.
        train_acc = accuracy.eval(feed_dict={
            X: X_train[:100],
            y: y_train[:100],
            kp: 1.0,
            lam: lamda
        })
        print('train accuracy', train_acc)

        # save model
        saver = tf.train.Saver({
            'W_conv1': W_conv1,
            'b_conv1': b_conv1,
            'W_conv2': W_conv2,
            'b_conv2': b_conv2,
            'W_fc1': W_fc1,
            'b_fc1': b_fc1,
            'W_fc2': W_fc2,
            'b_fc2': b_fc2
        })
        if not os.path.exists('model'):
            os.mkdir('model')
        saver.save(sess, os.path.join('model', 'cnn_model.ckpt'))
        # save the trained model as .pb file for using in Android studio
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=['predict'])
        with tf.gfile.FastGFile(os.path.join('model', 'gesture.pb'),
                                mode='wb') as f:
            f.write(output_graph_def.SerializeToString())