def augmentation_demo(filename, it=20, mean_RGB=None):
    """
    Little demo to show how data augmentation is performed on a single image.

    Parameters
    ----------
    filename : str
        Path of the image.
    it : int
        Number of data augmentation examples to show.
    mean_RGB : array_like, None, optional
        Mean RGB values to subtract during augmentation. If None, the default
        dataset mean is used.

    """
    if mean_RGB is None:
        mean_RGB = np.array([107.59348955, 112.1047813, 80.9982362])
    else:
        mean_RGB = np.array(mean_RGB)

    batch = data_augmentation([filename] * it, mean_RGB=mean_RGB)

    plt.ion()
    fig, [ax1, ax2] = plt.subplots(1, 2, num=1)
    ax1.set_title('Original image')
    ax2.set_title('Transformed image')
    image = Image.open(filename)
    ax1.imshow(np.asarray(image))

    mean_RGB = mean_RGB.astype(np.float32)
    for im in batch:
        im = im[::-1, :, :]                # reverse channel order
        im = np.transpose(im, (1, 2, 0))   # CHW -> HWC
        im = im + mean_RGB[None, None, :]  # undo mean subtraction
        ax2.imshow(im.astype(np.uint8))
        plt.waitforbuttonpress(1)

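# Hedged usage sketch (not part of the original code): the image path below is a
# made-up placeholder; it only shows how augmentation_demo is meant to be called.
def augmentation_demo_example():
    demo_image = 'data/demo_images/example_plant.jpg'  # hypothetical path
    augmentation_demo(demo_image, it=5)  # cycles through 5 augmented crops
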
def batch_fetching(image_list, image_labels, output):
    """Fetch a test-time augmentation batch for every image url in `image_list`,
    appending the batches and their labels to the shared `output` dict."""
    for i, im in enumerate(image_list):
        print 'Image number: {}'.format(i)
        try:
            batch_tmp = data_augmentation([im] * 10, mode='test', filemode='url')
        except Exception:
            print 'Error at Image {}'.format(i)
            continue
        output['batch'].append(batch_tmp)
        output['labels'].append(image_labels[i])

def batch_fetching(image_list, image_labels, output):
    """Variant of batch_fetching for observations made of several images: each
    entry of `image_list` is a space-separated string of image urls."""
    for i, im in enumerate(image_list):
        print 'Image number: {}'.format(i)
        batch_tmp = []
        for subim in im.split(' '):
            try:
                subbatch_tmp = data_augmentation([subim] * 10, mode='test',
                                                 filemode='url')
                batch_tmp.append(subbatch_tmp)
            except Exception:
                print 'Error at Image {}'.format(i)
                continue
        if len(batch_tmp) > 1:
            # enough image queries succeeded to allow a multi-image prediction
            output['batch'].append(batch_tmp)
            output['labels'].append(image_labels[i])
            output['im_per_obs'].append(len(batch_tmp))

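# Hedged usage sketch (the urls and labels below are placeholders, not real data):
# batch_fetching fills a caller-owned `output` dict in place, so the lists must
# exist before the call.
def batch_fetching_example():
    image_list = ['http://example.com/obs1_a.jpg http://example.com/obs1_b.jpg',
                  'http://example.com/obs2_a.jpg']
    image_labels = [3, 17]  # made-up label ids
    output = {'batch': [], 'labels': [], 'im_per_obs': []}
    batch_fetching(image_list, image_labels, output)
    return output
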
def single_prediction(test_func, im_list, aug_params=None, crop_mode='random'):
    """
    Function for identifying a SINGLE plant with one or more images.
    It combines the predictions for all the images to output the best possible
    labels overall.

    Parameters
    ----------
    test_func : theano function
        Function to make predictions.
    im_list : list
        List of image filepaths or urls.
    aug_params : dict, None, optional
        Parameters for data augmentation.
    crop_mode : {'random', 'standard'}
        Modality of cropping. Random usually works better.

    Returns
    -------
    Arrays with the top 5 predicted label numbers and their corresponding
    probabilities.

    """
    if aug_params is None:
        aug_params = {}
    aug_params.pop('mode', None)

    pred = []
    for i, im in enumerate(im_list):
        print 'Image number: {}'.format(i)
        try:
            if crop_mode == 'random':
                batch = data_augmentation([im] * 10, mode='test', **aug_params)
            elif crop_mode == 'standard':
                batch = standard_tencrop_batch(im, **aug_params)
        except Exception:
            print 'Error at Image {}'.format(i)
            continue
        pred_raw = test_func(batch)  # probabilities for all labels for all 10 crops
        pred_tmp = np.sum(pred_raw, axis=0) / 10.  # mean probabilities across crops
        pred.append(pred_tmp)

    pred_prob = np.sum(pred, axis=0) / len(im_list)  # mean probabilities across images
    args = pred_prob.argsort()[-5:][::-1]  # top5 predicted labels
    pred_lab = args
    return np.array(pred_lab), np.array(pred_prob[args])

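# Hedged usage sketch (assumptions: `test_func` is the compiled Theano prediction
# function used elsewhere in this project, and the image paths are placeholders).
def single_prediction_example(test_func):
    im_list = ['observation/img_1.jpg', 'observation/img_2.jpg']  # hypothetical paths
    pred_lab, pred_prob = single_prediction(test_func, im_list, crop_mode='random')
    return zip(pred_lab, pred_prob)  # top-5 (label, probability) pairs
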
def test_predictions(test_func, im_list, aug_params=None, crop_mode='random'):
    """
    Function for testing single images with a random ten-crop.

    Parameters
    ----------
    test_func : theano function
        Function to make predictions.
    im_list : list
        List of image filepaths or urls.
    aug_params : dict, None, optional
        Parameters for data augmentation.
    crop_mode : {'random', 'standard'}
        Modality of cropping. Random usually works better.

    Returns
    -------
    Arrays with the top 5 predicted label numbers and their corresponding
    probabilities.

    """
    if aug_params is None:
        aug_params = {}
    aug_params.pop('mode', None)

    pred_lab, pred_prob = [], []
    for i, im in enumerate(im_list):
        print 'Image number: {}'.format(i)
        try:
            if crop_mode == 'random':
                batch = data_augmentation([im] * 10, mode='test', **aug_params)
            elif crop_mode == 'standard':
                batch = standard_tencrop_batch(im, **aug_params)
        except Exception:
            print 'Error at Image {}'.format(i)
            pred_lab.append([0] * 5)
            pred_prob.append([0] * 5)
            continue
        pred_raw = test_func(batch)  # probabilities for all labels for all 10 crops
        pred_tmp = np.sum(pred_raw, axis=0) / 10.  # mean probabilities across crops
        args = pred_tmp.argsort()[-5:][::-1]  # top5 predicted labels
        pred_lab.append(args)
        pred_prob.append(pred_tmp[args])

    return np.array(pred_lab), np.array(pred_prob)

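# Hedged follow-up sketch (assumption: `true_lab` holds the ground-truth label ids
# aligned with `im_list`): turns the (N, 5) label array returned by test_predictions
# into top-1 / top-5 accuracies. Images that failed keep their [0]*5 placeholder
# rows and therefore count as errors unless their true label is 0.
def topk_accuracy(pred_lab, true_lab):
    true_lab = np.asarray(true_lab)
    top1 = np.mean(pred_lab[:, 0] == true_lab)
    top5 = np.mean(np.any(pred_lab == true_lab[:, None], axis=1))
    return top1, top5
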
def train_and_save(self, X_train, y_train, num_epochs=420, lamda=1e-4):
    img_size = global_vals.resized_image_size
    num_classes = global_vals.num_classes
    X = tf.placeholder(tf.float32, [None, img_size, img_size, 3], name='input_x')
    y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
    lam = tf.placeholder(tf.float32, name='lambda')

    with tf.variable_scope('conv1_layer'):
        conv1 = tf.contrib.layers.conv2d(X, num_outputs=256, kernel_size=9,
                                         stride=1, padding='VALID')

    with tf.variable_scope('primary_layer'):
        primary_caps, activation = capslayer.layers.primaryCaps(
            conv1, filters=32, kernel_size=9, strides=2,
            out_caps_shape=[8, 1], method='logistic')

    with tf.variable_scope('digit_layer'):
        primary_caps = tf.reshape(primary_caps,
                                  shape=[self.batch_size, -1, 8, 1])
        self.digit_caps, self.activation = capslayer.layers.fully_connected(
            primary_caps, activation, num_outputs=self.num_classes,
            out_caps_shape=[16, 1], routing_method='DynamicRouting')

    # fully connected layer
    # input: [None, -1]
    # output: [None, global_vals.output_dim_vectors]
    dim_vectors = global_vals.output_dim_vectors
    W_fc = tf.Variable(
        tf.truncated_normal(
            shape=[self.activation.get_shape().as_list()[1], dim_vectors],
            stddev=0.1))
    b_fc = tf.Variable(tf.constant(0.0, shape=[dim_vectors]))
    z_fc = tf.nn.relu(tf.matmul(self.activation, W_fc) + b_fc,
                      name='output_vector')

    # softmax layer
    # output: [None, num_classes]
    W_fc2 = tf.Variable(
        tf.truncated_normal(shape=[dim_vectors, self.num_classes], stddev=0.1))
    b_fc2 = tf.Variable(tf.constant(0.0, shape=[self.num_classes]))
    z_fc2 = tf.nn.relu(tf.matmul(z_fc, W_fc2) + b_fc2, name='output_layer')
    prob = tf.nn.softmax(z_fc2, name='probability')

    # cost function (L2 regularization strength is fed through the `lambda` placeholder)
    regularizer = tf.contrib.layers.l2_regularizer(lam)
    regularization = regularizer(W_fc) + regularizer(W_fc2)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                logits=z_fc2)) + regularization
    train = tf.train.AdadeltaOptimizer().minimize(cost)

    pred = tf.argmax(prob, axis=1, output_type='int32', name='predict')
    correct_prediction = tf.equal(pred,
                                  tf.argmax(y, axis=1, output_type='int32'))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    tf.set_random_seed(2018)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_epochs):
            train_generator = data_utils.data_augmentation()
            minibatch_cost = None
            batches = 0
            for X_data, y_data in train_generator.flow(
                    X_train, y_train, batch_size=self.batch_size):
                _, minibatch_cost = sess.run([train, cost],
                                             feed_dict={
                                                 X: X_data,
                                                 y: y_data,
                                                 lam: lamda
                                             })
                batches += 1
                # print('batches:', batches)
                if batches >= X_train.shape[0]:
                    break
            if epoch % 10 == 0:
                print(time.strftime('%Y-%m-%d %H:%M:%S'))
                print('cost after epoch {}: {}'.format(epoch, minibatch_cost))
                # `accuracy` is the tensor defined above; tensor.eval() and
                # Session.run() behave almost identically here.
                train_acc = accuracy.eval(feed_dict={
                    X: X_train[:100],
                    y: y_train[:100],
                    lam: lamda
                })
                print('train accuracy', train_acc)

        # save model
        saver = tf.train.Saver({
            'W_fc': W_fc,
            'b_fc': b_fc,
            'W_fc2': W_fc2,
            'b_fc2': b_fc2
        })
        if not os.path.exists('model'):
            os.mkdir('model')
        saver.save(sess, os.path.join('model', 'caps_model.ckpt'))

        # save the trained model as a .pb file so it can be used from Android Studio
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=['predict'])
        with tf.gfile.FastGFile(os.path.join('model', 'gesture_caps.pb'),
                                mode='wb') as f:
            # 'wb': 'w' opens the file for writing, 'b' writes the data in binary mode
            f.write(output_graph_def.SerializeToString())

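# Hedged inference sketch (assumptions: 'model/gesture_caps.pb' was produced by
# train_and_save above, and `images` is a float32 array shaped
# [batch_size, img_size, img_size, 3]; because the capsule graph reshapes with
# self.batch_size, the batch fed here has to match the training batch size).
def load_frozen_graph(pb_path):
    graph_def = tf.GraphDef()
    with tf.gfile.FastGFile(pb_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph

def predict_with_frozen_graph(images,
                              pb_path=os.path.join('model', 'gesture_caps.pb')):
    graph = load_frozen_graph(pb_path)
    with tf.Session(graph=graph) as sess:
        x = graph.get_tensor_by_name('input_x:0')        # placeholder named above
        predict = graph.get_tensor_by_name('predict:0')  # argmax node named above
        return sess.run(predict, feed_dict={x: images})
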
def cnn_model(X_train, y_train, keep_prob=0.8, lamda=1e-4, num_epochs=450):
    print('X_train shape:', X_train.shape)
    print('y_train shape:', y_train.shape)

    X = tf.placeholder(tf.float32, [None, 64, 64, 3], name='input_x')
    y = tf.placeholder(tf.float32, [None, global_vals.num_classes],
                       name='input_y')
    kp = tf.placeholder_with_default(1.0, shape=(), name='keep_prob')
    lam = tf.placeholder(tf.float32, name='lambda')

    # conv1
    # input: [None,64,64,3]
    # output: [None,32,32,32]
    W_conv1 = weight_variable([5, 5, 3, 32])
    b_conv1 = bias_variable([32])
    z1 = tf.nn.relu(conv2d(X, W_conv1) + b_conv1)
    maxpool1 = max_pool_2x2(z1)

    # conv2
    # output: [None,16,16,64]
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    z2 = tf.nn.relu(conv2d(maxpool1, W_conv2) + b_conv2)
    maxpool2 = max_pool_2x2(z2)

    # full connection1
    # output: [None,200]
    W_fc1 = weight_variable([16 * 16 * 64, global_vals.output_dim_vectors])
    b_fc1 = bias_variable([global_vals.output_dim_vectors])
    maxpool2_flat = tf.reshape(maxpool2, [-1, 16 * 16 * 64])
    z_fc1 = tf.nn.relu(tf.matmul(maxpool2_flat, W_fc1) + b_fc1,
                       name='output_vector')
    z_fc1_drop = tf.nn.dropout(z_fc1, keep_prob=kp)

    # softmax layer
    # output: [None,num_classes]
    W_fc2 = weight_variable(
        [global_vals.output_dim_vectors, global_vals.num_classes])
    b_fc2 = bias_variable([global_vals.num_classes])
    z_fc2 = tf.add(tf.matmul(z_fc1_drop, W_fc2), b_fc2, name='outlayer')
    prob = tf.nn.softmax(z_fc2, name='probability')

    # cost function
    regularizer = tf.contrib.layers.l2_regularizer(lam)
    regularization = regularizer(W_fc1) + regularizer(W_fc2)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                logits=z_fc2)) + regularization
    train = tf.train.AdamOptimizer().minimize(cost)

    # the output node named 'predict', which can be saved into the .pb file
    pred = tf.argmax(prob, 1, output_type='int32', name='predict')
    correct_prediction = tf.equal(pred, tf.argmax(y, 1, output_type='int32'))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    tf.set_random_seed(2018)  # to keep consistent results
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_epochs):
            train_generator = data_utils.data_augmentation()
            minibatch_cost = None
            batches = 0
            for X_data, y_data in train_generator.flow(X_train, y_train):
                _, minibatch_cost = sess.run([train, cost],
                                             feed_dict={
                                                 X: X_data,
                                                 y: y_data,
                                                 kp: keep_prob,
                                                 lam: lamda
                                             })
                batches += 1
                if batches >= X_train.shape[0]:  # that is 32 duplicates per image
                    break
            if epoch % 10 == 0:
                print(time.strftime('%Y-%m-%d %H:%M:%S'))
                print('cost after epoch {}: {}'.format(epoch, minibatch_cost))
                train_acc = accuracy.eval(feed_dict={
                    X: X_train[:100],
                    y: y_train[:100],
                    kp: 0.8,
                    lam: lamda
                })
                print('train accuracy', train_acc)

        # save model
        saver = tf.train.Saver({
            'W_conv1': W_conv1,
            'b_conv1': b_conv1,
            'W_conv2': W_conv2,
            'b_conv2': b_conv2,
            'W_fc1': W_fc1,
            'b_fc1': b_fc1,
            'W_fc2': W_fc2,
            'b_fc2': b_fc2
        })
        if not os.path.exists('model'):
            os.mkdir('model')
        saver.save(sess, os.path.join('model', 'cnn_model.ckpt'))

        # save the trained model as a .pb file for use in Android Studio
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=['predict'])
        with tf.gfile.FastGFile(os.path.join('model', 'gesture.pb'),
                                mode='wb') as f:
            f.write(output_graph_def.SerializeToString())

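# Hedged sketch of the layer helpers cnn_model relies on. They are assumed to be
# defined elsewhere in this project; these are the conventional TF 1.x versions and
# are consistent with the shape comments above (SAME-padded 5x5 convs, 2x2 pooling).
def weight_variable(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')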