def run():
    """Train the segmentation network and write inference samples.

    The graph is assembled with ``load_vgg``, ``layers`` and ``optimize``,
    trained with ``train_nn`` and the result is handed to
    ``helper.save_inference_samples``.
    """
    # Fixed settings for this run.
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 50
    batch_size = 5

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # Pure path computations; done up front to keep the session body short.
    vgg_path = os.path.join(data_dir, 'vgg')
    training_dir = os.path.join(data_dir, 'data_road/training')

    with tf.Session() as sess:
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # TF placeholders
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
        decoder_out = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            decoder_out, correct_label, learning_rate, num_classes)

        train_nn(
            sess, epochs, batch_size, get_batches_fn, train_op,
            cross_entropy_loss, input_image, correct_label, keep_prob,
            learning_rate)

        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)
def run():
    """Train the network, write inference samples and save a checkpoint.

    Steps, in order: verify the dataset via ``tests.test_for_kitti_dataset``,
    fetch the pretrained VGG model, build the graph with ``load_vgg`` /
    ``layers`` / ``optimize``, train with ``train_nn``, write inference
    samples, and finally save all variables to
    ``./model/semantic_segmentation_model.ckpt``.
    """
    print("Run")
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    checkpoint_path = "./model/semantic_segmentation_model.ckpt"
    batch_size = 5
    epochs = 10

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # Create the checkpoint directory before training: the saver does not
    # create missing parent directories, and discovering that only after the
    # whole training run would throw the trained weights away.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # OPTIONAL (not done): train and run inference on the Cityscapes dataset
    # instead of KITTI. https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL (not implemented): augment images for better results.

        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        train_nn(
            sess, epochs, batch_size, get_batches_fn, train_op,
            cross_entropy_loss, input_image, correct_label, keep_prob,
            learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)

        # Save the variables to disk.
        print("Saving model...")
        saver = tf.train.Saver()
        save_path = saver.save(sess, checkpoint_path)
        print("Model saved in path: %s" % save_path)
def run():
    """Build, train and sample the segmentation network.

    Uses ``load_vgg``, ``layers`` and ``optimize`` to assemble the graph,
    ``train_nn`` to train it and ``helper.save_inference_samples`` to write
    the results.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir, runs_dir = './data', './runs'
    epochs, batch_size = 50, 8

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    vgg_path = os.path.join(data_dir, 'vgg')
    train_images_dir = os.path.join(data_dir, 'data_road/training')

    with tf.Session() as sess:
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(train_images_dir, image_shape)

        # Placeholders are created before the VGG graph is loaded, exactly as
        # the network-building helpers below expect to receive them.
        label_shape = (None, None, None, num_classes)
        correct_label = tf.placeholder(tf.int32, label_shape)
        learning_rate = tf.placeholder(tf.float32)

        # Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, skip3, skip4, skip7 = load_vgg(sess, vgg_path)
        decoder_out = layers(skip3, skip4, skip7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            decoder_out, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        train_nn(
            sess, epochs, batch_size, get_batches_fn, train_op,
            cross_entropy_loss, input_image, correct_label, keep_prob,
            learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)
def run():
    """Build, train and sample the network using module-level settings.

    ``data_dir``, ``runs_dir``, ``image_shape``, ``num_classes``, ``epochs``
    and ``batch_size`` are not defined here; they are read from the enclosing
    module scope.
    """
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model (if there's no vgg model in data folder)
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    vgg_path = os.path.join(data_dir, 'vgg')
    training_dir = os.path.join(data_dir, 'data_road/training')

    with tf.Session() as sess:
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Build NN using load_vgg, layers, and optimize function
        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
        decoder_out = layers(layer3, layer4, layer7, num_classes)

        correct_label = tf.placeholder(
            dtype=tf.float32, shape=(None, None, None, num_classes))
        learning_rate = tf.placeholder(dtype=tf.float32)
        logits, train_op, cross_entropy_loss = optimize(
            decoder_out, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        train_nn(
            sess, epochs, batch_size, get_batches_fn, train_op,
            cross_entropy_loss, image_input, correct_label, keep_prob,
            learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            image_input)
def run():
    """Build the network, train it for 100 epochs and write inference samples.

    The label placeholder is fixed to the configured image height and width.
    """
    num_classes = 2
    image_shape = (160, 576)  # KITTI dataset uses 160x576 images
    data_dir = './data'
    runs_dir = './runs'
    epochs = 100
    batch_size = 32

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    vgg_path = os.path.join(data_dir, 'vgg')
    training_dir = os.path.join(data_dir, 'data_road/training')
    height, width = image_shape

    with tf.Session() as sess:
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Build NN using load_vgg, layers, and optimize function
        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
        model = layers(layer3, layer4, layer7, num_classes)

        labels = tf.placeholder(
            tf.float32, shape=[None, height, width, num_classes])
        learning_rate = tf.placeholder(tf.float32, shape=[])
        logits, training_operation, cross_entropy_loss = optimize(
            model, labels, learning_rate, num_classes)

        # Train NN using the train_nn function
        train_nn(
            sess, epochs, batch_size, get_batches_fn, training_operation,
            cross_entropy_loss, image_input, labels, keep_prob, learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            image_input)
def run():
    """Create placeholders, build the network, train it and write samples.

    The placeholders are created first, before the VGG graph is loaded; the
    label placeholder is fixed to the configured image height and width.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 20
    batch_size = 4
    height, width = image_shape

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        correct_label = tf.placeholder(
            tf.float32, [None, height, width, num_classes],
            name="correct_label")
        learning_rate = tf.placeholder(tf.float32, name="learning_rate")

        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
        nn_last_layer = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        train_nn(
            sess, epochs, batch_size, get_batches_fn, train_op,
            cross_entropy_loss, input_image, correct_label, keep_prob,
            learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)
def run():
    """Build, train and sample the network with settings taken from ``FLAGS``.

    In addition to the loss, the graph gets a prediction op and metric ops
    (from ``prediction`` and ``metrics``) which are passed on to ``train_nn``.
    """
    # Download pretrained vgg model
    vgg_path = helper.maybe_download_pretrained_vgg(FLAGS.data_dir)

    # Create function to get batches
    dataset_path = os.path.join(FLAGS.data_dir, 'data_road', 'training')
    get_batches_fn, samples_n = helper.gen_batch_function(
        dataset_path,
        IMAGE_SHAPE,
        seed=FLAGS.seed,
        samples_limit=FLAGS.samples_limit,
    )
    # Number of batches per pass, rounding the last partial batch up.
    batches_per_epoch = float(samples_n) / FLAGS.batch_size
    batches_n = int(math.ceil(batches_per_epoch))

    session_config = _get_config()
    with tf.Session(config=session_config) as sess:
        labels = tf.placeholder(
            tf.float32, [None, None, None, CLASSES_N], name='input_labels')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
        model_output = layers(layer3, layer4, layer7, CLASSES_N)

        logits, train_op, cross_entropy_loss, global_step = optimize(
            model_output, labels, learning_rate, CLASSES_N)
        output_softmax, prediction_op = prediction(model_output)
        metrics_dict, metrics_reset_op = metrics(
            output_softmax, labels, CLASSES_N)

        train_nn(
            sess, global_step, FLAGS.epochs, FLAGS.batch_size,
            get_batches_fn, batches_n, train_op, cross_entropy_loss,
            prediction_op, metrics_dict, metrics_reset_op, image_input,
            labels, keep_prob, learning_rate, MODELS_FREQ, TENSORBOARD_FREQ)

        helper.save_inference_samples(
            FLAGS.runs_dir, FLAGS.data_dir, sess, IMAGE_SHAPE, logits,
            keep_prob, image_input)
def run():
    """Build the network, then optionally load, train, process a video and save.

    Which stages run is controlled by the module-level switches
    ``process_load``, ``process_train``, ``process_video`` and
    ``process_save``.

    ``image_shape``, ``sess``, ``logits``, ``keep_prob`` and ``input_image``
    are published as module globals; presumably ``process_image`` (the
    per-frame callback passed to ``fl_image``) reads them -- confirm against
    its definition.
    """
    global image_shape, sess, logits, keep_prob, input_image

    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    save_dir = "./save_network"

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize function
        epochs = 6
        batch_size = 8
        learning_rate = tf.placeholder(tf.float32)
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes])

        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, optimizer, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        if process_load:
            checkpoint = tf.train.get_checkpoint_state("save_network")
            if checkpoint and checkpoint.model_checkpoint_path:
                saver.restore(sess, checkpoint.model_checkpoint_path)
                print("load sucess:", checkpoint.model_checkpoint_path)
            else:
                print("load fail")

        if process_train:
            # Train NN using the train_nn function
            train_nn(
                sess, epochs, batch_size, get_batches_fn, optimizer,
                cross_entropy_loss, input_image, correct_label, keep_prob,
                learning_rate)

        if process_video:
            input_video = 'project_video.mp4'
            output_video = 'output.mp4'
            clip = VideoFileClip(input_video)
            # NOTE: process_image expects color images.
            video_clip = clip.fl_image(process_image)
            video_clip.write_videofile(output_video, audio=False)

        if process_save:
            # The saver does not create missing parent directories; make sure
            # the target exists so trained weights are not lost at this point.
            os.makedirs(save_dir, exist_ok=True)
            saver.save(sess, "./save_network/model_60_8")
def run():
    """Train the network, export it, write samples and annotate a video.

    After training, the graph is frozen with
    ``convert_variables_to_constants`` (output node ``adam_logit``), a
    checkpoint and the frozen graph are written under ``runs_dir``, inference
    samples are saved, and each video listed in ``flist`` is re-encoded with
    the segmentation mask pasted onto every frame.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize function
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(
            vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        epochs = 1000
        batch_size = 20
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        train_nn(
            sess, epochs, batch_size, get_batches_fn, train_op,
            cross_entropy_loss, input_image, correct_label, keep_prob,
            learning_rate)

        # Export the graph for inference programs.
        output_node_names = 'adam_logit'
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,  # the session is used to retrieve the weights
            tf.get_default_graph().as_graph_def(),  # source of the nodes
            output_node_names.split(","))  # selects the useful nodes
        print("output_node: {}".format(output_node_names.split(",")))

        # The checkpoint is written before save_inference_samples runs, so
        # make sure the output directory exists at this point.
        os.makedirs(runs_dir, exist_ok=True)
        saver = tf.train.Saver()
        saver.save(sess, runs_dir + '/fcn')
        tf.train.write_graph(
            output_graph_def, '', runs_dir + '/frozen_graph.pb', False)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)

        # OPTIONAL: Apply the trained model to a video.
        # Build the softmax op once: creating it inside the frame loop would
        # add a new node to the graph for every frame.
        softmax_op = tf.nn.softmax(logits)
        mask_rgba = np.array([[0, 255, 0, 127]])

        # Each entry: [input file, output file, output (width, height)].
        flist = [
            ['project_video.mp4', 'project_video_fcn8_100.mp4', (640, 360)]]

        for files in flist:
            print('file: ' + files[0] + ' -> ' + files[1], flush=True)
            clip1 = VideoFileClip(files[0])
            save_size = files[2]
            fps = 30
            fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
            video_out = cv2.VideoWriter(files[1], int(fourcc), fps, save_size)
            try:
                for frame in clip1.iter_frames():
                    # NOTE(review): the network is fed the BGR-converted
                    # frame; confirm this matches the channel order used
                    # during training.
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    image = scipy.misc.imresize(frame, image_shape)

                    im_softmax = sess.run(
                        [softmax_op],
                        {keep_prob: 1.0, input_image: [image]})
                    # Column 1 holds the score of the second class.
                    im_softmax = im_softmax[0][:, 1].reshape(
                        image_shape[0], image_shape[1])
                    segmentation = (im_softmax > 0.5).reshape(
                        image_shape[0], image_shape[1], 1)

                    mask = np.dot(segmentation, mask_rgba)
                    mask = scipy.misc.toimage(mask, mode="RGBA")
                    street_im = scipy.misc.toimage(image)
                    street_im.paste(mask, box=None, mask=mask)

                    result = np.array(street_im)
                    result2 = cv2.resize(
                        result, (save_size[0], save_size[1]))
                    video_out.write(result2)
            finally:
                # Finalise the output file explicitly instead of relying on
                # garbage collection of the writer object.
                video_out.release()
def run():
    """Train the network, then persist it as a checkpoint and a SavedModel.

    All settings (``DATA_DIR``, ``RUNS_DIR``, ``IMAGE_SHAPE``,
    ``NUM_CLASSES``, ``EPOCHS``, ``BATCH_SIZE``, ``IOU_ENABLED``) are read
    from module scope. When ``IOU_ENABLED`` is set, ``optimize`` is asked for
    an additional IoU object that is forwarded to ``train_nn``; otherwise
    ``None`` is forwarded.
    """
    tests.test_for_kitti_dataset(DATA_DIR)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(DATA_DIR)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    printStatistics()

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(DATA_DIR, 'vgg')

        # Create function to get batches
        training_dir = os.path.join(DATA_DIR, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, IMAGE_SHAPE)

        # Build NN using load_vgg, layers, and optimize function
        print("Load VGG model...")
        input_image, keep_prob, layer_3, layer_4, layer_7 = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer_3, layer_4, layer_7, NUM_CLASSES)

        label = tf.placeholder(
            tf.int32, shape=[None, None, None, NUM_CLASSES])
        learning_rate = tf.placeholder(tf.float32)

        if not IOU_ENABLED:
            iou_obj = None
            logits, train_op, cross_entropy_loss, accuracy_op = optimize(
                layer_output, label, learning_rate, NUM_CLASSES)
        else:
            (logits, train_op, cross_entropy_loss,
             iou_obj, accuracy_op) = optimize(
                layer_output, label, learning_rate, NUM_CLASSES,
                iou_enabled=IOU_ENABLED)

        # Train NN using the train_nn function
        print("Start training...")
        train_nn(
            sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op,
            cross_entropy_loss, input_image, label, keep_prob, learning_rate,
            accuracy_op, iou_obj)

        # Save the trained model as a checkpoint
        print("Save trained model...")
        checkpoint_saver = tf.train.Saver()
        checkpoint_saver.save(sess, './runs/semantic_segmentation_model.ckpt')

        # Save again in SavedModel format, replacing any previous export
        print("Saving the model")
        cwd_entries = os.listdir(os.getcwd())
        if "saved_model" in cwd_entries:
            shutil.rmtree("./saved_model")
        builder = tf.saved_model.builder.SavedModelBuilder("./saved_model")
        builder.add_meta_graph_and_variables(sess, ["vgg16"])
        builder.save()

        # Save inference data using helper.save_inference_samples
        print("Save inference samples...")
        helper.save_inference_samples(
            RUNS_DIR, DATA_DIR, sess, IMAGE_SHAPE, logits, keep_prob,
            input_image)
def run():
    """Train the network, save a checkpoint, write samples and a test video.

    The module-level ``BATCH_SIZE`` and ``p_keep`` may be overridden from the
    command line: ``sys.argv[1]`` is parsed as the integer batch size and
    ``sys.argv[2]`` as the float keep probability. Both values are embedded
    in the checkpoint file name.
    """
    global p_keep, r_learning, BATCH_SIZE

    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 30

    # Optional command-line overrides.
    if len(sys.argv) > 1:
        BATCH_SIZE = int(sys.argv[1])
    if len(sys.argv) > 2:
        p_keep = float(sys.argv[2])

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    tf.reset_default_graph()
    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize function.
        # (A former ``tf.Print(keep_prob, [keep_prob])`` call was dropped
        # here: its returned tensor was discarded, so it could never print.)
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        final_out = layers(layer3_out, layer4_out, layer7_out, num_classes)

        correct_label = tf.placeholder(
            tf.float32, (None, None, None, num_classes))
        learning_rate = tf.placeholder(tf.float32)
        logits, train_op, cross_entropy_loss = optimize(
            final_out, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        sess.run(tf.global_variables_initializer())
        train_nn(
            sess, epochs, BATCH_SIZE, get_batches_fn, train_op,
            cross_entropy_loss, input_image, correct_label, keep_prob,
            learning_rate)

        saver = tf.train.Saver()
        saver.save(sess, "./runs/Batch%d_Pkeep%f.ckpt" % (BATCH_SIZE, p_keep))

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)

        # OPTIONAL: Apply the trained model to a video
        video_name = 'dataroad_marked.mp4'
        helper.gen_test_output_video(
            data_dir, sess, image_shape, logits, keep_prob, input_image,
            video_name)
        print('Writing video Finished.')
def run():
    """Train the model or run inference with a saved one, depending on FLAGS.mode.

    * ``FLAGS.mode == 0``: build the network, train it, save a checkpoint
      under ``model_path`` and write inference samples.
    * ``FLAGS.mode == 1``: restore the saved model, write inference samples
      and terminate the process via ``exit()``.
    * ``FLAGS.mode == 2``: restore the saved model and write a copy of the
      input video with the segmentation overlaid on every frame.

    Hyper-parameters and the model name are read from ``FLAGS``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    model_path = './teeekay/'
    model = FLAGS.model

    # Training history of earlier models:
    #   mdl9: 15 * 500 images at lr = 0.000015
    #   mdl8: 15 * 500 images at lr = 0.000025
    #   mdl7:  5 * 500 images at lr = 0.00001
    #   mdl6: with cityscapes data
    #   mdl4: 100 at 0.00001
    #   mdl3:  50 at 0.00003
    #   mdl2:  25 at 0.00005
    print("model = '{ssmodel}'".format(ssmodel=model))

    with tf.name_scope("data"):
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

    Kepochs = FLAGS.epochs
    Kbatch_size = FLAGS.batch_size
    KLearningRate = FLAGS.learn_rate
    Kl2_regularization_rate = FLAGS.l2_regularization_rate

    def print_settings():
        """Print the hyper-parameters and the model location."""
        print("Kepochs ={}, Kbatch_size= {}, KLearningRate={:3.6f}, Kl2_regularization_rate ={:3.6f} Model name ={}"
              .format(Kepochs, Kbatch_size, KLearningRate,
                      Kl2_regularization_rate, model_path+model))

    def restore_saved_model(sess):
        """Import the saved meta graph and return (image input, keep_prob, logits)."""
        saver = tf.train.import_meta_graph(model_path+model+'.meta')
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        graph = tf.get_default_graph()
        img_input = graph.get_tensor_by_name('image_input:0')
        keep_prob = graph.get_tensor_by_name('keep_prob:0')
        last_layer = graph.get_tensor_by_name('decoder/last_layer:0')
        logits = tf.reshape(last_layer, (-1, num_classes))
        return img_input, keep_prob, logits

    def process_frame(sess, softmax_op, keep_prob, image_pl, frame,
                      frame_shape, image_shape):
        """
        Generate output using the video frames
        :param sess: TF session
        :param softmax_op: TF Tensor for the softmax of the logits
        :param keep_prob: TF Placeholder for the dropout keep probability
        :param image_pl: TF Placeholder for the image placeholder
        :param frame: image frame in
        :param frame_shape: Tuple - Shape of frame coming in and going out
        :param image_shape: Tuple - Shape of image used in TF model
        :return: np.array of video frame image with superimposed semantic segmentation
        """
        softmax_criteria = 0.5
        softmax_criteria1 = 0.45
        softmax_criteria2 = 0.4

        # resize to shape used in model
        img_resized = scipy.misc.imresize(frame, image_shape, interp='lanczos')
        # inference with no dropout; feed through the placeholder argument
        # rather than a variable captured from the enclosing scope
        im_softmax = sess.run(
            [softmax_op], {keep_prob: 1.0, image_pl: [img_resized]})
        # reshape to image dimensions
        im_softmax = im_softmax[0][:, 1].reshape(image_shape[0], image_shape[1])

        # Confidence bands, listed in paste order (faintest first, strongest
        # last): (exclusive lower bound, inclusive upper bound, RGBA colour).
        bands = (
            (softmax_criteria2, softmax_criteria1, [0, 200, 0, 31]),
            (softmax_criteria1, softmax_criteria, [0, 225, 0, 63]),
            (softmax_criteria, None, [0, 255, 0, 127]),
        )

        frame_im = scipy.misc.toimage(frame)
        for lower, upper, rgba in bands:
            if upper is None:
                selected = im_softmax > lower
            else:
                selected = np.logical_and(im_softmax <= upper,
                                          im_softmax > lower)
            segmentation = selected.reshape(image_shape[0], image_shape[1], 1)
            # create mask as green and semitransparent
            mask = scipy.misc.toimage(
                np.dot(segmentation, np.array([rgba])), mode="RGBA")
            mask_resized = scipy.misc.imresize(mask, frame_shape, mode="RGBA")
            mask_resized = scipy.misc.toimage(mask_resized, mode="RGBA")
            frame_im.paste(mask_resized, box=None, mask=mask_resized)
        return np.array(frame_im)

    print_settings()

    # Download pretrained vgg model
    print("helper.maybe_download_pretrained_vgg({})".format(data_dir))
    helper.maybe_download_pretrained_vgg(data_dir)

    # Path to vgg model
    vgg_path = os.path.join(data_dir, 'vgg')

    # Create function to get batches. The batch function is given data_dir
    # itself (test with cityscapes data) rather than the KITTI
    # 'data_road/training' sub-directory.
    get_batches_fn = helper.gen_batch_function(data_dir, image_shape)
    print("get_batches_fn = {}".format(get_batches_fn))

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    config = tf.ConfigProto()
    # Was `tf.log_device_placement=True`, which only set an attribute on the
    # tf module and never reached the session configuration.
    config.log_device_placement = True
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8

    with tf.Session(config=config) as sess:
        if FLAGS.mode == 0:
            # Train the model
            print("load_vgg")
            input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
                sess, vgg_path)
            with tf.name_scope("data"):
                tf.summary.image('input_images', input_image, max_outputs=3)

            print("layers")
            last_layer = layers(layer3_out, layer4_out, layer7_out, num_classes)

            print("optimize")
            logits, train_op, combined_loss, iou_obj = optimize(
                last_layer, correct_label, learning_rate, num_classes,
                iou_test=True)

            print("Train!")
            initialized = tf.global_variables_initializer()
            sess.run(initialized)
            train_nn(
                sess, Kepochs, Kbatch_size, get_batches_fn, train_op,
                combined_loss, input_image, correct_label, keep_prob,
                learning_rate, Kl2_regularization_rate, lr=KLearningRate,
                iou_obj=iou_obj)

            # Save model result. The saver does not create missing parent
            # directories, so make sure the target exists first.
            os.makedirs(model_path, exist_ok=True)
            saver = tf.train.Saver()
            save_path = saver.save(sess, model_path+model)
            print("\nSaved model at {}.".format(save_path))
            print_settings()

            print("saving samples")
            helper.save_inference_samples(
                runs_dir, data_dir, sess, image_shape, logits, keep_prob,
                input_image)

        elif FLAGS.mode == 1:
            # Run inference on images from kitti dataset
            img_input, keep_prob, logits = restore_saved_model(sess)

            # Process test images
            print("saving samples")
            helper.save_inference_samples(
                runs_dir, data_dir, sess, image_shape, logits, keep_prob,
                img_input)
            exit()

        elif FLAGS.mode == 2:
            # Run inference on video file
            # cap = imageio.get_reader('./video/harder_challenge_video.mp4')
            cap = imageio.get_reader('./video/NeighborhoodStreet.mov')
            try:
                md = cap.get_meta_data()
                fps = float(md['fps'])
                framewidth = int(md['size'][0])
                frameheight = int(md['size'][1])
                framecount = int(md['nframes'])
                frame_shape = (frameheight, framewidth)
                print("Video opened with framecount of {:4,d}, dimensions ({:4d},{:4d}), and speed of {:3.03f} fps."
                      .format(framecount, framewidth, frameheight, fps))

                # Load saved model
                img_input, keep_prob, logits = restore_saved_model(sess)
                # Build the softmax op once; creating it per frame would add
                # a new node to the graph for every frame processed.
                softmax_op = tf.nn.softmax(logits)

                fileruntime = datetime.datetime.now().strftime("%Y%m%d%H:%M:%S")
                outfilename = './video/ss_video_output_' + model + fileruntime + '.mp4'
                out = imageio.get_writer(outfilename, fps=fps)
                try:
                    frames = 0
                    for frame in cap:
                        frames += 1
                        # For an early stop, lower framecount before the loop.
                        if frames > framecount:
                            print("\nClosed video after passing expected framecount of {}".format(frames-1))
                            break
                        out_frame = process_frame(
                            sess, softmax_op, keep_prob, img_input, frame,
                            frame_shape, image_shape)
                        out.append_data(out_frame)
                        print("Frames: {0:02d}, Seconds: {1:03.03f}".format(frames, frames/fps), end='\r')
                    print("finished processing video - output video is {}".format(outfilename))
                finally:
                    # Always finalise the output file, even on failure.
                    out.close()
            finally:
                cap.close()
def run():
    """Train the network and write inference samples; optionally save a checkpoint.

    Two local switches steer the run: ``train_model`` (set to True) enables
    building, training and sampling; ``save_model`` (set to False) enables
    writing a checkpoint to ``./models``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        train_model = True
        if train_model:
            epochs = 40
            batch_size = 6

            # Path to vgg model
            vgg_path = os.path.join(data_dir, 'vgg')

            # Create function to get batches
            training_dir = os.path.join(data_dir, 'data_road/training')
            get_batches_fn = helper.gen_batch_function(
                training_dir, image_shape)

            # Placeholders
            correct_label = tf.placeholder(
                tf.int32, [None, None, None, num_classes],
                name='correct_label')
            learning_rate = tf.placeholder(tf.float32, name='learning_rate')

            # Build NN using load_vgg, layers, and optimize function
            input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
                sess, vgg_path)
            nn_last_layer = layers(layer3, layer4, layer7, num_classes)
            logits, train_op, cross_entropy_loss = optimize(
                nn_last_layer, correct_label, learning_rate, num_classes)

            # Train NN using the train_nn function
            train_nn(
                sess, epochs, batch_size, get_batches_fn, train_op,
                cross_entropy_loss, input_image, correct_label, keep_prob,
                learning_rate)

            # Save inference data using helper.save_inference_samples
            helper.save_inference_samples(
                runs_dir, data_dir, sess, image_shape, logits, keep_prob,
                input_image)

        save_model = False
        if save_model:
            checkpoint_file = './models/semantic-segmantation-model.ckpt'
            model_saver = tf.train.Saver()
            print("Saving model...")
            model_saver.save(sess, checkpoint_file)
            print("Saving model finished...")
def run():
    """Build the network, then either train it or restore a checkpoint, and test.

    Training happens when the module-level ``training`` flag is truthy or when
    no ``checkpoint`` file exists in the working directory; otherwise the
    latest checkpoint from the working directory is restored.  In both cases
    inference samples are written afterwards.  The module-level ``save`` flag
    is set to ``True`` as a side effect.
    """
    num_classes = 2
    image_shape = (160, 576)
    epochs = 40
    batch_size = 20
    data_dir = './data'
    runs_dir = './runs'

    # Path to vgg model
    vgg_path = os.path.join(data_dir, 'vgg')

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # Change the flag to True
    global save
    save = True

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    separator = "\n ===================================================== "

    with tf.Session() as sess:
        # Create function to get batches
        print("Create function to get batches")
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Build NN using load_vgg, layers, and optimize function
        print("Loading the VGG16 model... ")
        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(sess,
                                                                  vgg_path)
        print("VGG16 model loaded !")

        print("Creating the layers ... ")
        decoder_out = layers(layer3, layer4, layer7, num_classes)
        print("Layers created !")

        print(
            "Creating the placeholder for labels and variable for learning rate ..."
        )
        correct_label = tf.placeholder(
            dtype=tf.float32, shape=(None, None, None, num_classes))
        # A placeholder is used because a plain float could not be converted
        # to a tensor here.
        learning_rate = tf.placeholder(dtype=tf.float32)
        print("Placeholders created !")

        print(
            "Creating the logits, training operation and loss function for the network ..."
        )
        logits, train_op, cross_entropy_loss = optimize(
            decoder_out, correct_label, learning_rate, num_classes)
        print("Logits, training operation and loss function created !")

        sess.run(tf.global_variables_initializer())
        print("Variables initialized !")

        # Train if the training flag is set or there is no checkpoint saved;
        # otherwise restore the latest checkpoint.
        if training or not os.path.exists('checkpoint'):
            print(separator)
            print(" Training ....")
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, image_input, correct_label,
                     keep_prob, learning_rate)
            print(" Training completed !")
        else:
            restorer = tf.train.Saver()
            restorer.restore(sess, tf.train.latest_checkpoint(''))

        # Both paths finish by saving inference data for the test images.
        print(separator)
        print(" Testing ....")
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, image_input)
def run():
    """Run the main semantic segmentation program.

    The action is selected by ``args.mode`` as returned by ``parse_args()``:

    * ``0`` -- build the network on top of VGG, train it and save a
      checkpoint under ``./duffnet/``.
    * ``1`` -- restore the saved model and write inference samples for the
      test images.
    * ``2`` -- restore the saved model and annotate
      ``./video/project_video.mp4`` frame by frame.

    Any other value prints an error message.
    """
    print('\nStarting run...')
    args = parse_args()

    # Basic parameters
    kNumClasses = 2  # "road" or "not road"
    kImageShape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    model_path = './duffnet/'
    model_name = 'duffnet'

    # Hyperparameters
    epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = args.learn_rate

    # TensorFlow placeholders
    correct_label = tf.placeholder(tf.bool, [None, None, None, kNumClasses])

    # Check data set validity
    print('\nTesting Kitti dataset...')
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained VGG model if necessary
    helper.maybe_download_pretrained_vgg(data_dir)

    # Path to VGG model
    vgg_path = os.path.join(data_dir, 'vgg')
    data_folder = os.path.join(data_dir, 'data_road/training')

    # Create generator function to get batches for training
    get_batches_fn = helper.gen_batch_function(data_folder, kImageShape)

    def restore_model(sess):
        """Restore the saved graph and weights; return the inference tensors."""
        saver = tf.train.import_meta_graph(model_path + model_name + '.meta')
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        graph = tf.get_default_graph()
        img_input = graph.get_tensor_by_name('image_input:0')
        keep_prob = graph.get_tensor_by_name('keep_prob:0')
        fcn8s_out = graph.get_tensor_by_name('fcn8s_out:0')
        logits = tf.reshape(fcn8s_out, (-1, kNumClasses))
        return img_input, keep_prob, logits

    # Start TensorFlow session
    with tf.Session() as sess:

        ### Train new network ###
        if args.mode == 0:
            # Build NN using load_vgg, layers, and optimize function
            img_input, keep_prob, vgg3, vgg4, vgg7 = load_vgg(sess, vgg_path)
            fcn8s_out = layers(vgg3, vgg4, vgg7, kNumClasses)
            logits, train_op, loss = optimize(fcn8s_out, correct_label,
                                              learning_rate, kNumClasses)

            # Train NN using the train_nn function
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op, loss,
                     img_input, correct_label, keep_prob, learning_rate)

            # Save model result
            saver = tf.train.Saver()
            saver.save(sess, model_path + model_name)
            print("\nModel saved.")

        ### Test network ###
        elif args.mode == 1:
            img_input, keep_prob, logits = restore_model(sess)

            # Process test images
            helper.save_inference_samples(runs_dir, data_dir, sess,
                                          kImageShape, logits, keep_prob,
                                          img_input)

        ### Process video ###
        elif args.mode == 2:
            img_input, keep_prob, logits = restore_model(sess)

            # Build the softmax op once.  Creating it (and the reshape above)
            # inside the per-frame callback would add new nodes to the graph
            # for every frame of the video.
            softmax = tf.nn.softmax(logits)

            def process_frame(img):
                """Overlay the predicted road mask on one video frame."""
                # Input image is a Numpy array, resize it to match NN input
                # dimensions
                img_orig_size = (img.shape[0], img.shape[1])
                img_resized = scipy.misc.imresize(img, kImageShape)

                # Process image with NN
                img_softmax = sess.run([softmax], {
                    keep_prob: 1.0,
                    img_input: [img_resized]
                })

                # Reshape to 2D image dimensions
                img_softmax = img_softmax[0][:, 1].reshape(
                    kImageShape[0], kImageShape[1])

                # Threshold softmax probability to a binary road judgement
                # (>50%)
                segmentation = (img_softmax > 0.5).reshape(
                    kImageShape[0], kImageShape[1], 1)

                # Apply road judgement to original image as a mask with
                # alpha = 50%
                mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
                mask = scipy.misc.toimage(mask, mode="RGBA")
                street_img = Image.fromarray(img_resized)
                street_img.paste(mask, box=None, mask=mask)

                # Resize image back to original dimensions
                street_img_resized = scipy.misc.imresize(
                    street_img, img_orig_size)

                # Output image as a Numpy array
                return np.array(street_img_resized)

            # Process video frames
            video_outfile = './video/project_video_out.mp4'
            video = VideoFileClip('./video/project_video.mp4')
            video_out = video.fl_image(process_frame)
            video_out.write_videofile(video_outfile, audio=False)

        else:
            print('Error: Invalid mode selected.')
def run():
    """Train the FCN, log training metrics, then label test images and a video.

    ``num_classes``, ``epochs`` and ``batch_size`` are not assigned in this
    function; they are read from the enclosing module scope.  Per-batch and
    per-epoch metrics returned by ``train_nn`` are written to
    ``batch_trg_file.txt`` and ``epoch_trg_file.txt`` in the working directory.
    """
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'

    correct_label = tf.placeholder(tf.int32,
                                   [None, None, None, num_classes],
                                   name='correct_label')
    learning_rate = tf.placeholder(tf.float32, name='learning_rate')

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # TensorFlow configuration object: BFC allocator, JIT level ON_1
    # (ON_2 is the other option).
    # NOTE(review): this config is built but the session below is created
    # without it -- confirm whether tf.Session(config=config) was intended.
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'
    jit_level = tf.OptimizerOptions.ON_1
    config.graph_options.optimizer_options.global_jit_level = jit_level

    def write_metrics(path, header, primary, secondary):
        """Write ``index,primary,secondary`` rows below ``header`` to ``path``."""
        with open(path, 'w') as out:
            out.write(header)
            for idx, value in enumerate(primary):
                row = ",".join([str(idx), str(value), str(secondary[idx])])
                out.write(row + "\n")

    with tf.Session() as sess:
        # Path to vgg model and batch generator for the training images
        vgg_path = os.path.join(data_dir, 'vgg')
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Image augmentation is intentionally not done: the publication
        # mentioned augmentation didn't help much.

        # Pretrained VGG encoder followed by the FCN decoder (skip
        # connections and upsampling)
        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess,
                                                                  vgg_path)
        fcn_output = layers(layer3, layer4, layer7, num_classes)

        # Loss, logits and optimizer; with iou_f set, optimize() also returns
        # the IoU tensor and its update op.
        iou_f = True
        iou = None
        iou_op = None
        if iou_f:
            logits, train_op, cross_entropy_loss, iou, iou_op = optimize(
                fcn_output, correct_label, learning_rate, num_classes, iou_f)
        else:
            logits, train_op, cross_entropy_loss = optimize(
                fcn_output, correct_label, learning_rate, num_classes, iou_f)

        # Initialize variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        save_trg = tf.train.Saver(max_to_keep=5)

        # Train NN using the train_nn function
        loss_list, acc_list, mean_loss_list, mean_iou_list = train_nn(
            sess, epochs, batch_size, get_batches_fn, train_op,
            cross_entropy_loss, input_image, correct_label, keep_prob,
            learning_rate, iou, iou_op, save_trg)

        # Save per-batch accuracy/loss data
        print("batch loss_list length", len(loss_list))
        write_metrics('batch_trg_file.txt', "Batch,Loss,Accuracy\n",
                      loss_list, acc_list)

        # Save per-epoch mean loss / IoU
        write_metrics('epoch_trg_file.txt', "Epoch,Mean Loss,Mean Accuracy\n",
                      mean_loss_list, mean_iou_list)

        # Save inference data using helper.save_inference_samples
        img_labeling = True
        if img_labeling:
            helper.save_inference_samples(runs_dir, data_dir, sess,
                                          image_shape, logits, keep_prob,
                                          input_image)

        # OPTIONAL: Apply the trained model to a video
        video_labeling = True
        if video_labeling:
            print("Starting Video Pipeline")
            vid1 = './driving.mp4'
            voutput1 = './driving_annotated.mp4'
            # Remove a previous output file before writing a new one
            if os.path.isfile(voutput1):
                os.remove(voutput1)
            video_clip = VideoFileClip(vid1)
            processed_video = video_clip.fl_image(
                lambda image: helper.pipeline(image, sess, logits, keep_prob,
                                              input_image, image_shape))
            processed_video.write_videofile(voutput1, audio=False)
def run():
    """Train the FCN on the KITTI road data, save samples and export the model.

    After training and writing inference samples under ``./runs``, the model
    is written to ``./data/saved_model/`` as a checkpoint, a separately
    exported meta graph and a graph definition file (``saved_model.pb``).
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Pretrained VGG location and training batch generator
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Hyperparameters
        epochs = 20
        batch_size = 5

        # Build NN using load_vgg, layers, and optimize function
        correct_label = tf.placeholder(
            dtype=tf.int32,
            shape=[None, None, None, num_classes],
            name='correct_label')
        learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')

        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess,
                                                                  vgg_path)
        nn_last_layer = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)

        # Save the TF model: checkpoint, meta graph and graph definition
        export_dir = './data/saved_model/'
        export_name = 'saved_model'
        export_prefix = export_dir + export_name

        model_saver = tf.train.Saver()
        model_saver.save(sess, export_prefix + '.ckpt')
        model_saver.export_meta_graph(export_prefix + '.meta')
        tf.train.write_graph(sess.graph_def, export_dir,
                             export_name + ".pb", False)
def run():
    """Train the FCN on the KITTI road data and write inference samples.

    Verifies the dataset under ``./data``, downloads the pretrained VGG model
    if needed, builds the network with ``load_vgg``/``layers``/``optimize``,
    trains it with ``train_nn`` and saves inference samples under ``./runs``.
    """
    num_classes = 2

    # Number of epochs and batch size: parameters to be tuned
    num_epochs = 50
    batch_size = 4

    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Placeholders for the labels and the learning rate, both of which
        # are needed by optimize()
        correct_label = tf.placeholder(tf.int32,
                                       shape=[None, None, None, num_classes],
                                       name="Correct_Label")
        learning_rate = tf.placeholder(tf.float32,
                                       shape=None,
                                       name="Learning_Rate")

        # Build the network: VGG layers, decoder layers, loss and optimizer
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess=sess, vgg_path=vgg_path)
        final_layer_output = layers(vgg_layer3_out=layer3_out,
                                    vgg_layer4_out=layer4_out,
                                    vgg_layer7_out=layer7_out,
                                    num_classes=num_classes)
        logits, train_op, Loss = optimize(nn_last_layer=final_layer_output,
                                          correct_label=correct_label,
                                          learning_rate=learning_rate,
                                          num_classes=num_classes)

        # Train the network.  (A tf.train.Saver used to be created here but
        # was never used to save anything, so it has been removed.)
        train_nn(sess=sess,
                 epochs=num_epochs,
                 batch_size=batch_size,
                 get_batches_fn=get_batches_fn,
                 train_op=train_op,
                 cross_entropy_loss=Loss,
                 input_image=input_image,
                 correct_label=correct_label,
                 keep_prob=keep_prob,
                 learning_rate=learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train the FCN, log the per-epoch losses and write inference samples.

    The values returned by ``train_nn`` are written one per line to a
    timestamped ``.log`` file under ``./loss``.  Inference samples are then
    produced by ``helper.save_inference_samples``, which in this variant also
    receives the model directory and a ``tf.train.Saver``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    model_dir = './models'
    loss_dir = './loss'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Placeholders
        correct_label = tf.placeholder(tf.int32,
                                       [None, None, None, num_classes],
                                       name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Getting layers from vgg.
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)

        # Creating new layers.
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)

        # Creating loss and optimizer operations.
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        epochs = 60  # previously tried: 6, 12, 24
        batch_size = 1
        saver = tf.train.Saver()

        loss_epoch = train_nn(sess, epochs, batch_size, get_batches_fn,
                              train_op, cross_entropy_loss, input_image,
                              correct_label, keep_prob, learning_rate)

        # Take the timestamp once so the printed value and the file name
        # cannot disagree across a second boundary.
        timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        print(timestamp)

        # open() does not create missing directories.
        if not os.path.isdir(loss_dir):
            os.makedirs(loss_dir)

        filename = loss_dir + '/' + timestamp + '.log'
        with open(filename, "w") as outfile:
            for entries in loss_epoch:
                outfile.write(str(entries))
                outfile.write("\n")

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(model_dir, runs_dir, data_dir, sess,
                                      image_shape, logits, keep_prob,
                                      input_image, saver)
def run():
    """Train the FCN with separate training/validation batches and save samples.

    The training images are split by ``load_data`` into validation and
    training paths, one batch generator is created for each, and both are
    handed to ``train_nn``.  Inference samples are written afterwards with the
    tag ``'FINAL'``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download the pretrained VGG-16 model if it doesn't exist.  (This used to
    # be called a second time further down; the duplicate call was removed.)
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    # Hyperparameters for training
    epochs = 75
    batch_size = 5
    lr = 0.0001
    learning_rate = tf.constant(lr)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Separate the training image set into training and validation sets
        # (presumably 0.1 is the validation fraction -- confirm in load_data)
        validation_path, training_path, label_path = load_data(
            os.path.join(data_dir, 'data_road/training'), 0.1)

        # Create functions to get batches for validation and training
        get_validation_batches_fn = helper.gen_batch_function(
            validation_path, label_path, image_shape)
        get_training_batches_fn = helper.gen_batch_function(
            training_path, label_path, image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Placeholder for the labels:
        # (batch size, shape[0], shape[1], num_classes)
        correct_label = tf.placeholder(
            tf.float32, [None, image_shape[0], image_shape[1], num_classes])

        # Get the VGG-16 layers
        vgg_input, keep_prob, vgg_layer3, vgg_layer4, vgg_layer7 = load_vgg(
            sess, vgg_path)

        # Get the last layer (output) of the network
        fcn_output = layers(vgg_layer3, vgg_layer4, vgg_layer7, num_classes)

        # Get the logits, optimizer and cross entropy loss; optimize() gets
        # the learning rate as a constant tensor
        logits, optimizer, cross_entropy_loss = optimize(
            fcn_output, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function (receives the plain float lr)
        train_nn(sess, epochs, batch_size, get_validation_batches_fn,
                 get_training_batches_fn, optimizer, cross_entropy_loss,
                 vgg_input, correct_label, keep_prob, lr)

        # Save the inference data from the run
        save_inference_samples(runs_dir, data_dir, sess, image_shape, logits,
                               keep_prob, vgg_input, 'FINAL')
def run():
    """Train the FCN on the KITTI road data and write inference samples.

    Verifies the dataset under ``./data``, downloads the pretrained VGG model
    if needed, builds the network with ``load_vgg``/``layers``/``optimize``,
    trains it with ``train_nn`` and saves inference samples under ``./runs``.
    """
    num_classes = 2
    image_shape = (160, 576)  # KITTI dataset uses 160x576 images
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Placeholders
        correct_label = tf.placeholder(tf.int32,
                                       [None, None, None, num_classes],
                                       name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Getting layers from vgg.
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)

        # Creating new layers.
        last_layer = layers(layer3_out, layer4_out, layer7_out, num_classes)

        # Creating loss and optimizer operations.
        logits, train_op, cross_entropy_loss = optimize(
            last_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function.  (A tf.train.Saver used to be
        # created here but was never used, so it has been removed.)
        epochs = 35
        batch_size = 3
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)

        # OPTIONAL: Apply the trained model to a video
def run():
    """Train the FCN, write inference samples and annotate a video.

    After training and saving inference samples under ``./runs``, every frame
    of ``./data/harder_challenge_video.mp4`` is run through the network and
    the result is written to ``./data/segmented_project_video.mp4``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    USE_GPU = True
    config = tf.ConfigProto(device_count={'GPU': 1 if USE_GPU else 0})

    with tf.Session(config=config) as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Building NN using load_vgg, layers, and optimize function
        correct_label = tf.placeholder(tf.float32)
        learning_rate = tf.placeholder(tf.float32)
        image_input, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Training NN using the train_nn function
        # Epochs 20 = Average loss per image in epoch 20 has been: 0.1744
        epochs = 30
        batch_size = 1
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, image_input, correct_label, keep_prob,
                 learning_rate)

        # Saving inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, image_input)

        # OPTIONAL: Applying the trained model to a video

        # Build the softmax op once.  Creating it inside the per-frame
        # callback would add a new node to the graph for every frame.
        softmax = tf.nn.softmax(logits)

        def process_videoimage(original_image,
                               sess=sess,
                               image_shape=image_shape,
                               softmax=softmax,
                               keep_prob=keep_prob,
                               image_input=image_input):
            """Overlay the predicted mask on one frame; return it as an array."""
            original_image_shape = original_image.shape
            image = scipy.misc.imresize(original_image, image_shape)
            im_softmax = sess.run([softmax], {
                keep_prob: 1.0,
                image_input: [image]
            })
            im_softmax = im_softmax[0][:, 1].reshape(image_shape[0],
                                                     image_shape[1])
            segmentation = (im_softmax > 0.5).reshape(image_shape[0],
                                                      image_shape[1], 1)
            mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
            mask = scipy.misc.toimage(mask, mode="RGBA")
            street_im = scipy.misc.toimage(image)
            street_im.paste(mask, box=None, mask=mask)
            # Scale the annotated frame back to the input frame's shape
            return np.array(
                scipy.misc.imresize(street_im, original_image_shape))

        clip1 = VideoFileClip("./data/harder_challenge_video.mp4")
        # NOTE: fl_image expects color images!!
        white_clip = clip1.fl_image(process_videoimage)
        white_clip.write_videofile("./data/segmented_project_video.mp4",
                                   audio=False)
def run(validate):
    """Build the 3-class network, then restore or train it and run inference.

    Args:
        validate: when truthy, restore the weights from
            ``./saved/_180603_01_001/segmentation_model.ckpt`` instead of
            training; otherwise train with ``train_nn`` and save a checkpoint
            to ``./saved/_180603_02/segmentation_model.ckpt``.

    In both cases inference samples for the validation files are written with
    ``helper.save_inference_samples``.
    """
    num_classes = 3
    image_shape = (288, 416)  # alternative tried: (576, 800)
    data_dir = './data'
    train_dir = 'Train'
    runs_dir = './runs'

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    epochs = 5
    batch_size = 4

    with tf.Session() as sess:
        # Placeholders (definitions taken from project_tests.py)
        correct_label = tf.placeholder(tf.float32,
                                       [None, None, None, num_classes],
                                       name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')

        # Create function to get batches; this helper variant also returns
        # the training and validation file lists
        get_batches_fn, train_files, validation_files = \
            helper.gen_batch_function(os.path.join(data_dir, train_dir),
                                      image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        print("train_paths : %d" % len(train_files))
        print("validation_paths: %d" % len(validation_files))

        # Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        if validate:
            # Load the previously saved model
            print(">>>>>>>>>> vars in saved model :")
            saver.restore(sess,
                          "./saved/_180603_01_001/segmentation_model.ckpt")
            print("Model loaded!")
        else:
            # Train from the freshly initialised variables.  Nothing is
            # restored on this path, so the former "Model loaded!" message
            # was misleading and is no longer printed here.
            print("start train 180603_002 : epochs=" + str(epochs) +
                  " ,batch_size=" + str(batch_size))
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, input_image, correct_label,
                     keep_prob, learning_rate, num_classes, train_files)

            # Save the trained model
            saver.save(sess, './saved/_180603_02/segmentation_model.ckpt')
            print("Model Saved!")

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image,
                                      validation_files)
        print("done")
def run():
    """Train or restore the FCN, then process a video or the test images.

    ``training``, ``video``, ``epochs`` and ``batch_size`` are not assigned in
    this function; they are read from the enclosing module scope.  When
    ``training`` is truthy the network is trained and saved to
    ``./data/model/model.ckpt``, otherwise it is restored from that path.
    When ``video`` is truthy ``./videoinput.mp4`` is handed to
    ``video_pipeline``; otherwise inference samples are saved.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    model_dir = './data/model/model.ckpt'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Pretrained VGG location and training batch generator
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        correct_label = tf.placeholder(tf.int32,
                                       [None, None, None, num_classes])
        learning_rate = tf.placeholder(tf.float32)

        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess,
                                                                  vgg_path)
        nn_last_layer = layers(layer3, layer4, layer7, num_classes)

        # This optimize() variant additionally returns an IoU object
        logits, train_op, cross_entropy_loss, iou_obj = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        checkpoint_io = tf.train.Saver(max_to_keep=5)

        if training:
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, input_image, correct_label,
                     keep_prob, learning_rate, iou_obj)
            checkpoint_io.save(sess, model_dir)
            print("Model saved to: ", model_dir)
        else:
            checkpoint_io.restore(sess, model_dir)
            print("Model restored from: ", model_dir)

        if video:
            source_video = "./videoinput.mp4"
            video_pipeline(source_video, runs_dir, sess, image_shape, logits,
                           keep_prob, input_image)
        else:
            helper.save_inference_samples(runs_dir, data_dir, sess,
                                          image_shape, logits, keep_prob,
                                          input_image)
def run():
    """Train the FCN using the module-level settings and save inference samples.

    Reads ``use_pretrained_weights``, ``LEARNING_RATE``, ``EPOCHS`` and
    ``BATCH_SIZE`` from module scope and sets the module-level
    ``is_in_training`` flag to ``False`` once training has finished.
    """
    global is_in_training, use_pretrained_weights, train_only_decoder
    global KEEP_PROB, LEARNING_RATE, EPOCHS, BATCH_SIZE

    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Pretrained VGG location and training batch generator
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Placeholders
        correct_label = tf.placeholder(tf.int32,
                                       [None, None, None, num_classes],
                                       name='dec_correct_label')
        learning_rate = tf.placeholder(tf.float32, name='dec_learning_rate')

        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess,
                                                                  vgg_path)
        nn_last_layer = layers(layer3, layer4, layer7, num_classes)

        # NOTE(review): optimize() receives the LEARNING_RATE constant while
        # train_nn() below receives the placeholder -- confirm this is
        # intended.
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, LEARNING_RATE, num_classes)

        if use_pretrained_weights:
            # Initialize only variables whose name contains 'dec_' or 'beta'
            selected = []
            for var in tf.global_variables():
                if 'dec_' in var.name or 'beta' in var.name:
                    selected.append(var.initializer)
            sess.run(selected)
        else:
            sess.run(tf.global_variables_initializer())

        # Train NN using the train_nn function
        best_model_path = train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn,
                                   train_op, cross_entropy_loss, input_image,
                                   correct_label, keep_prob, learning_rate)

        is_in_training = False

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train the network, save a checkpoint under ``model/``, write inference samples.

    Relies on module-level settings defined elsewhere in the file:
    ``data_dir``, ``vgg_dir``, ``glob_trainig_images_path``,
    ``glob_labels_trainig_image_path``, ``image_shape``, ``num_classes``,
    ``epochs`` and ``batch_size``.
    """
    runs_dir = './runs'
    print("\n\nTesting for datatset presence......")
    tests.test_looking_for_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(vgg_dir)

    with tf.Session() as sess:
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            glob_trainig_images_path,
            glob_labels_trainig_image_path,
            image_shape)

        # TF placeholders
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Build NN using load_vgg, layers, and optimize
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_dir)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # Debug dump of the values later handed to save_inference_samples,
        # each framed by a row of asterisks.
        separator = 100 * '*'
        print(separator)
        for debug_value in (image_shape, logits, keep_prob, input_image):
            print(debug_value)
            print(separator)

        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        folderToSaveModel = "model"

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        # NOTE: _var_list is a private Saver attribute, used here only to
        # print which variables will be saved.
        for i, var in enumerate(saver._var_list):
            print('Var {}: {}'.format(i, var))

        # Create the checkpoint folder on first use. The original called
        # os.makedirs(path) with an undefined name, raising NameError.
        if not os.path.exists(folderToSaveModel):
            os.makedirs(folderToSaveModel)

        pathSaveModel = os.path.join(folderToSaveModel, "model.ckpt")
        pathSaveModel = saver.save(sess, pathSaveModel)
        print(colored("Model saved in path: {}".format(pathSaveModel), 'green'))

        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)
# ??? correct_label, learning_rate logits, train_op, cross_entropy_loss = optimize( final_layer_output, correct_label, learning_rate, num_classes) # Initialize all variables sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) print("Model build successful, starting training...") # Train the neural network # ??? correct_label, learning_rate train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op, cross_entropy_loss, input_image, correct_label, keep_prob, learning_rate) # Save the trained session # saver = tf.train.Saver() # saver.save(sess, runs_dir) # TODO: Save inference data using helper.save_inference_samples helper.save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image) # OPTIONAL: Apply the trained model to a video if __name__ == '__main__': helper.maybe_download_pretrained_vgg('data/') run()
def run():
    """Train on the KITTI road data, save inference samples, graph and weights.

    Inference samples are written to ``./runs<epochs>``, the graph is dumped
    to ``/home/ubuntu/graph`` and the weights are saved to
    ``/model_<epochs>.ckpt``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # TF placeholders
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Build NN using load_vgg, layers, and optimize
        (input_image, keep_prob,
         vgg_layer3_out, vgg_layer4_out, vgg_layer7_out) = load_vgg(sess, vgg_path)
        nn_last_layer = layers(
            vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        epochs = 48  # values listed in the original note: 6 12 10 24 30 48 50
        batch_size = 5
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Save inference data; the output directory name carries the epoch
        # count so different runs do not share a directory.
        runs_dir = runs_dir + str(epochs)
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)

        # Dump the graph. ``graph=`` replaces the deprecated ``graph_def=``
        # argument, and the writer is closed so the event file is flushed.
        summary_writer = tf.summary.FileWriter(
            '/home/ubuntu/graph', graph=sess.graph)
        summary_writer.close()

        # Save model weights to disk.
        # NOTE(review): this path is at the filesystem root; confirm that a
        # relative path such as "./model_<epochs>.ckpt" was not intended.
        saver = tf.train.Saver()
        model_path = "/model_{}.ckpt".format(epochs)
        save_path = saver.save(sess, model_path)
        print("Model saved in file: %s" % save_path)
def run():
    """Train on the KITTI road data, save a checkpoint, write inference samples.

    The checkpoint is written with the prefix ``my_segmentation_model`` in
    the current working directory.
    """
    num_classes = 2
    image_shape = (160, 576)  # KITTI dataset uses 160x576 images
    epochs = 40
    batch_size = 15
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # Placeholders fed during training. keep_prob is NOT created here: the
    # tensor used is the one returned by load_vgg() below. The original also
    # built a keep_prob placeholder here that was immediately shadowed, and
    # an unused ``dropout`` local; both are removed.
    correct_label = tf.placeholder(
        tf.float32, [None, image_shape[0], image_shape[1], num_classes])
    learning_rate = tf.placeholder(tf.float32)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize
        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        print("loadded vgg")
        model_output = layers(layer3, layer4, layer7, num_classes)
        print("got output")
        logits, train_op, cross_entropy_loss = optimize(
            model_output, correct_label, learning_rate, num_classes)
        print("optimizer configured")

        # The Saver is created once the whole graph has been built.
        saver = tf.train.Saver()

        sess.run(tf.compat.v1.global_variables_initializer())
        print("global variables loaded")
        sess.run(tf.local_variables_initializer())
        print("Model build successful, starting training")

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, image_input, correct_label, keep_prob,
                 learning_rate)

        saver.save(sess, 'my_segmentation_model')
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            image_input)
        print('Done')
def run():
    """Train the network or score a serialized graph, depending on ``sys.argv``.

    ``sys.argv[1]`` selects the mode (case-insensitive, default ``"train"``):

    * ``train`` - build and train the network, write the graph and weights
      under ``./fcn8``, then run inference and scoring.
    * ``test``  - load the GraphDef file named by ``sys.argv[2]`` and run
      inference and scoring on it.

    Any other mode prints an error message.
    """
    num_classes = 13
    image_shape = (608, 800)
    data_dir = '../data'
    runs_dir = './runs'
    epochs = 10
    batch_size = 16

    mode = sys.argv[1].lower() if len(sys.argv) > 1 else "train"
    print("Current mode: ", mode)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    LOGDIR = os.path.join('./data', 'fcn8_log')
    ground_truth_dir = os.path.join(data_dir, 'Test', 'CameraSeg')

    if mode == "train":
        with tf.Session() as sess:
            vgg_path = os.path.join(data_dir, 'vgg')
            train_dir = os.path.join(data_dir, 'Train')
            get_batches_fn = helper.gen_batch_function(train_dir, image_shape)

            # Build NN using load_vgg, layers, and optimize
            (input_image, keep_prob,
             layer3_out, layer4_out, layer7_out) = load_vgg(sess, vgg_path)
            layer_output = layers(
                layer3_out, layer4_out, layer7_out, num_classes)

            label_shape = (None, image_shape[0], image_shape[1], num_classes)
            correct_label = tf.placeholder(
                tf.int32, label_shape, name='correct_label')
            learning_rate = tf.placeholder(tf.float32, name='learning_rate')

            logits, train_op, cross_entropy_loss = optimize(
                layer_output, correct_label, learning_rate, num_classes)

            # Summary op and writer are handed to train_nn.
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(LOGDIR, graph=sess.graph)

            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, input_image, correct_label,
                     keep_prob, learning_rate, train_writer, merged,
                     logits=logits)

            # Save the model for future use. Binary format is used because,
            # per the original note, as_text=True makes freeze_graph throw a
            # memory error.
            tf.train.write_graph(
                sess.graph_def, './fcn8', 'base_graph.pb', as_text=False)
            print("Model graph saved in path: ./fcn8/base_graph.pb")
            saver = tf.train.Saver()
            save_path = saver.save(sess, "./fcn8/ckpt")
            print("Model weights saved in path: %s" % save_path)

            # Time the inference pass, then score its output.
            started = time.time()
            output_dir = helper.save_inference_samples(
                runs_dir, data_dir, sess, image_shape, logits, keep_prob,
                input_image)
            duration = time.time() - started
            print("Run complete, time taken = {0}".format(duration))
            helper.calculate_score(ground_truth_dir, output_dir)
        return

    if mode != "test":
        print("Command unrecognized.")
        return

    if len(sys.argv) < 3:
        print("main.py test <graph location>")
        return

    graph_file = sys.argv[2]
    use_xla = False

    config = tf.ConfigProto()
    if use_xla:
        jit_level = tf.OptimizerOptions.ON_1
        config.graph_options.optimizer_options.global_jit_level = jit_level

    with tf.Session(graph=tf.Graph(), config=config) as sess:
        graph_def = tf.GraphDef()
        graph = sess.graph
        with tf.gfile.Open(graph_file, 'rb') as graph_fh:
            serialized = graph_fh.read()
            graph_def.ParseFromString(serialized)
        tf.import_graph_def(graph_def, name='')

        # Input/output tensors are looked up by their fixed names.
        x = graph.get_tensor_by_name('input_2:0')
        out = graph.get_tensor_by_name('output_node0:0')

        # Time the inference pass, then score its output.
        started = time.time()
        output_dir = helper.save_inference_samples_2(
            runs_dir, data_dir, sess, image_shape, out, None, x)
        duration = time.time() - started
        print("Run complete, time taken = {0}".format(duration))
        helper.calculate_score(ground_truth_dir, output_dir)
def run():
    """Train the VGG-based network on the traffic-light images and save inference output.

    Images are split into training and validation sets. Training uses the
    training paths; ``helper.save_inference_samples`` is run on the
    validation paths.
    """
    num_classes = 4  # red, yellow, green, unknown
    proportion_train = 0.75  # remainder is validation; no separate test set
    img_type = "real"  # "sim", "real" or "both"

    # Per the original notes: source images are 800x600 and are shrunk here
    # rather than clipped (clipping logic is still TODO). Height x width,
    # multiples of 32; 576x800 ran out of GPU memory.
    image_shape = (288, 384)
    data_dir = './data'
    runs_dir = './runs'

    epochs = 20
    batch_size = 1  # kept small because of memory warnings
    # Other hyperparameters are set inside train_nn().

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')

        # Split images into training and validation sets.
        split_paths = helper.get_split_image_paths(
            proportion_train, img_type, '../data/training_images')
        training_image_paths, validation_image_paths = split_paths

        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            training_image_paths, image_shape, num_classes)

        # Load VGG, pick out the tensors to attach to, then add our layers.
        (input_image, keep_prob,
         layer3_out, layer4_out, layer7_out) = load_vgg(sess, vgg_path)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)

        # Debug aid: dump the graph for TensorBoard and list the trainable
        # variables. View with:
        #   tensorboard --logdir=<vgg_path>/logs --host localhost
        # and open http://localhost:6006 in a browser.
        dump_graph_for_tensorboard = True
        if dump_graph_for_tensorboard:
            print(tf.trainable_variables())
            log_path = os.path.join(vgg_path, 'logs')
            writer = tf.summary.FileWriter(log_path, graph=sess.graph)

        # Label placeholder: first dimension is left dynamic, the class
        # dimension is fixed so shape bugs show up early.
        correct_label = tf.placeholder(
            tf.float32, shape=[None, num_classes], name='correct_label')
        # One-hot rows spanning all pixels of all images in the batch.
        flattened_label = tf.reshape(
            correct_label, (-1, num_classes), name='flattened_label')
        learning_rate = tf.placeholder(
            tf.float32, shape=(), name='learning_rate')

        # NOTE: optimize() is given correct_label, while train_nn() below is
        # handed flattened_label.
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # Variables must be initialised before training.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Train NN using the train_nn function
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, flattened_label, keep_prob,
                 learning_rate)

        # Save inference data for the validation images.
        helper.save_inference_samples(
            runs_dir, validation_image_paths, sess, image_shape, logits,
            keep_prob, input_image)
def run():
    """Command-line entry point: optionally restore, train, and run segmentation.

    Flags:
        --train          train the network and save a checkpoint to
                         ``CHECKPOINT_PREFIX``.
        --restore PATH   restore variables from the checkpoint at PATH.
        --video PATH     segment the video at PATH and write the result next
                         to it as ``<name>_output<ext>``. Without this flag,
                         inference samples are saved for the test images.

    Relies on the module-level constants ``IMAGE_SHAPE``, ``NUM_CLASSES``,
    ``EPOCHS``, ``BATCH_SIZE``, ``CHECKPOINT_FOLDER`` and
    ``CHECKPOINT_PREFIX``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true',
                        help='Run the training')
    parser.add_argument('--restore', type=str, nargs='?',
                        help='Restore from a checkpoint')
    parser.add_argument('--video', type=str, nargs='?',
                        help='Run segmentation on a video')
    args = parser.parse_args()

    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # Create function to get batches
    get_batches_fn = helper.gen_batch_function(
        os.path.join(data_dir, 'data_road/training'), IMAGE_SHAPE)

    # Path to vgg model
    vgg_path = os.path.join(data_dir, 'vgg')

    with tf.Session() as sess:
        # Input tensor, keep probability and the three tapped VGG layers.
        image_input, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)

        # Network output after adding our layers on top of VGG.
        model_output = layers(layer3_out, layer4_out, layer7_out, NUM_CLASSES)

        # Placeholders
        correct_label = tf.placeholder(
            tf.float32, [None, IMAGE_SHAPE[0], IMAGE_SHAPE[1], NUM_CLASSES])
        learning_rate = tf.placeholder(tf.float32)

        # logits: each row is a pixel, each column a class.
        # train_op: the training operation.
        # cross_entropy_loss: the cost being minimised.
        logits, train_op, cross_entropy_loss = optimize(
            model_output, correct_label, learning_rate, NUM_CLASSES)

        # Initialise first so that a restore (below) overrides the initial
        # values rather than the other way round.
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        saver = tf.train.Saver()

        if args.restore:
            saver.restore(sess, args.restore)
            print('Model restored from {0}'.format(args.restore))

        if args.train:
            # Train the neural network
            train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op,
                     cross_entropy_loss, image_input, correct_label,
                     keep_prob, learning_rate)

            pathlib.Path(CHECKPOINT_FOLDER).mkdir(parents=True, exist_ok=True)
            save_path = saver.save(sess, CHECKPOINT_PREFIX)
            print("Model saved in path: {}".format(save_path))

        if args.video:
            # Imported lazily so moviepy is only needed for video mode.
            from moviepy.editor import VideoFileClip

            def process_image(image):
                return helper.process_image(
                    sess, logits, keep_prob, image_input, image, IMAGE_SHAPE)

            clip = VideoFileClip(args.video)
            white_clip = clip.fl_image(process_image)

            # Insert "_output" before the extension. os.path.splitext copes
            # with paths that have no extension or a dot only in a directory
            # name; the original rsplit/format expression raised IndexError
            # or produced a mangled path in those cases.
            video_root, video_ext = os.path.splitext(args.video)
            output_file = "{0}_output{1}".format(video_root, video_ext)
            white_clip.write_videofile(output_file, audio=False)
        else:
            # Run the model on the test images and save each painted output.
            helper.save_inference_samples(
                runs_dir, data_dir, sess, IMAGE_SHAPE, logits, keep_prob,
                image_input)

    print("All done!")
def run():
    """Check the KITTI data, build and train the network, save inference samples.

    Hyper-parameters (``LEARNING_RATE``, ``KEEP_PROB``, ``EPOCHS``,
    ``BATCH_SIZE``, ``NUM_CLASSES``, ``NUM_FEATURES``) are defined at the top
    of main.py; this function reads ``EPOCHS``, ``BATCH_SIZE`` and
    ``NUM_CLASSES`` directly.
    """
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'

    print(
        '\nTEST KITTI DATASET ================================================\n'
    )
    tests.test_for_kitti_dataset(data_dir)

    print(
        '\nRUNNING MAIN ======================================================\n'
    )

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # TF placeholders
        learning_rate = tf.placeholder(tf.float32)
        label_shape = (None, None, None, NUM_CLASSES)
        correct_label = tf.placeholder(tf.float32, shape=label_shape)

        # Build NN using load_vgg, layers, and optimize
        (input_image, keep_prob,
         vgg_layer3_out, vgg_layer4_out, vgg_layer7_out) = load_vgg(sess, vgg_path)

        # Use the model architecture alternative to fcn-8s.
        alt_architecture = True
        nn_last_layer = layers(
            vgg_layer3_out, vgg_layer4_out, vgg_layer7_out,
            NUM_CLASSES, alt_architecture)

        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, NUM_CLASSES)

        # Train NN using the train_nn function
        train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)