def main(argv=None):
    """Drive the robot from camera frames, optionally via semantic segmentation.

    Builds the DeepLab-ResNet inference graph on a fixed-size placeholder,
    turns the arg-max class map into a colour image with a one-hot x
    colour-table matmul, restores the weights found under ``RESTORE_PATH``,
    and then runs an actor thread that feeds each frame to the policy
    predictor and publishes the resulting action over ROS until the node is
    shut down.

    Args:
        argv: Not used by this function.
    """
    # Input placeholder: one image of the configured size.
    input_img = tf.placeholder(tf.float32, [1, FLAGS.height, FLAGS.width, 3])

    # Create network.
    net = DeepLabResNetModel({'data': input_img}, is_training=False,
                             num_classes=NUM_CLASSES)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions: upsample the logits to the input size, then take the
    # per-pixel arg-max class.
    raw_output = net.layers['fc_out']
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(input_img)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)

    # Color transform: reverse the channel order of the colour table
    # (presumably RGB -> BGR for OpenCV -- confirm) and look colours up with
    # a one-hot matmul so the whole mapping stays inside the graph.
    color_mat = label_colours[..., [2, 1, 0]]
    color_mat = tf.constant(color_mat, dtype=tf.float32)
    onehot_output = tf.one_hot(raw_output_up, depth=len(label_colours))
    onehot_output = tf.reshape(onehot_output, (-1, len(label_colours)))
    pred = tf.matmul(onehot_output, color_mat)
    pred = tf.reshape(pred, (1, FLAGS.height, FLAGS.width, 3))

    # Set up TF session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load weights.
    ckpt = tf.train.get_checkpoint_state(RESTORE_PATH)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    # Alternative: predictor.getSingleFramePredictor(load_path=FLAGS.path, transform=False)
    p = predictor.getPredictor(load_path=FLAGS.path, transform=False)
    # Alternative: RosRMPMotionPublisher(linear=FLAGS.lin, angular=FLAGS.ang)
    ros_rmp_motion_publisher = RosKobukiMotionPublisher(linear=FLAGS.lin,
                                                        angular=FLAGS.ang)

    # cv2.imwrite does not create missing directories (it just returns
    # False), so make sure the dump directory exists up front.
    if not os.path.exists('imgs'):
        os.makedirs('imgs')

    def actor(stop_event):
        """Consume frames, run the policy and publish actions until stopped."""
        global image_queue
        # Wait for the first frame, but keep watching stop_event: otherwise a
        # shutdown before any image arrives would leave join() hanging.
        while not stop_event.is_set() and len(image_queue) == 0:
            print('Wait for image queue filled ... (sleep 1)')
            stop_event.wait(1)

        timestep = 0
        while not stop_event.is_set():
            start = time.time()
            # NOTE(review): always reads element 0 while the callback only
            # appends; presumably image_queue is a bounded deque so this is
            # the newest frame -- confirm against its definition.
            raw_img = image_queue[0]
            img = raw_img - IMG_MEAN
            img = np.expand_dims(img, axis=0)

            # NOTE(review): with use_depth and use_seg both set, `preds` is
            # never assigned and the next branch raises NameError -- confirm
            # that this flag combination is unsupported.
            if not FLAGS.use_depth:
                preds = sess.run(pred, feed_dict={input_img: img})

            if FLAGS.use_seg:
                msk_img = preds[0].astype(np.uint8)
            else:
                msk_img = raw_img
            s = cv2.resize(msk_img, (84, 84))

            if FLAGS.use_depth:
                s = np.expand_dims(s, axis=-1)
            else:
                s = cv2.cvtColor(s, cv2.COLOR_BGR2RGB)

            act = p(s)
            ros_rmp_motion_publisher.publish(act)
            end = time.time()
            print('Inference time = %f' % (end - start))

            # Dump the first 3000 frames for offline inspection.
            if timestep < 3000:
                cv2.imwrite('imgs/raw_img_%05d.png' % (timestep), raw_img)
                cv2.imwrite('imgs/msk_img_%05d.png' % (timestep), msk_img)
            timestep += 1
            print("STEP: {}".format(timestep))

    def image_subscribe_callback(img):
        """Store each incoming camera frame for the actor thread."""
        global image_queue
        image_queue.append(img)

    # Keep a reference to the subscriber for the lifetime of the node.
    if FLAGS.use_depth:
        ros_camera_image_subscriber = RosDepthImageSubscriber(
            user_callback=image_subscribe_callback)
    else:
        ros_camera_image_subscriber = RosImageSubscriber(
            user_callback=image_subscribe_callback)

    rospy.init_node('agent', anonymous=True)

    actor_thread_stop_event = threading.Event()
    actor_thread = threading.Thread(target=actor,
                                    args=(actor_thread_stop_event, ))
    actor_thread.start()
    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")
    finally:
        actor_thread_stop_event.set()
        actor_thread.join()
def main(argv=None):
    """Serve policy actions (or masks) to a socket client, one image per request.

    Builds the DeepLab-ResNet inference graph, restores weights from
    ``RESTORE_PATH``, then loops forever: receive an encoded image, optionally
    segment it, shrink it to 84x84 and either send the mask back as PNG bytes
    (``FLAGS.rtimg``) or send the pickled action chosen by the predictor.

    Args:
        argv: Not used by this function.
    """
    print('Use seg', FLAGS.use_seg)
    input_img = tf.placeholder(tf.float32, [1, FLAGS.height, FLAGS.width, 3])

    # Create network.
    net = DeepLabResNetModel({'data': input_img}, is_training=False,
                             num_classes=NUM_CLASSES)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['fc_out']
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(input_img)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Set up TF session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load weights.
    ckpt = tf.train.get_checkpoint_state(RESTORE_PATH)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    p = predictor.getPredictor(load_path=FLAGS.path, transform=False)

    # cv2.imwrite does not create missing directories (it just returns
    # False), so make sure the dump directory exists up front.
    if not os.path.exists('seg_image'):
        os.makedirs('seg_image')

    # create server
    # TODO: Remove socket dependency
    server = serverSock(name=FLAGS.name)
    server.create(port=FLAGS.port)
    # The request loop never exits normally, so the socket is released in a
    # finally block; a close() placed after the loop would be unreachable.
    try:
        server.waitForClient()

        img_counter = 0
        while True:
            print('wait for task')
            # TODO: img from ros callback function
            img = server.recv()
            print('receive task')
            # frombuffer replaces the deprecated binary mode of np.fromstring.
            img = np.frombuffer(img, np.uint8)
            img = cv2.imdecode(img, 1)
            hi, wi, _ = img.shape
            store_img = img
            img_counter += 1

            img = cv2.resize(img, (FLAGS.width, FLAGS.height)).astype(float)
            raw_img_resize = img

            if FLAGS.use_seg:
                # Only run the network when its output is actually used.
                net_input = np.expand_dims(raw_img_resize - IMG_MEAN, axis=0)
                preds = sess.run(pred, feed_dict={input_img: net_input})
                # NOTE(review): elsewhere decode_labels output is indexed
                # with [0] (a batch); confirm cv2.resize accepts what it
                # returns here.
                msk = decode_labels(preds, num_classes=NUM_CLASSES)
            else:
                msk = raw_img_resize
            msk = cv2.resize(msk, (84, 84))

            if FLAGS.rtimg == True:
                # Return the 84x84 view, scaled back to the client's size.
                msk = cv2.resize(msk, (wi, hi))
                _, msk = cv2.imencode('.png', msk)
                msk = msk.tobytes()
                # TODO: No use
                server.send(msk)
            else:
                act = p(msk)
                cv2.imwrite('seg_image/raw_img_%05d_%d.png' % (img_counter, act),
                            store_img)
                print('predict: {}'.format(act))
                act = pickle.dumps(act)
                # TODO: Send action by ROS
                server.send(act)
            print('task done')
    finally:
        # TODO: No use
        server.close()
def main():
    """Segment a single image file and save the colourised mask.

    Reads ``args.img_path`` (PNG or JPEG), runs DeepLab-ResNet inference with
    the weights found under ``args.restore_from``, decodes the class map with
    ``decode_labels`` and writes the result to ``SAVE_DIR`` under the input's
    file name. Exits the process when the file is missing or has an
    unsupported extension.
    """
    args = get_arguments()

    filename = args.img_path.split('/')[-1]
    file_type = filename.split('.')[-1]

    if os.path.isfile(args.img_path):
        print('successful load img: {0}'.format(args.img_path))
    else:
        print('not found file: {0}'.format(args.img_path))
        sys.exit(0)

    # Prepare image.
    extension = file_type.lower()
    if extension == 'png':
        img = tf.image.decode_png(tf.read_file(args.img_path), channels=3)
    elif extension in ('jpg', 'jpeg'):
        img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3)
    else:
        print('cannot process {0} file.'.format(file_type))
        # Stop here: continuing would fail below with a NameError on `img`.
        # Non-zero status keeps the failure visible to callers, as before.
        sys.exit(1)

    # Convert RGB to BGR.
    img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img)
    img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]),
                  dtype=tf.float32)
    # Extract mean.
    img -= IMG_MEAN

    # Create network.
    net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)},
                             is_training=False, num_classes=NUM_CLASSES)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['fc_out']
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Set up TF session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load weights.
    ckpt = tf.train.get_checkpoint_state(args.restore_from)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    # Perform inference.
    preds = sess.run(pred)

    msk = decode_labels(preds, num_classes=NUM_CLASSES)
    im = Image.fromarray(msk[0])
    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    # os.path.join works whether or not SAVE_DIR ends with a separator.
    save_path = os.path.join(SAVE_DIR, filename)
    im.save(save_path)

    print('The output file has been saved to {0}'.format(save_path))
def main():
    """Create the model and start the training.

    Builds a multi-GPU DeepLab-ResNet training graph: one input batch is split
    across ``args.gpu_nums`` towers, per-tower cross-entropy sums are combined
    into a single pixel-averaged loss plus L2 weight decay, and one momentum
    update is applied together with the streaming-mIoU update ops and an
    exponential moving average of the variables. Variables whose name does not
    contain ``'fc'`` are restored from ``args.restore_from``; snapshots and
    summaries go to ``args.snapshot_dir``.
    """
    # NOTE(review): the nesting of the name scopes below was reconstructed
    # from whitespace-collapsed source; confirm it against the original
    # layout, since it affects op and summary tag names.
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    #tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Staircase exponential decay: the rate is halved every 20000 steps.
        base_lr = tf.constant(args.learning_rate)
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.train.exponential_decay(base_lr, step_ph, 20000, 0.5,
                                                   staircase=True)
        tf.summary.scalar('lr', learning_rate)

        opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
        #opt = tf.train.RMSPropOptimizer(learning_rate, 0.9, momentum=0.9, epsilon=1e-10)
        #opt = tf.train.AdamOptimizer(learning_rate)

        losses = []
        # Streaming-metric update ops, run together with the optimizer step.
        metric_update_ops = []

        total_batch_size = args.batch_size*args.gpu_nums

        with tf.name_scope('DeepLabResNetModel'):
            with tf.name_scope("create_inputs"):
                reader = ImageReader(
                    args.data_dir,
                    args.data_list,
                    input_size,
                    args.random_blur,
                    args.random_scale,
                    args.random_mirror,
                    args.random_rotate,
                    args.ignore_label,
                    IMG_MEAN,
                    coord)
                image_batch, label_batch = reader.dequeue(total_batch_size)

                images_splits = tf.split(axis=0, num_or_size_splits=args.gpu_nums,
                                         value=image_batch)
                labels_splits = tf.split(axis=0, num_or_size_splits=args.gpu_nums,
                                         value=label_batch)

            net = DeepLabResNetModel({'data': images_splits}, is_training=True,
                                     num_classes=args.num_classes)

            raw_output_list = net.layers['fc_voc12']

            # Collected once here instead of once per tower; the towers below
            # only add ops, summaries and metric variables.
            all_trainable = [v for v in tf.trainable_variables()]

            num_valide_pixel = 0
            for i, raw_output in enumerate(raw_output_list):
                with tf.device('/gpu:%d' % i):
                    raw_output_up = tf.image.resize_bilinear(
                        raw_output, size=input_size, align_corners=True)

                    tf.summary.image('images_{}'.format(i),
                                     images_splits[i]+IMG_MEAN, max_outputs = 4)
                    tf.summary.image('labels_{}'.format(i),
                                     labels_splits[i], max_outputs = 4)
                    tf.summary.image(
                        'predict_{}'.format(i),
                        tf.cast(tf.expand_dims(tf.argmax(raw_output_up, -1), 3),
                                tf.float32),
                        max_outputs = 4)

                    # Predictions: ignoring all predictions with labels
                    # greater or equal than n_classes
                    raw_prediction = tf.reshape(raw_output_up,
                                                [-1, args.num_classes])
                    label_proc = prepare_label(
                        labels_splits[i],
                        tf.stack(raw_output_up.get_shape()[1:3]),
                        num_classes=args.num_classes,
                        one_hot=False)  # [batch_size, h, w]
                    raw_gt = tf.reshape(label_proc, [-1,])
                    #indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
                    indices = tf.where(tf.logical_and(
                        tf.less(raw_gt, args.num_classes),
                        tf.greater_equal(raw_gt, 0)))
                    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                    prediction = tf.gather(raw_prediction, indices)

                    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
                        tf.argmax(tf.nn.softmax(prediction), axis=-1), gt,
                        num_classes=args.num_classes)
                    tf.summary.scalar('mean IoU_{}'.format(i), mIoU)
                    metric_update_ops.append(update_op)

                    # Pixel-wise softmax loss.
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=prediction, labels=gt)
                    num_valide_pixel += tf.shape(gt)[0]

                    losses.append(tf.reduce_sum(loss))

            l2_losses = [args.weight_decay * tf.nn.l2_loss(v)
                         for v in tf.trainable_variables() if 'weights' in v.name]
            # Average over the valid pixels of all towers, then add weight decay.
            reduced_loss = tf.truediv(
                tf.reduce_sum(losses),
                tf.cast(num_valide_pixel, tf.float32)) + tf.add_n(l2_losses)
            tf.summary.scalar('average_loss', reduced_loss)

        grads = tf.gradients(reduced_loss, all_trainable,
                             colocate_gradients_with_ops=True)

        variable_averages = tf.train.ExponentialMovingAverage(0.99, step_ph)
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        train_op = tf.group(opt.apply_gradients(zip(grads, all_trainable)),
                            *metric_update_ops)
        train_op = tf.group(train_op, variables_averages_op)

        summary_op = tf.summary.merge_all()

        # Set up tf session and initialize variables.
        config = tf.ConfigProto()
        config.allow_soft_placement=True
        sess = tf.Session(config=config)
        init = [tf.global_variables_initializer(),
                tf.local_variables_initializer()]
        sess.run(init)

        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=2)

        #restore from resnet imagenet, bised and local_step is in moving_average
        #restore_var = [v for v in tf.trainable_variables() if 'fc' not in v.name]+[v for v in tf.global_variables() if ('moving_mean' in v.name or 'moving_variance' in v.name) and ('biased' not in v.name and 'local_step' not in v.name)]
        restore_var = [v for v in tf.trainable_variables() if 'fc' not in v.name]
        ckpt = tf.train.get_checkpoint_state(args.restore_from)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var)
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')

        # Alternative (disabled): restore from snapshot
        #restore_var = tf.global_variables()
        #ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
        #if ckpt and ckpt.model_checkpoint_path:
        #    loader = tf.train.Saver(var_list=restore_var, allow_empty=True)
        #    load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        #    load(loader, sess, ckpt.model_checkpoint_path)
        #else:
        #    print('No checkpoint file found.')
        #    load_step = 0

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                               graph=sess.graph)

        # Iterate over training steps. The finally block flushes pending
        # summaries and stops the input threads even when a step raises or
        # training is interrupted.
        try:
            for step in range(args.num_steps):
                start_time = time.time()

                feed_dict = {step_ph: step}
                if step % args.save_pred_every == 0 and step != 0:
                    loss_value, _ = sess.run([reduced_loss, train_op],
                                             feed_dict=feed_dict)
                    save(saver, sess, args.snapshot_dir, step)
                elif step%100 == 0:
                    summary_str, loss_value, _, IOU = sess.run(
                        [summary_op, reduced_loss, train_op, mIoU],
                        feed_dict=feed_dict)
                    duration = time.time() - start_time
                    summary_writer.add_summary(summary_str, step)
                    print('step {:d} \t loss = {:.3f}, mean_IoU = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, IOU, duration))
                else:
                    loss_value, _ = sess.run([reduced_loss, train_op],
                                             feed_dict=feed_dict)
        finally:
            summary_writer.close()
            coord.request_stop()
            coord.join(threads)
def main():
    """Segment a single image and save the mask, colourising inside the graph.

    Same pipeline as plain single-image inference, except that the arg-max
    class map is converted to colours with a one-hot x colour-table matmul in
    TensorFlow rather than with ``decode_labels``. Inference is run twice and
    timed each time. The result is written to ``SAVE_DIR`` under the input's
    file name. Exits the process when the file is missing or has an
    unsupported extension.
    """
    import cv2

    args = get_arguments()

    filename = args.img_path.split('/')[-1]
    file_type = filename.split('.')[-1]

    if os.path.isfile(args.img_path):
        print('successful load img: {0}'.format(args.img_path))
    else:
        print('not found file: {0}'.format(args.img_path))
        sys.exit(0)

    # Prepare image.
    extension = file_type.lower()
    if extension == 'png':
        img = tf.image.decode_png(tf.read_file(args.img_path), channels=3)
    elif extension in ('jpg', 'jpeg'):
        img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3)
    else:
        print('cannot process {0} file.'.format(file_type))
        # Stop here: continuing would fail below with a NameError on `img`.
        # Non-zero status keeps the failure visible to callers, as before.
        sys.exit(1)

    # Convert RGB to BGR.
    img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img)
    img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]),
                  dtype=tf.float32)
    # Extract mean.
    img -= IMG_MEAN

    # Create network.
    net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)},
                             is_training=False, num_classes=NUM_CLASSES)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['fc_out']
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)

    # Matmul ver: one-hot class map times the colour table. The one-hot depth
    # is taken from the table and the output shape from the class map, so
    # neither the palette size nor the image size is hard-coded.
    class_map_shape = tf.shape(raw_output_up)
    label_colours = read_labelcolours(matfn)
    color_trans = tf.constant(label_colours, dtype=tf.float32)
    num_colours = int(color_trans.get_shape()[0])

    onehot = tf.one_hot(raw_output_up, depth=num_colours, dtype=tf.float32)
    onehot = tf.reshape(onehot, (-1, num_colours))
    pred = tf.matmul(onehot, color_trans)
    pred = tf.reshape(pred, tf.concat(axis=0, values=[class_map_shape, [3]]))

    # Decode ver
    #pred = tf.expand_dims(raw_output_up, dim=3)

    # Set up TF session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load weights.
    ckpt = tf.train.get_checkpoint_state(args.restore_from)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    # Perform inference twice and report the wall-clock time of each run.
    for _ in range(2):
        start = time.time()
        preds = sess.run(pred)
        end = time.time()
        print(preds.shape)
        print('Inference time = %f sec' % (end - start))

    im = preds[0].astype(np.uint8)
    print(im.shape)

    # cv2.imwrite does not create missing directories (it just returns
    # False), so create SAVE_DIR first.
    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    # os.path.join works whether or not SAVE_DIR ends with a separator.
    save_path = os.path.join(SAVE_DIR, filename)
    cv2.imwrite(save_path, im)

    print('The output file has been saved to {0}'.format(save_path))
def main():
    """Evaluate mean IoU over the listed images with crop and mirror inference.

    Each image is split into ``input_size`` crops by ``ImageSplitter``, run
    through the network, and reassembled; the same is done for the
    horizontally mirrored image and the two logit maps are summed before the
    arg-max. Predictions are accumulated into a streaming mean-IoU metric,
    which is printed every 10 images and once more at the end.
    """
    args = get_arguments()
    print(args)

    tf.reset_default_graph()

    image_list, label_list = read_labeled_image_list(DATA_DIRECTORY,
                                                     DATA_LIST_PATH)
    num_images = len(image_list)

    # Create network.
    image_batch = tf.placeholder(tf.float32, [None, input_size, input_size, 3])

    net = DeepLabResNetModel({'data': [image_batch]}, is_training=False,
                             num_classes=num_classes)

    # Predictions.
    raw_output = net.layers['fc_voc12'][0]
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             size=[input_size, input_size],
                                             align_corners=True)

    # mIoU: predictions and labels are fed back in as flat arrays, and only
    # pixels whose label is a valid class index contribute.
    pred_all = tf.placeholder(tf.float32, [None, None])
    raw_all = tf.placeholder(tf.float32, [None, None, None, None])
    pred_flatten = tf.reshape(pred_all, [-1, ])
    raw_gt = tf.reshape(raw_all, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred_label = tf.gather(pred_flatten, indices)
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred_label, gt, num_classes=num_classes)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Which variables to load.
    restore_var = tf.global_variables()

    ckpt = tf.train.get_checkpoint_state(args.model)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    for step, image_path in enumerate(image_list):
        image = cv2.imread(image_path, 1)
        label = cv2.imread(label_list[step], 0)
        label = np.reshape(label, [1, label.shape[0], label.shape[1], 1])

        imgsplitter = ImageSplitter(image, 1.0, input_size, IMG_MEAN)
        feed_dict = {image_batch: imgsplitter.get_split_crops()}

        logits = sess.run(raw_output_up, feed_dict=feed_dict)
        total_logits = imgsplitter.reassemble_crops(logits)

        # Mirror: run the flipped image and flip its logits back before
        # adding them to the un-mirrored result.
        image_mirror = image[:, ::-1]
        imgsplitter_mirror = ImageSplitter(image_mirror, 1.0, input_size,
                                           IMG_MEAN)
        feed_dict = {image_batch: imgsplitter_mirror.get_split_crops()}
        logits_mirror = sess.run(raw_output_up, feed_dict=feed_dict)
        logits_mirror = imgsplitter_mirror.reassemble_crops(logits_mirror)
        total_logits += logits_mirror[:, ::-1]

        prediction = np.argmax(total_logits, axis=-1)

        sess.run([update_op], feed_dict={pred_all: prediction, raw_all: label})

        if step > 0 and args.measure_time:
            calculate_time(sess, net)

        if step % 10 == 0:
            print('Finish {0}/{1}'.format(step, num_images))
            print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))

    # Final score. `step` only exists once the loop has run, so an empty
    # image list is reported explicitly instead of raising NameError.
    if num_images:
        print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))
    else:
        print('No images to evaluate.')