def get_next_image_crops(images, labels, dd, noisy_box, mirrored, real_motion, network_outs):
    """Build the (previous, current) crop pair and regression target for unroll step ``dd``.

    Args:
        images: Indexable sequence of frames; ``images[0].shape[:2]`` is read as
            (height, width).
        labels: Indexable sequence of ground-truth boxes, one per frame
            (presumably xyxy in image coordinates -- TODO confirm against the loader).
        dd: Zero-based unroll step.
        noisy_box: Crop box returned by the previous call for this sequence. Only
            read when ``network_outs`` is not None.
        mirrored: Unused in this function; kept so the call signature stays stable.
        real_motion: When truthy (and ``dd > 0``), the current crop is derived from
            the previous box instead of from a randomly jittered ground-truth box.
        network_outs: Raw network output for the previous step, or None. The raw
            values are divided by 10 to undo the label scaling applied below.

    Returns:
        Tuple ``(image0, image1, xyxy_labels, noisy_box)``: the crop of the previous
        frame around the previous box, the crop of the current frame around the
        (new) noisy box, the ground-truth box expressed in the current crop's
        coordinate system and multiplied by 10, and the new noisy box.
    """
    if network_outs is not None:
        # Map the prediction out of the previous crop's coordinate system and use
        # it as the previous box. The original code computed this box and then
        # discarded it (bbox_prev = noisy_box), so the network prediction never
        # influenced the next crop; get_data_sequence uses the predicted box here.
        xyxy_pred = network_outs.squeeze() / 10
        bbox_prev = bb_util.from_crop_coordinate_system(xyxy_pred, noisy_box, CROP_PAD, 1)
    elif dd == 0:
        bbox_prev = labels[dd]
    else:
        bbox_prev = labels[dd - 1]

    # bbox_on is the ground-truth location in the current frame.
    bbox_on = labels[dd]
    if dd == 0:
        noisy_box = bbox_on.copy()
    elif not real_motion and network_outs is None:
        noisy_box = add_noise(bbox_on, bbox_on, images[0].shape[1], images[0].shape[0])
    else:
        noisy_box = fix_bbox_intersection(bbox_prev, bbox_on)

    image0 = im_util.get_cropped_input(images[max(dd - 1, 0)], bbox_prev, CROP_PAD, CROP_SIZE)[0]
    image1 = im_util.get_cropped_input(images[dd], noisy_box, CROP_PAD, CROP_SIZE)[0]

    # Express the ground truth relative to the current crop; the xyxy -> xywh ->
    # xyxy round trip is kept exactly as in the original so numerics are unchanged.
    shifted_bbox = bb_util.to_crop_coordinate_system(bbox_on, noisy_box, CROP_PAD, 1)
    xywh_labels = bb_util.xyxy_to_xywh(shifted_bbox)
    xyxy_labels = bb_util.xywh_to_xyxy(xywh_labels) * 10
    return image0, image1, xyxy_labels, noisy_box
def add_noise(self, bbox, prevBBox, imageWidth, imageHeight):
    """Return a randomly shifted and rescaled copy of ``bbox``.

    Up to ten candidates are sampled. A candidate is accepted as soon as its
    centre lies inside ``prevBBox`` and inside the image; if every candidate is
    rejected, the last one sampled is used anyway. The chosen candidate is
    converted back with ``bb_util.xywh_to_xyxy`` and passed through
    ``self.fix_bbox_intersection`` before being returned.
    """
    startXYWH = bb_util.xyxy_to_xywh(bbox)
    for _ in range(10):
        bboxXYWH = startXYWH.copy()
        # Centre shift is proportional to the un-scaled width/height, so it has
        # to be drawn before the size is perturbed.
        centerNoise = np.random.laplace(0, 1.0 / 5, 2) * bboxXYWH[[2, 3]]
        sizeNoise = np.clip(np.random.laplace(1, 1.0 / 15, 2), .6, 1.4)
        bboxXYWH[[2, 3]] *= sizeNoise
        bboxXYWH[[0, 1]] = bboxXYWH[[0, 1]] + centerNoise

        # Reject: centre fell outside the reference box.
        if (bboxXYWH[0] < prevBBox[0] or bboxXYWH[1] < prevBBox[1]
                or bboxXYWH[0] > prevBBox[2] or bboxXYWH[1] > prevBBox[3]):
            continue
        # Reject: centre fell outside the image.
        if (bboxXYWH[0] < 0 or bboxXYWH[1] < 0
                or bboxXYWH[0] > imageWidth or bboxXYWH[1] > imageHeight):
            continue
        break

    noisyXYXY = bb_util.xywh_to_xyxy(bboxXYWH)
    return self.fix_bbox_intersection(noisyXYXY, prevBBox, imageWidth, imageHeight)
def add_noise(bbox, prev_bbox, image_width, image_height):
    """Jitter the centre and size of ``bbox`` with Laplace noise.

    At most ten candidates are drawn. The first candidate whose centre is inside
    both ``prev_bbox`` and the image bounds is kept; when none qualifies the
    tenth candidate is kept regardless. The result is converted back via
    ``bb_util.xywh_to_xyxy`` and handed to ``fix_bbox_intersection`` together
    with ``prev_bbox``. ``image_width`` / ``image_height`` are only used for the
    bounds test.
    """
    max_attempts = 10
    original_xywh = bb_util.xyxy_to_xywh(bbox)
    for _attempt in range(max_attempts):
        bbox_xywh = original_xywh.copy()
        # Draw order (centre first, then size) matches the original so the RNG
        # stream is consumed identically.
        center_noise = np.random.laplace(0, 1.0 / 5, 2) * bbox_xywh[[2, 3]]
        size_noise = np.clip(np.random.laplace(1, 1.0 / 15, 2), 0.6, 1.4)
        bbox_xywh[[2, 3]] *= size_noise
        bbox_xywh[[0, 1]] = bbox_xywh[[0, 1]] + center_noise

        center_x = bbox_xywh[0]
        center_y = bbox_xywh[1]
        rejected = (
            center_x < prev_bbox[0]
            or center_y < prev_bbox[1]
            or center_x > prev_bbox[2]
            or center_y > prev_bbox[3]
            or center_x < 0
            or center_y < 0
            or center_x > image_width
            or center_y > image_height
        )
        if not rejected:
            break

    return fix_bbox_intersection(bb_util.xywh_to_xyxy(bbox_xywh), prev_bbox)
def main(args):
    """Run the unrolled PyTorch training loop for the Re3 network.

    Args:
        args: Parsed options object. The attributes read here are
            ``num_unrolls``, ``batch_size``, ``timing``, ``debug``, ``output``,
            ``device``, ``num_threads``, ``restore``, ``max_steps``, ``run_val``
            and ``val_device``; ``args`` is also forwarded to ``Re3SmallNet``.

    Side effects:
        Creates the checkpoint (and, when not debugging, tensorboard) directory
        under ``LOG_DIR``, writes checkpoints periodically and once more on
        exit, and optionally launches ``test_net.py`` in a background thread.
        Exceptions derived from ``Exception`` raised inside the loop are printed
        and swallowed; a final checkpoint is still saved by the ``finally``
        clause (which also runs on Ctrl-C).
    """
    num_unrolls = args.num_unrolls
    batch_size = args.batch_size
    timing = args.timing
    debug = args.debug or args.output
    device = pt_util.setup_devices(args.device)[0]

    np.set_printoptions(suppress=True)
    np.set_printoptions(precision=4)

    # pool = mp.Pool(min(batch_size, mp.cpu_count()))

    time_str = python_util.get_time_str()
    checkpoint_path = os.path.join(LOG_DIR, "checkpoints")
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(checkpoint_path, exist_ok=True)

    train_logger = None
    if not debug:
        tensorboard_dir = os.path.join(LOG_DIR, "tensorboard", time_str + "_train")
        os.makedirs(tensorboard_dir, exist_ok=True)
        train_logger = tensorboard_logger.Logger(tensorboard_dir)

    data_loader = pt_dataset.get_data_loader(num_unrolls, batch_size, args.num_threads)
    batch_iter = iter(data_loader)

    network = Re3SmallNet(device, args)
    network.setup_optimizer(1e-5)
    network.to(device)
    network.train()

    start_iter = 0
    if args.restore:
        print("Restoring")
        start_iter = pt_util.restore_from_folder(
            network,
            checkpoint_path,
        )
        print("Restored", start_iter)

    if debug:
        cv2.namedWindow("debug", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("debug", OUTPUT_WIDTH, OUTPUT_HEIGHT)

    try:
        time_total = 0.000001
        num_iters = 0
        iteration = start_iter
        # Run training iterations in the main thread.
        while iteration < args.max_steps:
            if train_logger is not None and iteration % 1000 == 0:
                train_logger.network_conv_summary(network, iteration)
            if iteration == 10000:
                network.update_learning_rate(1e-6)
            if (iteration - 1) % 10 == 0:
                current_time_start = time.time()

            start_solver = time.time()
            # Timers: initial data read time | data time | forward time | backward time | total time
            timers = np.zeros(5)
            try:
                image_sequences = next(batch_iter)
            except StopIteration:
                # Loader exhausted: start another pass over the data.
                batch_iter = iter(data_loader)
                image_sequences = next(batch_iter)
            timers[0] = time.time() - start_solver

            outputs = []
            labels = []
            images = []
            noisy_boxes = [None for _ in range(len(image_sequences))]
            mirrored = np.random.random(batch_size) < 0.5
            real_motion = np.random.random(batch_size) < REAL_MOTION_PROB
            use_network_outs = np.random.random(batch_size) < USE_NETWORK_PROB
            lstm_state = None
            network_outs = [None for _ in range(len(image_sequences))]
            for dd in range(num_unrolls):
                batch_images = []
                batch_labels = []
                process_t_start = time.time()
                for ii, vals in enumerate(image_sequences):
                    image_sequence = vals["images"]
                    label_sequence = vals["labels"]
                    image0, image1, xyxy_labels, noisy_box = pt_dataset.get_next_image_crops(
                        image_sequence,
                        label_sequence,
                        dd,
                        noisy_boxes[ii],
                        mirrored[ii],
                        real_motion[ii],
                        network_outs[ii],
                    )
                    batch_images.append((image0, image1))
                    batch_labels.append(xyxy_labels)
                    noisy_boxes[ii] = noisy_box
                images.append(batch_images)
                labels.append(batch_labels)
                image_tensor = pt_util.from_numpy(batch_images)
                timers[1] += time.time() - process_t_start

                forward_t_start = time.time()
                output = network(image_tensor, lstm_state)
                outputs.append(output)
                output = pt_util.to_numpy_array(output)
                # Iterate over the sequences actually present: network_outs is
                # sized by len(image_sequences), so range(batch_size) would raise
                # IndexError if the loader ever yields a short batch.
                for ii in range(len(network_outs)):
                    if use_network_outs[ii]:
                        network_outs[ii] = output[ii]
                lstm_state = network.lstm_state
                timers[2] += time.time() - forward_t_start

            backward_t_start = time.time()
            labels = pt_util.from_numpy(labels)
            network.optimizer.zero_grad()
            outputs = torch.stack(outputs)
            loss_value = network.loss(
                outputs, labels.to(dtype=outputs.dtype, device=network.device))
            loss_value.backward()
            network.optimizer.step()
            loss_value = loss_value.item()
            timers[3] = time.time() - backward_t_start
            end_solver = time.time()
            timers[4] = time.time() - start_solver
            time_total += end_solver - start_solver
            per_image_timers = timers / (num_unrolls * batch_size)

            if train_logger is not None and iteration % 10 == 0:
                train_logger.dict_log(
                    {
                        "losses/loss": loss_value,
                        "stats/data_read_time": timers[0],
                        "stats/data_time": timers[1],
                        "stats/forward_time": timers[2],
                        "stats/backward_time": timers[3],
                        "stats/total_time": timers[4],
                        "per_image_stats/data_read_time": per_image_timers[0],
                        "per_image_stats/data_time": per_image_timers[1],
                        "per_image_stats/forward_time": per_image_timers[2],
                        "per_image_stats/backward_time": per_image_timers[3],
                        "per_image_stats/total_time": per_image_timers[4],
                    },
                    iteration,
                )

            num_iters += 1
            iteration += 1
            if timing and (iteration - 1) % 10 == 0:
                print("Iteration: %d" % (iteration - 1))
                print("Loss: %.3f" % loss_value)
                print("Average Time: %.3f" % (time_total / num_iters))
                print("Current Time: %.3f" % (end_solver - start_solver))
                if num_iters > 20:
                    print("Current Average: %.3f" % ((time.time() - current_time_start) / 10))
                print("")

            # Save a checkpoint and remove old ones.
            if iteration % 500 == 0 or iteration == args.max_steps:
                pt_util.save(network, LOG_DIR + "/checkpoints/iteration_%07d.pt" % iteration, num_to_keep=1)

            # Every once in a while save a checkpoint that isn't ever removed except by hand.
            if iteration % 10000 == 0 or iteration == args.max_steps:
                pt_util.save(
                    network, LOG_DIR + "/checkpoints/long_checkpoints/iteration_%07d.pt" % iteration)

            if not debug:
                if args.run_val and (num_iters == 1 or iteration % 1000 == 0):
                    # Run a validation set eval in a separate process.
                    def test_func(test_iter_on):
                        # test_iter_on is passed in explicitly: reading
                        # `iteration` through the closure would pick up whatever
                        # value the training loop has reached by the time this
                        # thread runs, mislabelling the logged results.
                        print("Staring test iter", test_iter_on)
                        import subprocess
                        import json

                        command = [
                            "python",
                            "test_net.py",
                            "--video-sample-rate",
                            str(10),
                            "--no-display",
                            "-v",
                            str(args.val_device),
                        ]
                        subprocess.call(command)
                        # Context manager so the results file handle is closed
                        # before the file is removed below.
                        with open("results.json", "r") as results_file:
                            result = json.load(results_file)
                        train_logger.dict_log(
                            {
                                "eval/robustness": result["robustness"],
                                "eval/lost_targets": result["lostTarget"],
                                "eval/mean_iou": result["meanIou"],
                                "eval/avg_measure": (result["meanIou"] + result["robustness"]) / 2,
                            },
                            test_iter_on,
                        )
                        os.remove("results.json")
                        print("Ending test iter", test_iter_on)

                    test_thread = threading.Thread(target=test_func, args=(iteration,))
                    test_thread.daemon = True
                    test_thread.start()

            if args.output:
                # Look at some of the outputs.
                print("new batch")
                images = (np.array(images).transpose(
                    (1, 0, 2, 3, 4, 5)).reshape(
                        (batch_size, num_unrolls, 2, CROP_SIZE, CROP_SIZE, 3)))
                labels = pt_util.to_numpy_array(labels).transpose(1, 0, 2)
                outputs = pt_util.to_numpy_array(outputs).transpose(1, 0, 2)
                for bb in range(batch_size):
                    for dd in range(num_unrolls):
                        image0 = images[bb, dd, 0, ...]
                        image1 = images[bb, dd, 1, ...]

                        # Labels and predictions are stored scaled by 10.
                        xyxy_label = labels[bb, dd, :] / 10
                        xyxy_pred = outputs[bb, dd, ...] / 10

                        drawing.drawRect(
                            image0,
                            bb_util.xywh_to_xyxy(
                                np.full((4, 1), 0.5) * CROP_SIZE), 2,
                            [0, 255, 0])
                        drawing.drawRect(image1, xyxy_label * CROP_SIZE, 2,
                                         [0, 255, 0])
                        drawing.drawRect(image1, xyxy_pred * CROP_SIZE, 2,
                                         [255, 0, 0])

                        plots = [image0, image1]
                        subplot = drawing.subplot(plots,
                                                  1,
                                                  2,
                                                  outputWidth=OUTPUT_WIDTH,
                                                  outputHeight=OUTPUT_HEIGHT,
                                                  border=5)
                        cv2.imshow("debug", subplot[:, :, ::-1])
                        cv2.waitKey(0)
    except Exception:
        # Report the failure but fall through so the final checkpoint is written.
        import traceback

        traceback.print_exc()
    finally:
        # Save if error or killed by ctrl-c.
        if not debug:
            print("Saving...")
            pt_util.save(network, LOG_DIR + "/checkpoints/iteration_%07d.pt" % iteration, num_to_keep=-1)
def main(FLAGS):
    """Build the TensorFlow training graph for Re3 and run the training loop.

    Attributes read from ``FLAGS``: ``delta``, ``batch_size``, ``timing``,
    ``debug``, ``output``, ``port``, ``cuda_visible_devices``, ``restore``,
    ``max_steps``, ``clearSnapshots``, ``run_val``, ``val_device`` and
    ``val_dataset``.

    The module-level globals ``PORT`` and ``delta`` are overwritten from
    ``FLAGS``. Checkpoints are written under ``LOG_DIR/checkpoints``; when not in
    debug mode summaries are written under ``LOG_DIR/train``. On any exception
    (including Ctrl-C) a checkpoint is saved, unless debugging, and the
    exception is re-raised.
    """
    global PORT, delta, REPLAY_BUFFER_SIZE
    delta = FLAGS.delta
    batchSize = FLAGS.batch_size
    timing = FLAGS.timing
    debug = FLAGS.debug or FLAGS.output
    PORT = FLAGS.port

    os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.cuda_visible_devices)
    np.set_printoptions(suppress=True)
    np.set_printoptions(precision=4)

    # Tensorflow setup
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    if not os.path.exists(LOG_DIR + '/checkpoints'):
        os.makedirs(LOG_DIR + '/checkpoints')

    tf.Graph().as_default()
    tf.logging.set_verbosity(tf.logging.INFO)

    sess = tf_util.Session()

    # Create the nodes for single image forward passes for learning to fix mistakes.
    # Parameters here are shared with the learned network.
    # NOTE(review): the ',' membership test assumes FLAGS.cuda_visible_devices is
    # a string (it is wrapped in str() above but not here) -- confirm.
    # The two branches build the same nodes; only the device placement differs.
    if ',' in FLAGS.cuda_visible_devices:
        with tf.device('/gpu:1'):
            forwardNetworkImagePlaceholder = tf.placeholder(
                tf.uint8, shape=(2, CROP_SIZE, CROP_SIZE, 3))
            prevLstmState = tuple([
                tf.placeholder(tf.float32, shape=(1, LSTM_SIZE))
                for _ in range(4)
            ])
            initialLstmState = tuple(
                [np.zeros((1, LSTM_SIZE)) for _ in range(4)])
            networkOutputs, state1, state2 = network.inference(
                forwardNetworkImagePlaceholder,
                num_unrolls=1,
                train=False,
                prevLstmState=prevLstmState,
                reuse=False)
    else:
        forwardNetworkImagePlaceholder = tf.placeholder(tf.uint8,
                                                        shape=(2, CROP_SIZE,
                                                               CROP_SIZE, 3))
        prevLstmState = tuple([
            tf.placeholder(tf.float32, shape=(1, LSTM_SIZE)) for _ in range(4)
        ])
        initialLstmState = tuple([np.zeros((1, LSTM_SIZE)) for _ in range(4)])
        networkOutputs, state1, state2 = network.inference(
            forwardNetworkImagePlaceholder,
            num_unrolls=1,
            train=False,
            prevLstmState=prevLstmState,
            reuse=False)

    # The dataset object receives the single-step forward nodes created above.
    tf_dataset_obj = tf_dataset.Dataset(sess,
                                        delta,
                                        batchSize * 2,
                                        PORT,
                                        debug=FLAGS.debug)
    tf_dataset_obj.initialize_tf_placeholders(forwardNetworkImagePlaceholder,
                                              prevLstmState, networkOutputs,
                                              state1, state2)

    tf_dataset_iterator = tf_dataset_obj.get_dataset(batchSize)
    imageBatch, labelsBatch = tf_dataset_iterator.get_next()
    imageBatch = tf.reshape(imageBatch, (batchSize * delta * 2, CROP_SIZE,
                                         CROP_SIZE, 3))
    labelsBatch = tf.reshape(labelsBatch, (batchSize * delta, -1))

    learningRate = tf.placeholder(tf.float32)
    # These two placeholders are only used as dictionary keys for the debug
    # visualisation further down; they are never fed to sess.run in this function.
    imagePlaceholder = tf.placeholder(tf.uint8,
                                      shape=(batchSize, delta * 2, CROP_SIZE,
                                             CROP_SIZE, 3))
    labelPlaceholder = tf.placeholder(tf.float32, shape=(batchSize, delta, 4))

    # Training network; reuse=True shares variables with the forward nodes above.
    if ',' in FLAGS.cuda_visible_devices:
        with tf.device('/gpu:0'):
            tfOutputs = network.inference(imageBatch,
                                          num_unrolls=delta,
                                          train=True,
                                          reuse=True)
            tfLossFull, tfLoss = network.loss(tfOutputs, labelsBatch)
            train_op = network.training(tfLossFull, learningRate)
    else:
        tfOutputs = network.inference(imageBatch,
                                      num_unrolls=delta,
                                      train=True,
                                      reuse=True)
        tfLossFull, tfLoss = network.loss(tfOutputs, labelsBatch)
        train_op = network.training(tfLossFull, learningRate)

    loss_summary_op = tf.summary.merge([
        tf.summary.scalar('loss', tfLoss),
        tf.summary.scalar('l2_regularizer', tfLossFull - tfLoss),
    ])

    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    longSaver = tf.train.Saver()

    # Initialize the network and load saved parameters.
    sess.run(init)
    startIter = 0
    if FLAGS.restore:
        print('Restoring')
        startIter = tf_util.restore_from_dir(
            sess, os.path.join(LOG_DIR, 'checkpoints'))
    if not debug:
        tt = time.localtime()
        time_str = ('%04d_%02d_%02d_%02d_%02d_%02d' %
                    (tt.tm_year, tt.tm_mon, tt.tm_mday, tt.tm_hour, tt.tm_min,
                     tt.tm_sec))
        summary_writer = tf.summary.FileWriter(
            LOG_DIR + '/train/' + time_str + '_n_' + str(delta) + '_b_' +
            str(batchSize), sess.graph)
        # merge_all() is called twice on purpose: summary_full is taken before
        # the conv-variable summaries are registered, summary_with_images after.
        summary_full = tf.summary.merge_all()
        conv_var_list = [
            v for v in tf.trainable_variables()
            if 'conv' in v.name and 'weight' in v.name and
            (v.get_shape().as_list()[0] != 1 or v.get_shape().as_list()[1] != 1
             )
        ]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(var,
                                            scope=var.name.replace('/',
                                                                   '_')[:-2])
        summary_with_images = tf.summary.merge_all()

    # Logging stuff
    robustness_ph = tf.placeholder(tf.float32, shape=[])
    lost_targets_ph = tf.placeholder(tf.float32, shape=[])
    mean_iou_ph = tf.placeholder(tf.float32, shape=[])
    avg_ph = tf.placeholder(tf.float32, shape=[])
    if FLAGS.run_val:
        val_gpu = None if FLAGS.val_device == '0' else FLAGS.val_device
        test_tracker = re3_tracker.CopiedRe3Tracker(sess, train_vars, val_gpu)
        test_runner = test_net.TestTrackerRunner(test_tracker)
        with tf.name_scope('test'):
            test_summary_op = tf.summary.merge([
                tf.summary.scalar('robustness', robustness_ph),
                tf.summary.scalar('lost_targets', lost_targets_ph),
                tf.summary.scalar('mean_iou', mean_iou_ph),
                tf.summary.scalar('avg_iou_robustness', avg_ph),
            ])

    if debug:
        cv2.namedWindow('debug', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('debug', OUTPUT_WIDTH, OUTPUT_HEIGHT)

    # No graph nodes may be added past this point.
    sess.graph.finalize()

    try:
        timeTotal = 0.000001
        numIters = 0
        iteration = startIter
        # Run training iterations in the main thread.
        while iteration < FLAGS.max_steps:
            if (iteration - 1) % 10 == 0:
                currentTimeStart = time.time()

            startSolver = time.time()
            if debug:
                # Also fetch the inputs and outputs so they can be displayed below.
                _, outputs, lossValue, images, labels, = sess.run(
                    [train_op, tfOutputs, tfLoss, imageBatch, labelsBatch],
                    feed_dict={learningRate: LEARNING_RATE})
                debug_feed_dict = {
                    imagePlaceholder: images,
                    labelPlaceholder: labels,
                }
            else:
                if iteration % 10 == 0:
                    _, lossValue, loss_summary = sess.run(
                        [train_op, tfLoss, loss_summary_op],
                        feed_dict={learningRate: LEARNING_RATE})
                    summary_writer.add_summary(loss_summary, iteration)
                else:
                    _, lossValue = sess.run(
                        [train_op, tfLoss],
                        feed_dict={learningRate: LEARNING_RATE})
            endSolver = time.time()

            numIters += 1
            iteration += 1

            timeTotal += (endSolver - startSolver)
            if timing and (iteration - 1) % 10 == 0:
                print('Iteration:       %d' % (iteration - 1))
                print('Loss:            %.3f' % lossValue)
                print('Average Time:    %.3f' % (timeTotal / numIters))
                print('Current Time:    %.3f' % (endSolver - startSolver))
                if numIters > 20:
                    print('Current Average: %.3f' %
                          ((time.time() - currentTimeStart) / 10))
                print('')

            # Save a checkpoint and remove old ones.
            if iteration % 500 == 0 or iteration == FLAGS.max_steps:
                checkpoint_file = os.path.join(LOG_DIR, 'checkpoints',
                                               'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=iteration)
                print("Saving checkpoint at " + checkpoint_file)
                if FLAGS.clearSnapshots:
                    # Delete every regular file in the checkpoint directory whose
                    # path does not contain the current iteration number and
                    # whose basename does not contain 'checkpoint'.
                    files = glob.glob(LOG_DIR + '/checkpoints/*')
                    for file in files:
                        basename = os.path.basename(file)
                        if os.path.isfile(file) and str(
                                iteration
                        ) not in file and 'checkpoint' not in basename:
                            os.remove(file)
            # Every once in a while save a checkpoint that isn't ever removed except by hand.
            if iteration % 10000 == 0 or iteration == FLAGS.max_steps:
                if not os.path.exists(LOG_DIR +
                                      '/checkpoints/long_checkpoints'):
                    os.makedirs(LOG_DIR + '/checkpoints/long_checkpoints')
                checkpoint_file = os.path.join(LOG_DIR,
                                               'checkpoints/long_checkpoints',
                                               'model.ckpt')
                longSaver.save(sess, checkpoint_file, global_step=iteration)
            if not debug:
                # NOTE(review): each summary run below also executes train_op, so
                # it performs an additional optimisation step that is not counted
                # in `iteration`.
                if (numIters == 1 or iteration % 100 == 0
                        or iteration == FLAGS.max_steps):
                    # Write out the full graph sometimes.
                    if (numIters == 1 or iteration == FLAGS.max_steps):
                        print('Running detailed summary')
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        _, summary_str = sess.run(
                            [train_op, summary_with_images],
                            options=run_options,
                            run_metadata=run_metadata,
                            feed_dict={learningRate: LEARNING_RATE})
                        summary_writer.add_run_metadata(
                            run_metadata, 'step_%07d' % iteration)
                    elif iteration % 1000 == 0:
                        _, summary_str = sess.run(
                            [train_op, summary_with_images],
                            feed_dict={learningRate: LEARNING_RATE})
                        print('Running image summary')
                    else:
                        print('Running summary')
                        _, summary_str = sess.run(
                            [train_op, summary_full],
                            feed_dict={learningRate: LEARNING_RATE})
                    summary_writer.add_summary(summary_str, iteration)
                    summary_writer.flush()
                if (FLAGS.run_val
                        and (numIters == 1 or iteration % 500 == 0)):
                    # Run a validation set eval in a separate thread.
                    # The iteration number is passed as an argument so the thread
                    # logs against the step at which it was launched.
                    def test_func(test_iter_on):
                        print('Starting test iter', test_iter_on)
                        test_runner.reset()
                        result = test_runner.run_test(
                            dataset=FLAGS.val_dataset, display=False)
                        summary_str = sess.run(
                            test_summary_op,
                            feed_dict={
                                robustness_ph:
                                result['robustness'],
                                lost_targets_ph:
                                result['lostTarget'],
                                mean_iou_ph:
                                result['meanIou'],
                                avg_ph:
                                (result['meanIou'] + result['robustness']) / 2,
                            })
                        summary_writer.add_summary(summary_str, test_iter_on)
                        # NOTE(review): presumably run_test writes results.json
                        # to the working directory -- confirm; os.remove raises
                        # if the file is absent.
                        os.remove('results.json')
                        print('Ending test iter', test_iter_on)

                    test_thread = threading.Thread(target=test_func,
                                                   args=(iteration, ))
                    test_thread.start()
            if FLAGS.output:
                # Look at some of the outputs.
                # debug_feed_dict exists here because FLAGS.output implies debug.
                print('new batch')
                images = debug_feed_dict[imagePlaceholder].astype(
                    np.uint8).reshape(
                        (batchSize, delta, 2, CROP_SIZE, CROP_SIZE, 3))
                labels = debug_feed_dict[labelPlaceholder].reshape(
                    (batchSize, delta, 4))
                outputs = outputs.reshape((batchSize, delta, 4))
                for bb in range(batchSize):
                    for dd in range(delta):
                        image0 = images[bb, dd, 0, ...]
                        image1 = images[bb, dd, 1, ...]

                        # Labels and outputs are divided by 10 before drawing.
                        label = labels[bb, dd, :]
                        xyxyLabel = label / 10
                        labelBox = xyxyLabel * CROP_PAD

                        output = outputs[bb, dd, ...]
                        xyxyPred = output / 10
                        outputBox = xyxyPred * CROP_PAD

                        drawing.drawRect(
                            image0,
                            bb_util.xywh_to_xyxy(
                                np.full((4, 1), .5) * CROP_SIZE), 2,
                            [255, 0, 0])
                        drawing.drawRect(image1, xyxyLabel * CROP_SIZE, 2,
                                         [0, 255, 0])
                        drawing.drawRect(image1, xyxyPred * CROP_SIZE, 2,
                                         [255, 0, 0])

                        plots = [image0, image1]
                        subplot = drawing.subplot(plots,
                                                  1,
                                                  2,
                                                  outputWidth=OUTPUT_WIDTH,
                                                  outputHeight=OUTPUT_HEIGHT,
                                                  border=5)
                        cv2.imshow('debug', subplot[:, :, ::-1])
                        cv2.waitKey(1)
    except:
        # Save if error or killed by ctrl-c.
        # The bare except is deliberate here: it also catches KeyboardInterrupt,
        # and the exception is re-raised after the checkpoint is written.
        if not debug:
            print('Saving...')
            checkpoint_file = os.path.join(LOG_DIR, 'checkpoints',
                                           'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=iteration)
        raise
def get_data_sequence(self):
    """Assemble one training sequence of ``self.delta`` image-crop pairs and labels.

    Each step produces a crop of the previous frame around the previous box and
    a crop of the current frame around a "noisy" box; the label is the
    ground-truth box expressed in the second crop's coordinate system. Frames
    come either from the simulator or from ``self.getData()``, chosen at random
    per sequence.

    Returns:
        ``(tImage, xyxyLabels)`` where ``tImage`` is a uint8 array of shape
        ``(self.delta * 2, CROP_SIZE, CROP_SIZE, 3)`` and ``xyxyLabels`` is a
        float32 array of shape ``(self.delta, 4)`` holding crop-relative boxes
        multiplied by 10.

        If any exception is raised, the traceback is printed, ``pdb`` is
        entered, and the function then falls off the end and returns ``None``.
    """
    try:
        # Preallocate the space for the images and labels.
        tImage = np.zeros((self.delta, 2, CROP_SIZE, CROP_SIZE, 3),
                          dtype=np.uint8)
        xywhLabels = np.zeros((self.delta, 4), dtype=np.float32)

        # Per-sequence random choices. gtType is compared against
        # USE_NETWORK_PROB below to decide whether the network's own
        # predictions are fed back as the previous box.
        mirrored = random.random() < 0.5
        useSimulator = random.random() < USE_SIMULATOR
        gtType = random.random()
        realMotion = random.random() < REAL_MOTION_PROB

        # Initialize first frame (give the network context).

        if useSimulator:
            # Initialize the simulation and run through a few frames.
            trackingObj, trackedObjects, background = simulator.create_new_track(
            )
            # Step a random number of frames, stopping early once the measured
            # occlusion exceeds 0.2 ...
            for _ in range(random.randint(0, 200)):
                simulator.step(trackedObjects)
                bbox = trackingObj.get_object_box()
                occlusion = simulator.measure_occlusion(
                    bbox, trackingObj.occluder_boxes, cropPad=1)
                if occlusion > .2:
                    break
            # ... then keep stepping (at most 1000 times) until the measured
            # occlusion drops below 0.01.
            for _ in range(1000):
                bbox = trackingObj.get_object_box()
                occlusion = simulator.measure_occlusion(
                    bbox, trackingObj.occluder_boxes, cropPad=1)
                if occlusion < 0.01:
                    break
                simulator.step(trackedObjects)
            initBox = trackingObj.get_object_box()
            if self.debug:
                images = [
                    simulator.get_image_for_frame(trackedObjects, background)
                ]
            else:
                # Only images[0].shape is read in the non-debug simulator path,
                # so a blank array of the simulation size is sufficient.
                images = [np.zeros((SIMULATION_HEIGHT, SIMULATION_WIDTH))]
        else:
            # Read a new data sequence from batch cache and get the ground truth.
            (batchKey, images) = self.getData()
            gtKey = batchKey
            imageIndex = self.key_lookup[gtKey]
            initBox = self.datasets[gtKey[0]][imageIndex, :4].copy()
        if self.debug:
            bboxes = []
            cropBBoxes = []

        # bboxPrev starts at the initial box and is the best guess (or gt) for the image0 location.
        # noisyBox holds the bboxPrev estimate plus some noise.
        bboxPrev = initBox
        lstmState = None

        for dd in range(self.delta):
            # bboxOn is the gt location in image1
            if useSimulator:
                bboxOn = trackingObj.get_object_box()
            else:
                # Element 3 of the key is advanced by dd to look up this step's
                # ground truth (presumably a frame index -- verify against
                # key_lookup's construction).
                newKey = list(gtKey)
                newKey[3] += dd
                newKey = tuple(newKey)
                imageIndex = self.key_lookup[newKey]
                bboxOn = self.datasets[newKey[0]][imageIndex, :4].copy()
            if dd == 0:
                noisyBox = bboxOn.copy()
            elif not realMotion and not useSimulator and gtType >= USE_NETWORK_PROB:
                noisyBox = self.add_noise(bboxOn, bboxOn, images[0].shape[1],
                                          images[0].shape[0])
            else:
                noisyBox = self.fix_bbox_intersection(
                    bboxPrev, bboxOn, images[0].shape[1], images[0].shape[0])

            if useSimulator:
                patch = simulator.render_patch(bboxPrev, background,
                                               trackedObjects)
                tImage[dd, 0, ...] = patch
                if dd > 0:
                    # Advance the simulation between rendering image0 and
                    # image1, then refresh the ground truth and the crop box.
                    simulator.step(trackedObjects)
                    bboxOn = trackingObj.get_object_box()
                    noisyBox = self.fix_bbox_intersection(
                        bboxPrev, bboxOn, images[0].shape[1],
                        images[0].shape[0])
            else:
                tImage[dd, 0, ...] = im_util.get_cropped_input(
                    images[max(dd - 1, 0)], bboxPrev, CROP_PAD, CROP_SIZE)[0]

            if useSimulator:
                patch = simulator.render_patch(noisyBox, background,
                                               trackedObjects)
                tImage[dd, 1, ...] = patch
                if self.debug:
                    images.append(
                        simulator.get_image_for_frame(
                            trackedObjects, background))
            else:
                tImage[dd, 1, ...] = im_util.get_cropped_input(
                    images[dd], noisyBox, CROP_PAD, CROP_SIZE)[0]

            shiftedBBox = bb_util.to_crop_coordinate_system(
                bboxOn, noisyBox, CROP_PAD, 1)
            shiftedBBoxXYWH = bb_util.xyxy_to_xywh(shiftedBBox)
            xywhLabels[dd, :] = shiftedBBoxXYWH

            if gtType < USE_NETWORK_PROB:
                # Run through a single forward pass to get the next box estimate.
                # Skipped on the last step, where no later crop needs bboxPrev.
                if dd < self.delta - 1:
                    if dd == 0:
                        lstmState = self.initialLstmState

                    feed_dict = {
                        self.forwardNetworkImagePlaceholder: tImage[dd, ...],
                        self.prevLstmState: lstmState
                    }
                    networkOuts, s1, s2 = self.sess.run(
                        [self.networkOutputs, self.state1, self.state2],
                        feed_dict=feed_dict)
                    lstmState = (s1[0], s1[1], s2[0], s2[1])

                    # Network outputs are divided by 10 before being mapped
                    # back out of the crop's coordinate system.
                    xyxyPred = networkOuts.squeeze() / 10
                    outputBox = bb_util.from_crop_coordinate_system(
                        xyxyPred, noisyBox, CROP_PAD, 1)

                    bboxPrev = outputBox
                    if self.debug:
                        bboxes.append(outputBox)
                        cropBBoxes.append(xyxyPred)
            else:
                bboxPrev = bboxOn

            if self.debug:
                # Look at the inputs to make sure they are correct.
                image0 = tImage[dd, 0, ...].copy()
                image1 = tImage[dd, 1, ...].copy()

                xyxyLabel = bb_util.xywh_to_xyxy(
                    xywhLabels[dd, :].squeeze())
                print('xyxy raw', xyxyLabel, 'actual', xyxyLabel * CROP_PAD)
                label = np.zeros((CROP_PAD, CROP_PAD))
                drawing.drawRect(label, xyxyLabel * CROP_PAD, 0, 1)
                drawing.drawRect(
                    image0,
                    bb_util.xywh_to_xyxy(np.full((4, 1), .5) * CROP_SIZE), 2,
                    [255, 0, 0])
                bigImage0 = images[max(dd - 1, 0)].copy()
                bigImage1 = images[dd].copy()
                if dd < len(cropBBoxes):
                    drawing.drawRect(bigImage1, bboxes[dd], 5, [255, 0, 0])
                    drawing.drawRect(image1, cropBBoxes[dd] * CROP_SIZE, 1,
                                     [0, 255, 0])
                    print('pred raw', cropBBoxes[dd], 'actual',
                          cropBBoxes[dd] * CROP_PAD)
                print('\n')

                # NOTE(review): `label` is drawn into and modified here but is
                # not included in `plots`, so it is never displayed.
                label[0, 0] = 1
                label[0, 1] = 0
                plots = [bigImage0, bigImage1, image0, image1]
                subplot = drawing.subplot(plots,
                                          2,
                                          2,
                                          outputWidth=OUTPUT_WIDTH,
                                          outputHeight=OUTPUT_HEIGHT,
                                          border=5)
                cv2.imshow('debug', subplot[:, :, ::-1])
                cv2.waitKey(1)

        if mirrored:
            # Move the crop's second spatial axis into position 1 so np.fliplr
            # reverses it, then restore the original axis order; the labels'
            # first component is mirrored to match.
            tImage = np.fliplr(tImage.transpose(2, 3, 4, 0, 1)).transpose(
                3, 4, 0, 1, 2)
            xywhLabels[..., 0] = 1 - xywhLabels[..., 0]

        # Merge the (delta, 2) leading axes into a single axis of length delta * 2.
        tImage = tImage.reshape([self.delta * 2] + list(tImage.shape[2:]))
        xyxyLabels = bb_util.xywh_to_xyxy(xywhLabels.T).T * 10
        xyxyLabels = xyxyLabels.astype(np.float32)
        return tImage, xyxyLabels
    except Exception as e:
        # NOTE(review): this handler drops into an interactive debugger and then
        # returns None implicitly; callers unpacking the result will fail.
        # Confirm this is intended outside of local debugging.
        import traceback
        traceback.print_exc()
        import pdb
        pdb.set_trace()
        print('exception')