Example #1
# Imports this snippet needs; DEBUG, drawing, and get_image_size come from
# the surrounding module and are not shown here.
import glob
import os
import xml.etree.ElementTree as ET

import cv2
import numpy as np


def main(label_type):
    wildcard = '/*/*/' if label_type == 'train' else '/'
    dataset_path = 'data/ILSVRC2015/'
    annotationPath = dataset_path + 'Annotations/'
    imagePath = dataset_path + 'Data/'

    # Only touch the output files when actually writing labels, matching the VID scripts below.
    if not DEBUG:
        if not os.path.exists(os.path.join('labels', label_type)):
            os.makedirs(os.path.join('labels', label_type))
        imageNameFile = open('labels/' + label_type + '/image_names.txt', 'w')

    labels = glob.glob(annotationPath + 'DET/' + label_type + wildcard + '*.xml')
    labels.sort()
    images = [label.replace('Annotations', 'Data').replace('xml', 'JPEG') for label in labels]

    bboxes = []
    for ii,imageName in enumerate(images):
        if ii % 100 == 0:
            print('iter %d of %d = %.2f%%' % (ii, len(images), ii * 1.0 / len(images) * 100))
        if not DEBUG:
            imageNameFile.write(imageName + '\n')
        imOn = ii
        label = labels[imOn]
        labelTree = ET.parse(label)
        imgSize = get_image_size(images[imOn])
        area_cutoff = imgSize[0] * imgSize[1] * 0.01
        if DEBUG:
            print('\nimage name\n\n%s\n' % images[imOn])
            image = cv2.imread(images[imOn])
            print('image size', image.shape)
            print(label)
            print(labelTree)
            print(labelTree.findall('object'))
        for obj in labelTree.findall('object'):
            bbox = obj.find('bndbox')
            bbox = [int(bbox.find('xmin').text),
                    int(bbox.find('ymin').text),
                    int(bbox.find('xmax').text),
                    int(bbox.find('ymax').text),
                    imOn]
            if (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]) < area_cutoff:
                continue
            if DEBUG:
                print('name', obj.find('name').text, '\n')
                print(bbox)
                image = image.squeeze()
                if len(image.shape) < 3:
                    image = np.tile(image[:,:,np.newaxis], (1,1,3))
                drawing.drawRect(image, bbox[:-1], 3, [0, 0, 255])
            bboxes.append(bbox)

        if DEBUG:
            if len(image.shape) == 2:
                image = np.tile(image[:,:,np.newaxis], (1,1,3))
            cv2.imshow('image', image)
            cv2.waitKey(0)

    bboxes = np.array(bboxes)
    if not DEBUG:
        np.save('labels/' + label_type + '/labels.npy', bboxes)
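
The label scripts in this collection call a get_image_size helper that is not shown. A minimal sketch, assuming Pillow is available and that the helper returns (width, height); Pillow only parses the image header here, so no full JPEG decode is needed:

from PIL import Image

def get_image_size(path):
    # Hypothetical stand-in for the project's helper.
    with Image.open(path) as im:
        return im.size  # (width, height)
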
Example #2
def main(label_type):
    wildcard = '/*/*/' if label_type == 'train' else '/*/'
    dataset_path = 'data/ILSVRC2015/'
    annotationPath = dataset_path + 'Annotations/'
    imagePath = dataset_path + 'Data/'

    if not DEBUG:
        if not os.path.exists(os.path.join('labels', label_type)):
            os.makedirs(os.path.join('labels', label_type))
        imageNameFile = open('labels/' + label_type + '/image_names.txt', 'w')

    videos = sorted(glob.glob(annotationPath + 'VID/' + label_type + wildcard))

    bboxes = []
    imNum = 0
    totalImages = len(glob.glob(annotationPath + 'VID/' + label_type + wildcard + '*.xml'))
    print('totalImages', totalImages)
    classes = {
            'n01674464': 1,
            'n01662784': 2,
            'n02342885': 3,
            'n04468005': 4,
            'n02509815': 5,
            'n02084071': 6,
            'n01503061': 7,
            'n02324045': 8,
            'n02402425': 9,
            'n02834778': 10,
            'n02419796': 11,
            'n02374451': 12,
            'n04530566': 13,
            'n02118333': 14,
            'n02958343': 15,
            'n02510455': 16,
            'n03790512': 17,
            'n02391049': 18,
            'n02121808': 19,
            'n01726692': 20,
            'n02062744': 21,
            'n02503517': 22,
            'n02691156': 23,
            'n02129165': 24,
            'n02129604': 25,
            'n02355227': 26,
            'n02484322': 27,
            'n02411705': 28,
            'n02924116': 29,
            'n02131653': 30,
            }

    for vv,video in enumerate(videos):
        labels = sorted(glob.glob(video + '*.xml'))
        images = [label.replace('Annotations', 'Data').replace('xml', 'JPEG') for label in labels]
        trackColor = dict()
        for ii,imageName in enumerate(images):
            if imNum % 100 == 0:
                print('imNum %d of %d = %.2f%%' % (imNum, totalImages, imNum * 100.0 / totalImages))
            if not DEBUG:
                # Leave off initial bit of path so we can just add parent dir to path later.
                imageNameFile.write(imageName + '\n')
            label = labels[ii]
            labelTree = ET.parse(label)
            imgSize = get_image_size(images[ii])
            area = imgSize[0] * imgSize[1]
            if DEBUG:
                print('\n%s' % images[ii])
                image = cv2.imread(images[ii])
                print('video', vv, 'image', ii)
            for obj in labelTree.findall('object'):
                cls = obj.find('name').text
                assert cls in classes
                classInd = classes[cls]

                occl = int(obj.find('occluded').text)
                trackId = int(obj.find('trackid').text)
                bbox = obj.find('bndbox')
                bbox = [int(bbox.find('xmin').text),
                        int(bbox.find('ymin').text),
                        int(bbox.find('xmax').text),
                        int(bbox.find('ymax').text),
                        vv, trackId, imNum, classInd, occl]

                if DEBUG:
                    print('name', obj.find('name').text, '\n')
                    print(bbox)
                    if trackId not in trackColor:
                        trackColor[trackId] = [random.random() * 255 for _ in range(3)]
                    drawing.drawRect(image, bbox[:4], 3, trackColor[trackId])
                bboxes.append(bbox)
            if DEBUG:
                cv2.imshow('image', image)
                cv2.waitKey(1)

            imNum += 1

    bboxes = np.array(bboxes)
    # Reorder by video_id, then track_id, then video image number so all labels for a single track are next to each other.
    # This only matters if a single image could have multiple tracks.
    order = np.lexsort((bboxes[:,6], bboxes[:,5], bboxes[:,4]))
    bboxes = bboxes[order,:]
    if not DEBUG:
        np.save('labels/' + label_type + '/labels.npy', bboxes)
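
np.lexsort sorts by its last key first, which is why the video id (column 4) is passed last in the call above: rows end up grouped by video, then track, then frame. A small self-contained check:

import numpy as np

rows = np.array([[1, 0, 5],
                 [0, 1, 2],
                 [0, 0, 7]])
# Keys are given minor-to-major: the last array is the primary sort key.
order = np.lexsort((rows[:, 2], rows[:, 1], rows[:, 0]))
print(rows[order])  # sorted by column 0, then column 1, then column 2
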
Example #3
def main(args):
    num_unrolls = args.num_unrolls
    batch_size = args.batch_size
    timing = args.timing
    debug = args.debug or args.output

    device = pt_util.setup_devices(args.device)[0]
    np.set_printoptions(suppress=True)
    np.set_printoptions(precision=4)

    # pool = mp.Pool(min(batch_size, mp.cpu_count()))

    time_str = python_util.get_time_str()
    checkpoint_path = os.path.join(LOG_DIR, "checkpoints")
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)

    train_logger = None
    if not debug:
        tensorboard_dir = os.path.join(LOG_DIR, "tensorboard",
                                       time_str + "_train")
        if not os.path.exists(tensorboard_dir):
            os.makedirs(tensorboard_dir)
        train_logger = tensorboard_logger.Logger(tensorboard_dir)

    data_loader = pt_dataset.get_data_loader(num_unrolls, batch_size,
                                             args.num_threads)
    batch_iter = iter(data_loader)

    network = Re3SmallNet(device, args)
    network.setup_optimizer(1e-5)
    network.to(device)
    network.train()

    start_iter = 0
    if args.restore:
        print("Restoring")
        start_iter = pt_util.restore_from_folder(
            network,
            checkpoint_path,
        )
        print("Restored", start_iter)

    if debug:
        cv2.namedWindow("debug", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("debug", OUTPUT_WIDTH, OUTPUT_HEIGHT)

    try:
        time_total = 0.000001
        num_iters = 0
        iteration = start_iter
        # Run training iterations in the main thread.

        while iteration < args.max_steps:
            if train_logger is not None and iteration % 1000 == 0:
                train_logger.network_conv_summary(network, iteration)
            if iteration == 10000:
                network.update_learning_rate(1e-6)
            if (iteration - 1) % 10 == 0:
                current_time_start = time.time()

            start_solver = time.time()
            # Timers: initial data read time | data time | forward time | backward time | total time
            timers = np.zeros(5)

            try:
                image_sequences = next(batch_iter)
            except StopIteration:
                batch_iter = iter(data_loader)
                image_sequences = next(batch_iter)
            timers[0] = time.time() - start_solver

            outputs = []
            labels = []
            images = []
            noisy_boxes = [None for _ in range(len(image_sequences))]
            mirrored = np.random.random(batch_size) < 0.5
            real_motion = np.random.random(batch_size) < REAL_MOTION_PROB
            use_network_outs = np.random.random(batch_size) < USE_NETWORK_PROB
            lstm_state = None
            network_outs = [None for _ in range(len(image_sequences))]
            for dd in range(num_unrolls):
                batch_images = []
                batch_labels = []
                process_t_start = time.time()

                for ii, vals in enumerate(image_sequences):
                    image_sequence = vals["images"]
                    label_sequence = vals["labels"]
                    image0, image1, xyxy_labels, noisy_box = pt_dataset.get_next_image_crops(
                        image_sequence,
                        label_sequence,
                        dd,
                        noisy_boxes[ii],
                        mirrored[ii],
                        real_motion[ii],
                        network_outs[ii],
                    )
                    batch_images.append((image0, image1))
                    batch_labels.append(xyxy_labels)
                    noisy_boxes[ii] = noisy_box

                images.append(batch_images)
                labels.append(batch_labels)
                image_tensor = pt_util.from_numpy(batch_images)
                timers[1] += time.time() - process_t_start
                forward_t_start = time.time()
                output = network(image_tensor, lstm_state)
                outputs.append(output)
                output = pt_util.to_numpy_array(output)
                for ii in range(batch_size):
                    if use_network_outs[ii]:
                        network_outs[ii] = output[ii]
                lstm_state = network.lstm_state
                timers[2] += time.time() - forward_t_start

            backward_t_start = time.time()
            labels = pt_util.from_numpy(labels)
            network.optimizer.zero_grad()
            outputs = torch.stack(outputs)
            loss_value = network.loss(
                outputs, labels.to(dtype=outputs.dtype, device=network.device))
            loss_value.backward()
            network.optimizer.step()
            loss_value = loss_value.item()
            timers[3] = time.time() - backward_t_start

            end_solver = time.time()
            timers[4] = time.time() - start_solver
            time_total += end_solver - start_solver
            per_image_timers = timers / (num_unrolls * batch_size)

            if train_logger is not None and iteration % 10 == 0:
                train_logger.dict_log(
                    {
                        "losses/loss": loss_value,
                        "stats/data_read_time": timers[0],
                        "stats/data_time": timers[1],
                        "stats/forward_time": timers[2],
                        "stats/backward_time": timers[3],
                        "stats/total_time": timers[4],
                        "per_image_stats/data_read_time": per_image_timers[0],
                        "per_image_stats/data_time": per_image_timers[1],
                        "per_image_stats/forward_time": per_image_timers[2],
                        "per_image_stats/backward_time": per_image_timers[3],
                        "per_image_stats/total_time": per_image_timers[4],
                    },
                    iteration,
                )

            num_iters += 1
            iteration += 1
            if timing and (iteration - 1) % 10 == 0:
                print("Iteration:       %d" % (iteration - 1))
                print("Loss:            %.3f" % loss_value)
                print("Average Time:    %.3f" % (time_total / num_iters))
                print("Current Time:    %.3f" % (end_solver - start_solver))
                if num_iters > 20:
                    print("Current Average: %.3f" %
                          ((time.time() - current_time_start) / 10))
                print("")

            # Save a checkpoint and remove old ones.
            if iteration % 500 == 0 or iteration == args.max_steps:
                pt_util.save(network,
                             LOG_DIR +
                             "/checkpoints/iteration_%07d.pt" % iteration,
                             num_to_keep=1)

            # Every once in a while save a checkpoint that isn't ever removed except by hand.
            if iteration % 10000 == 0 or iteration == args.max_steps:
                pt_util.save(
                    network, LOG_DIR +
                    "/checkpoints/long_checkpoints/iteration_%07d.pt" %
                    iteration)
            if not debug:
                if args.run_val and (num_iters == 1 or iteration % 1000 == 0):
                    # Run a validation set eval in a separate process.
                    def test_func():
                        test_iter_on = iteration
                        print("Starting test iter", test_iter_on)
                        import subprocess
                        import json

                        command = [
                            "python",
                            "test_net.py",
                            "--video-sample-rate",
                            str(10),
                            "--no-display",
                            "-v",
                            str(args.val_device),
                        ]
                        subprocess.call(command)
                        with open("results.json", "r") as f:
                            result = json.load(f)
                        train_logger.dict_log(
                            {
                                "eval/robustness":
                                result["robustness"],
                                "eval/lost_targets":
                                result["lostTarget"],
                                "eval/mean_iou":
                                result["meanIou"],
                                "eval/avg_measure":
                                (result["meanIou"] + result["robustness"]) / 2,
                            },
                            test_iter_on,
                        )
                        os.remove("results.json")
                        print("Ending test iter", test_iter_on)

                    test_thread = threading.Thread(target=test_func)
                    test_thread.daemon = True
                    test_thread.start()
            if args.output:
                # Look at some of the outputs.
                print("new batch")
                images = (np.array(images).transpose(
                    (1, 0, 2, 3, 4, 5)).reshape(
                        (batch_size, num_unrolls, 2, CROP_SIZE, CROP_SIZE, 3)))
                labels = pt_util.to_numpy_array(labels).transpose(1, 0, 2)
                outputs = pt_util.to_numpy_array(outputs).transpose(1, 0, 2)
                for bb in range(batch_size):
                    for dd in range(num_unrolls):
                        image0 = images[bb, dd, 0, ...]
                        image1 = images[bb, dd, 1, ...]

                        label = labels[bb, dd, :]
                        xyxy_label = label / 10
                        label_box = xyxy_label * CROP_PAD

                        output = outputs[bb, dd, ...]
                        xyxy_pred = output / 10
                        output_box = xyxy_pred * CROP_PAD

                        drawing.drawRect(
                            image0,
                            bb_util.xywh_to_xyxy(
                                np.full((4, 1), 0.5) * CROP_SIZE), 2,
                            [0, 255, 0])
                        drawing.drawRect(image1, xyxy_label * CROP_SIZE, 2,
                                         [0, 255, 0])
                        drawing.drawRect(image1, xyxy_pred * CROP_SIZE, 2,
                                         [255, 0, 0])

                        plots = [image0, image1]
                        subplot = drawing.subplot(plots,
                                                  1,
                                                  2,
                                                  outputWidth=OUTPUT_WIDTH,
                                                  outputHeight=OUTPUT_HEIGHT,
                                                  border=5)
                        cv2.imshow("debug", subplot[:, :, ::-1])
                        cv2.waitKey(0)
    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        # Save if error or killed by ctrl-c.
        if not debug:
            print("Saving...")
            pt_util.save(network,
                         LOG_DIR +
                         "/checkpoints/iteration_%07d.pt" % iteration,
                         num_to_keep=-1)
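
The visualization block above passes a (4, 1) column of 0.5s to bb_util.xywh_to_xyxy, which suggests boxes travel column-wise as (4, N) arrays in this codebase. A hedged sketch of what the two bb_util conversions likely do, inferred from the call sites rather than taken from the project's actual code:

import numpy as np

def xywh_to_xyxy(xywh):
    # Works for (4,) vectors and (4, N) column-wise batches alike.
    x, y, w, h = np.asarray(xywh, dtype=np.float64)
    return np.array([x - w / 2., y - h / 2., x + w / 2., y + h / 2.])

def xyxy_to_xywh(xyxy):
    x1, y1, x2, y2 = np.asarray(xyxy, dtype=np.float64)
    return np.array([(x1 + x2) / 2., (y1 + y2) / 2., x2 - x1, y2 - y1])
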
Example #4
def main(FLAGS):
    global PORT, delta, REPLAY_BUFFER_SIZE
    delta = FLAGS.delta
    batchSize = FLAGS.batch_size
    timing = FLAGS.timing
    debug = FLAGS.debug or FLAGS.output
    PORT = FLAGS.port

    os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.cuda_visible_devices)
    np.set_printoptions(suppress=True)
    np.set_printoptions(precision=4)

    # Tensorflow setup
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    if not os.path.exists(LOG_DIR + '/checkpoints'):
        os.makedirs(LOG_DIR + '/checkpoints')

    # Note: without a `with` block this context manager is never entered, so
    # the ops below are simply built on the process-wide default graph.
    tf.Graph().as_default()
    tf.logging.set_verbosity(tf.logging.INFO)

    sess = tf_util.Session()

    # Create the nodes for single image forward passes for learning to fix mistakes.
    # Parameters here are shared with the learned network.
    if ',' in FLAGS.cuda_visible_devices:
        with tf.device('/gpu:1'):
            forwardNetworkImagePlaceholder = tf.placeholder(
                tf.uint8, shape=(2, CROP_SIZE, CROP_SIZE, 3))
            prevLstmState = tuple([
                tf.placeholder(tf.float32, shape=(1, LSTM_SIZE))
                for _ in range(4)
            ])
            initialLstmState = tuple(
                [np.zeros((1, LSTM_SIZE)) for _ in range(4)])
            networkOutputs, state1, state2 = network.inference(
                forwardNetworkImagePlaceholder,
                num_unrolls=1,
                train=False,
                prevLstmState=prevLstmState,
                reuse=False)
    else:
        forwardNetworkImagePlaceholder = tf.placeholder(tf.uint8,
                                                        shape=(2, CROP_SIZE,
                                                               CROP_SIZE, 3))
        prevLstmState = tuple([
            tf.placeholder(tf.float32, shape=(1, LSTM_SIZE)) for _ in range(4)
        ])
        initialLstmState = tuple([np.zeros((1, LSTM_SIZE)) for _ in range(4)])
        networkOutputs, state1, state2 = network.inference(
            forwardNetworkImagePlaceholder,
            num_unrolls=1,
            train=False,
            prevLstmState=prevLstmState,
            reuse=False)

    tf_dataset_obj = tf_dataset.Dataset(sess,
                                        delta,
                                        batchSize * 2,
                                        PORT,
                                        debug=FLAGS.debug)
    tf_dataset_obj.initialize_tf_placeholders(forwardNetworkImagePlaceholder,
                                              prevLstmState, networkOutputs,
                                              state1, state2)

    tf_dataset_iterator = tf_dataset_obj.get_dataset(batchSize)
    imageBatch, labelsBatch = tf_dataset_iterator.get_next()
    imageBatch = tf.reshape(imageBatch,
                            (batchSize * delta * 2, CROP_SIZE, CROP_SIZE, 3))
    labelsBatch = tf.reshape(labelsBatch, (batchSize * delta, -1))

    learningRate = tf.placeholder(tf.float32)
    imagePlaceholder = tf.placeholder(tf.uint8,
                                      shape=(batchSize, delta * 2, CROP_SIZE,
                                             CROP_SIZE, 3))
    labelPlaceholder = tf.placeholder(tf.float32, shape=(batchSize, delta, 4))

    if ',' in FLAGS.cuda_visible_devices:
        with tf.device('/gpu:0'):
            tfOutputs = network.inference(imageBatch,
                                          num_unrolls=delta,
                                          train=True,
                                          reuse=True)
            tfLossFull, tfLoss = network.loss(tfOutputs, labelsBatch)
            train_op = network.training(tfLossFull, learningRate)
    else:
        tfOutputs = network.inference(imageBatch,
                                      num_unrolls=delta,
                                      train=True,
                                      reuse=True)
        tfLossFull, tfLoss = network.loss(tfOutputs, labelsBatch)
        train_op = network.training(tfLossFull, learningRate)

    loss_summary_op = tf.summary.merge([
        tf.summary.scalar('loss', tfLoss),
        tf.summary.scalar('l2_regularizer', tfLossFull - tfLoss),
    ])

    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    longSaver = tf.train.Saver()

    # Initialize the network and load saved parameters.
    sess.run(init)
    startIter = 0
    if FLAGS.restore:
        print('Restoring')
        startIter = tf_util.restore_from_dir(
            sess, os.path.join(LOG_DIR, 'checkpoints'))
    if not debug:
        tt = time.localtime()
        time_str = ('%04d_%02d_%02d_%02d_%02d_%02d' %
                    (tt.tm_year, tt.tm_mon, tt.tm_mday, tt.tm_hour, tt.tm_min,
                     tt.tm_sec))
        summary_writer = tf.summary.FileWriter(
            LOG_DIR + '/train/' + time_str + '_n_' + str(delta) + '_b_' +
            str(batchSize), sess.graph)
        summary_full = tf.summary.merge_all()
        conv_var_list = [
            v for v in tf.trainable_variables()
            if 'conv' in v.name and 'weight' in v.name and
            (v.get_shape().as_list()[0] != 1 or v.get_shape().as_list()[1] != 1
             )
        ]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(var,
                                            scope=var.name.replace('/',
                                                                   '_')[:-2])
        summary_with_images = tf.summary.merge_all()

    # Logging stuff
    robustness_ph = tf.placeholder(tf.float32, shape=[])
    lost_targets_ph = tf.placeholder(tf.float32, shape=[])
    mean_iou_ph = tf.placeholder(tf.float32, shape=[])
    avg_ph = tf.placeholder(tf.float32, shape=[])
    if FLAGS.run_val:
        val_gpu = None if FLAGS.val_device == '0' else FLAGS.val_device
        test_tracker = re3_tracker.CopiedRe3Tracker(sess, train_vars, val_gpu)
        test_runner = test_net.TestTrackerRunner(test_tracker)
        with tf.name_scope('test'):
            test_summary_op = tf.summary.merge([
                tf.summary.scalar('robustness', robustness_ph),
                tf.summary.scalar('lost_targets', lost_targets_ph),
                tf.summary.scalar('mean_iou', mean_iou_ph),
                tf.summary.scalar('avg_iou_robustness', avg_ph),
            ])

    if debug:
        cv2.namedWindow('debug', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('debug', OUTPUT_WIDTH, OUTPUT_HEIGHT)

    sess.graph.finalize()

    try:
        timeTotal = 0.000001
        numIters = 0
        iteration = startIter
        # Run training iterations in the main thread.
        while iteration < FLAGS.max_steps:
            if (iteration - 1) % 10 == 0:
                currentTimeStart = time.time()

            startSolver = time.time()
            if debug:
                _, outputs, lossValue, images, labels = sess.run(
                    [train_op, tfOutputs, tfLoss, imageBatch, labelsBatch],
                    feed_dict={learningRate: LEARNING_RATE})
                debug_feed_dict = {
                    imagePlaceholder: images,
                    labelPlaceholder: labels,
                }
            else:
                if iteration % 10 == 0:
                    _, lossValue, loss_summary = sess.run(
                        [train_op, tfLoss, loss_summary_op],
                        feed_dict={learningRate: LEARNING_RATE})
                    summary_writer.add_summary(loss_summary, iteration)
                else:
                    _, lossValue = sess.run(
                        [train_op, tfLoss],
                        feed_dict={learningRate: LEARNING_RATE})
            endSolver = time.time()

            numIters += 1
            iteration += 1

            timeTotal += (endSolver - startSolver)
            if timing and (iteration - 1) % 10 == 0:
                print('Iteration:       %d' % (iteration - 1))
                print('Loss:            %.3f' % lossValue)
                print('Average Time:    %.3f' % (timeTotal / numIters))
                print('Current Time:    %.3f' % (endSolver - startSolver))
                if numIters > 20:
                    print('Current Average: %.3f' %
                          ((time.time() - currentTimeStart) / 10))
                print('')

            # Save a checkpoint and remove old ones.
            if iteration % 500 == 0 or iteration == FLAGS.max_steps:
                checkpoint_file = os.path.join(LOG_DIR, 'checkpoints',
                                               'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=iteration)
                print("Saving checkpoint at " + checkpoint_file)
                if FLAGS.clearSnapshots:
                    files = glob.glob(LOG_DIR + '/checkpoints/*')
                    for file in files:
                        basename = os.path.basename(file)
                        if os.path.isfile(file) and str(
                                iteration
                        ) not in file and 'checkpoint' not in basename:
                            os.remove(file)
            # Every once in a while save a checkpoint that isn't ever removed except by hand.
            if iteration % 10000 == 0 or iteration == FLAGS.max_steps:
                if not os.path.exists(LOG_DIR +
                                      '/checkpoints/long_checkpoints'):
                    os.makedirs(LOG_DIR + '/checkpoints/long_checkpoints')
                checkpoint_file = os.path.join(LOG_DIR,
                                               'checkpoints/long_checkpoints',
                                               'model.ckpt')
                longSaver.save(sess, checkpoint_file, global_step=iteration)
            if not debug:
                if (numIters == 1 or iteration % 100 == 0
                        or iteration == FLAGS.max_steps):
                    # Write out the full graph sometimes.
                    if (numIters == 1 or iteration == FLAGS.max_steps):
                        print('Running detailed summary')
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        _, summary_str = sess.run(
                            [train_op, summary_with_images],
                            options=run_options,
                            run_metadata=run_metadata,
                            feed_dict={learningRate: LEARNING_RATE})
                        summary_writer.add_run_metadata(
                            run_metadata, 'step_%07d' % iteration)
                    elif iteration % 1000 == 0:
                        _, summary_str = sess.run(
                            [train_op, summary_with_images],
                            feed_dict={learningRate: LEARNING_RATE})
                        print('Running image summary')
                    else:
                        print('Running summary')
                        _, summary_str = sess.run(
                            [train_op, summary_full],
                            feed_dict={learningRate: LEARNING_RATE})
                    summary_writer.add_summary(summary_str, iteration)
                    summary_writer.flush()
                if (FLAGS.run_val and (numIters == 1 or iteration % 500 == 0)):
                    # Run a validation set eval in a separate thread.
                    def test_func(test_iter_on):
                        print('Starting test iter', test_iter_on)
                        test_runner.reset()
                        result = test_runner.run_test(
                            dataset=FLAGS.val_dataset, display=False)
                        summary_str = sess.run(
                            test_summary_op,
                            feed_dict={
                                robustness_ph:
                                result['robustness'],
                                lost_targets_ph:
                                result['lostTarget'],
                                mean_iou_ph:
                                result['meanIou'],
                                avg_ph:
                                (result['meanIou'] + result['robustness']) / 2,
                            })
                        summary_writer.add_summary(summary_str, test_iter_on)
                        os.remove('results.json')
                        print('Ending test iter', test_iter_on)

                    test_thread = threading.Thread(target=test_func,
                                                   args=(iteration, ))
                    test_thread.start()
            if FLAGS.output:
                # Look at some of the outputs.
                print('new batch')
                images = debug_feed_dict[imagePlaceholder].astype(
                    np.uint8).reshape(
                        (batchSize, delta, 2, CROP_SIZE, CROP_SIZE, 3))
                labels = debug_feed_dict[labelPlaceholder].reshape(
                    (batchSize, delta, 4))
                outputs = outputs.reshape((batchSize, delta, 4))
                for bb in range(batchSize):
                    for dd in range(delta):
                        image0 = images[bb, dd, 0, ...]
                        image1 = images[bb, dd, 1, ...]

                        label = labels[bb, dd, :]
                        xyxyLabel = label / 10
                        labelBox = xyxyLabel * CROP_PAD

                        output = outputs[bb, dd, ...]
                        xyxyPred = output / 10
                        outputBox = xyxyPred * CROP_PAD

                        drawing.drawRect(
                            image0,
                            bb_util.xywh_to_xyxy(
                                np.full((4, 1), .5) * CROP_SIZE), 2,
                            [255, 0, 0])
                        drawing.drawRect(image1, xyxyLabel * CROP_SIZE, 2,
                                         [0, 255, 0])
                        drawing.drawRect(image1, xyxyPred * CROP_SIZE, 2,
                                         [255, 0, 0])

                        plots = [image0, image1]
                        subplot = drawing.subplot(plots,
                                                  1,
                                                  2,
                                                  outputWidth=OUTPUT_WIDTH,
                                                  outputHeight=OUTPUT_HEIGHT,
                                                  border=5)
                        cv2.imshow('debug', subplot[:, :, ::-1])
                        cv2.waitKey(1)
    except:
        # Save if error or killed by ctrl-c.
        if not debug:
            print('Saving...')
            checkpoint_file = os.path.join(LOG_DIR, 'checkpoints',
                                           'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=iteration)
        raise
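
Both device branches above build the same tower twice: reuse=False creates the variables for the single-frame forward pass, and reuse=True makes the unrolled training graph share them. A minimal TF1 sketch of that sharing pattern (toy layer, not the Re3 network):

import tensorflow as tf

def tower(x, reuse):
    with tf.variable_scope('net', reuse=reuse):
        return tf.layers.dense(x, 4, name='fc')

single = tower(tf.placeholder(tf.float32, (1, 8)), reuse=False)    # creates net/fc
unrolled = tower(tf.placeholder(tf.float32, (32, 8)), reuse=True)  # reuses net/fc
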
Example #5
    def get_data_sequence(self):
        try:
            # Preallocate the space for the images and labels.
            tImage = np.zeros((self.delta, 2, CROP_SIZE, CROP_SIZE, 3),
                              dtype=np.uint8)
            xywhLabels = np.zeros((self.delta, 4), dtype=np.float32)

            mirrored = random.random() < 0.5
            useSimulator = random.random() < USE_SIMULATOR
            gtType = random.random()
            realMotion = random.random() < REAL_MOTION_PROB

            # Initialize first frame (give the network context).
            if useSimulator:
                # Initialize the simulation and run through a few frames.
                trackingObj, trackedObjects, background = simulator.create_new_track(
                )
                for _ in range(random.randint(0, 200)):
                    simulator.step(trackedObjects)
                    bbox = trackingObj.get_object_box()
                    occlusion = simulator.measure_occlusion(
                        bbox, trackingObj.occluder_boxes, cropPad=1)
                    if occlusion > .2:
                        break
                for _ in range(1000):
                    bbox = trackingObj.get_object_box()
                    occlusion = simulator.measure_occlusion(
                        bbox, trackingObj.occluder_boxes, cropPad=1)
                    if occlusion < 0.01:
                        break
                    simulator.step(trackedObjects)
                initBox = trackingObj.get_object_box()
                if self.debug:
                    images = [
                        simulator.get_image_for_frame(trackedObjects,
                                                      background)
                    ]
                else:
                    images = [np.zeros((SIMULATION_HEIGHT, SIMULATION_WIDTH))]

            else:
                # Read a new data sequence from batch cache and get the ground truth.
                (batchKey, images) = self.getData()
                gtKey = batchKey
                imageIndex = self.key_lookup[gtKey]
                initBox = self.datasets[gtKey[0]][imageIndex, :4].copy()
            if self.debug:
                bboxes = []
                cropBBoxes = []

            # bboxPrev starts at the initial box and is the best guess (or gt) for the image0 location.
            # noisyBox holds the bboxPrev estimate plus some noise.
            bboxPrev = initBox
            lstmState = None

            for dd in range(self.delta):
                # bboxOn is the gt location in image1
                if useSimulator:
                    bboxOn = trackingObj.get_object_box()
                else:
                    newKey = list(gtKey)
                    newKey[3] += dd
                    newKey = tuple(newKey)
                    imageIndex = self.key_lookup[newKey]
                    bboxOn = self.datasets[newKey[0]][imageIndex, :4].copy()
                if dd == 0:
                    noisyBox = bboxOn.copy()
                elif not realMotion and not useSimulator and gtType >= USE_NETWORK_PROB:
                    noisyBox = self.add_noise(bboxOn, bboxOn,
                                              images[0].shape[1],
                                              images[0].shape[0])
                else:
                    noisyBox = self.fix_bbox_intersection(
                        bboxPrev, bboxOn, images[0].shape[1],
                        images[0].shape[0])

                if useSimulator:
                    patch = simulator.render_patch(bboxPrev, background,
                                                   trackedObjects)
                    tImage[dd, 0, ...] = patch
                    if dd > 0:
                        simulator.step(trackedObjects)
                        bboxOn = trackingObj.get_object_box()
                        noisyBox = self.fix_bbox_intersection(
                            bboxPrev, bboxOn, images[0].shape[1],
                            images[0].shape[0])
                else:
                    tImage[dd, 0, ...] = im_util.get_cropped_input(
                        images[max(dd - 1, 0)], bboxPrev, CROP_PAD,
                        CROP_SIZE)[0]

                if useSimulator:
                    patch = simulator.render_patch(noisyBox, background,
                                                   trackedObjects)
                    tImage[dd, 1, ...] = patch
                    if self.debug:
                        images.append(
                            simulator.get_image_for_frame(
                                trackedObjects, background))
                else:
                    tImage[dd, 1, ...] = im_util.get_cropped_input(
                        images[dd], noisyBox, CROP_PAD, CROP_SIZE)[0]

                shiftedBBox = bb_util.to_crop_coordinate_system(
                    bboxOn, noisyBox, CROP_PAD, 1)
                shiftedBBoxXYWH = bb_util.xyxy_to_xywh(shiftedBBox)
                xywhLabels[dd, :] = shiftedBBoxXYWH

                if gtType < USE_NETWORK_PROB:
                    # Run through a single forward pass to get the next box estimate.
                    if dd < self.delta - 1:
                        if dd == 0:
                            lstmState = self.initialLstmState

                        feed_dict = {
                            self.forwardNetworkImagePlaceholder: tImage[dd,
                                                                        ...],
                            self.prevLstmState: lstmState
                        }
                        networkOuts, s1, s2 = self.sess.run(
                            [self.networkOutputs, self.state1, self.state2],
                            feed_dict=feed_dict)
                        lstmState = (s1[0], s1[1], s2[0], s2[1])

                        xyxyPred = networkOuts.squeeze() / 10
                        outputBox = bb_util.from_crop_coordinate_system(
                            xyxyPred, noisyBox, CROP_PAD, 1)

                        bboxPrev = outputBox
                        if self.debug:
                            bboxes.append(outputBox)
                            cropBBoxes.append(xyxyPred)
                else:
                    bboxPrev = bboxOn

                if self.debug:
                    # Look at the inputs to make sure they are correct.
                    image0 = tImage[dd, 0, ...].copy()
                    image1 = tImage[dd, 1, ...].copy()

                    xyxyLabel = bb_util.xywh_to_xyxy(
                        xywhLabels[dd, :].squeeze())
                    print('xyxy raw', xyxyLabel, 'actual',
                          xyxyLabel * CROP_PAD)
                    label = np.zeros((CROP_PAD, CROP_PAD))
                    drawing.drawRect(label, xyxyLabel * CROP_PAD, 0, 1)
                    drawing.drawRect(
                        image0,
                        bb_util.xywh_to_xyxy(np.full((4, 1), .5) * CROP_SIZE),
                        2, [255, 0, 0])
                    bigImage0 = images[max(dd - 1, 0)].copy()
                    bigImage1 = images[dd].copy()
                    if dd < len(cropBBoxes):
                        drawing.drawRect(bigImage1, bboxes[dd], 5, [255, 0, 0])
                        drawing.drawRect(image1, cropBBoxes[dd] * CROP_SIZE, 1,
                                         [0, 255, 0])
                        print('pred raw', cropBBoxes[dd], 'actual',
                              cropBBoxes[dd] * CROP_PAD)
                    print('\n')

                    label[0, 0] = 1
                    label[0, 1] = 0
                    plots = [bigImage0, bigImage1, image0, image1]
                    subplot = drawing.subplot(plots,
                                              2,
                                              2,
                                              outputWidth=OUTPUT_WIDTH,
                                              outputHeight=OUTPUT_HEIGHT,
                                              border=5)
                    cv2.imshow('debug', subplot[:, :, ::-1])
                    cv2.waitKey(1)

            if mirrored:
                tImage = np.fliplr(tImage.transpose(2, 3, 4, 0, 1)).transpose(
                    3, 4, 0, 1, 2)
                xywhLabels[..., 0] = 1 - xywhLabels[..., 0]

            tImage = tImage.reshape([self.delta * 2] + list(tImage.shape[2:]))
            xyxyLabels = bb_util.xywh_to_xyxy(xywhLabels.T).T * 10
            xyxyLabels = xyxyLabels.astype(np.float32)
            return tImage, xyxyLabels
        except Exception:
            import traceback
            traceback.print_exc()
            import pdb
            pdb.set_trace()
            print('exception')
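
The mirroring branch near the end flips the width axis of the whole (delta, 2, H, W, 3) buffer by transposing W into np.fliplr's flip axis and transposing back, then mirrors the x coordinate of the label as 1 - x. A quick check that the transpose dance equals a plain width reversal:

import numpy as np

t = np.arange(2 * 2 * 3 * 4 * 3).reshape(2, 2, 3, 4, 3)  # (delta, 2, H, W, C)
flipped = np.fliplr(t.transpose(2, 3, 4, 0, 1)).transpose(3, 4, 0, 1, 2)
assert np.array_equal(flipped, t[:, :, :, ::-1, :])
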
Example #6
def main(dataset_path, label_type):
    wildcard = "/*/*/" if label_type == "train" else "/*/"
    #dataset_path = "data/ILSVRC2015/"
    #dataset_path = sys.argv[1]
    annotationPath = dataset_path + "Annotations/"
    imagePath = dataset_path + "Data/"

    if not DEBUG:
        if not os.path.exists(os.path.join("labels", label_type)):
            os.makedirs(os.path.join("labels", label_type))
        imageNameFile = open("labels/" + label_type + "/image_names.txt", "w")

    videos = sorted(glob.glob(annotationPath + "VID/" + label_type + wildcard))

    bboxes = []
    imNum = 0
    totalImages = len(
        glob.glob(annotationPath + "VID/" + label_type + wildcard + "*.xml"))
    print("totalImages", totalImages)
    classes = {
        "n01674464": 1,
        "n01662784": 2,
        "n02342885": 3,
        "n04468005": 4,
        "n02509815": 5,
        "n02084071": 6,
        "n01503061": 7,
        "n02324045": 8,
        "n02402425": 9,
        "n02834778": 10,
        "n02419796": 11,
        "n02374451": 12,
        "n04530566": 13,
        "n02118333": 14,
        "n02958343": 15,
        "n02510455": 16,
        "n03790512": 17,
        "n02391049": 18,
        "n02121808": 19,
        "n01726692": 20,
        "n02062744": 21,
        "n02503517": 22,
        "n02691156": 23,
        "n02129165": 24,
        "n02129604": 25,
        "n02355227": 26,
        "n02484322": 27,
        "n02411705": 28,
        "n02924116": 29,
        "n02131653": 30,
    }

    for vv, video in enumerate(videos):
        labels = sorted(glob.glob(video + "*.xml"))
        images = [
            label.replace("Annotations", "Data").replace("xml", "JPEG")
            for label in labels
        ]
        trackColor = dict()
        for ii, imageName in enumerate(images):
            if imNum % 100 == 0:
                print("imNum %d of %d = %.2f%%" %
                      (imNum, totalImages, imNum * 100.0 / totalImages))
            if not DEBUG:
                # Leave off initial bit of path so we can just add parent dir to path later.
                imageNameFile.write(imageName + "\n")
            label = labels[ii]
            labelTree = ET.parse(label)
            imgSize = get_image_size(images[ii])
            area = imgSize[0] * imgSize[1]
            if DEBUG:
                print("\n%s" % images[ii])
                image = cv2.imread(images[ii])
                print("video", vv, "image", ii)
            for obj in labelTree.findall("object"):
                cls = obj.find("name").text
                assert cls in classes
                classInd = classes[cls]

                occl = int(obj.find("occluded").text)
                trackId = int(obj.find("trackid").text)
                bbox = obj.find("bndbox")
                bbox = [
                    int(bbox.find("xmin").text),
                    int(bbox.find("ymin").text),
                    int(bbox.find("xmax").text),
                    int(bbox.find("ymax").text),
                    vv,
                    trackId,
                    imNum,
                    classInd,
                    occl,
                ]

                if DEBUG:
                    print("name", obj.find("name").text, "\n")
                    print(bbox)
                    if trackId not in trackColor:
                        trackColor[trackId] = [
                            random.random() * 255 for _ in range(3)
                        ]
                    drawing.drawRect(image, bbox[:4], 3, trackColor[trackId])
                bboxes.append(bbox)
            if DEBUG:
                cv2.imshow("image", image)
                cv2.waitKey(1)

            imNum += 1

    bboxes = np.array(bboxes)
    # Reorder by video_id, then track_id, then video image number so all labels for a single track are next to each other.
    # This only matters if a single image could have multiple tracks.
    order = np.lexsort((bboxes[:, 6], bboxes[:, 5], bboxes[:, 4]))
    bboxes = bboxes[order, :]
    if not DEBUG:
        np.save("labels/" + label_type + "/labels.npy", bboxes)
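
A minimal sketch (output paths assumed) of reading the files this script writes back in; column 6 of each label row indexes into image_names.txt:

import numpy as np

labels = np.load('labels/train/labels.npy')
with open('labels/train/image_names.txt') as f:
    image_names = [line.strip() for line in f]
xmin, ymin, xmax, ymax, video_id, track_id, im_num, class_ind, occl = labels[0]
print(image_names[im_num], (xmin, ymin, xmax, ymax))
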
Example #7
            drawing.drawRect(newImage, scale_bbox(prevLoc, CROP_PAD), 3,
                             [0, 255, 0])
            cv2.imwrite('images_full/%07d.png' % seqInd, newImage[:, :, ::-1])
            seqInd += 1
        step(trackedObjects)
    return sequence


if __name__ == '__main__':
    set_seed(0)
    from re3_utils.util import drawing
    NUM_SEQUENCES = 10
    SEQUENCE_LENGTH = 200

    if not os.path.exists('images'):
        os.mkdir('images')
    if not os.path.exists('images_full'):
        os.mkdir('images_full')
    make_paths()
    times = []
    imCount = 0

    for xx in range(NUM_SEQUENCES):
        startTime = time.time()
        sequence = get_image_sequence(SEQUENCE_LENGTH, imCount, True)
        times.append((time.time() - startTime) / SEQUENCE_LENGTH)
        for (image, bbox) in sequence:
            imCount += 1
            drawing.drawRect(image, bbox, 1, [255, 0, 0])
            cv2.imwrite('images/%07d.png' % imCount, image[:, :, ::-1])
        print('average time per frame %.5f' % np.mean(times))
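
The numbered PNGs written above can be stitched into a quick preview clip with OpenCV, which these scripts already depend on; a small sketch with assumed file and codec names:

import glob
import cv2

frames = sorted(glob.glob('images/*.png'))
first = cv2.imread(frames[0])
height, width = first.shape[:2]
writer = cv2.VideoWriter('preview.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                         30, (width, height))
for path in frames:
    writer.write(cv2.imread(path))
writer.release()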