Beispiel #1
0
def reset_session(args):
    """Re-initialize all variables and reload the newest checkpoint.

    Works on the module-level ``sess`` and ``saver``. The checkpoint is the
    one ``tf.train.latest_checkpoint`` reports for ``args.dir``; its path is
    printed before it is restored.
    """
    hem.message('Resetting variables...')
    # Build and run each initializer in turn: globals first, then locals.
    for make_init_op in (tf.global_variables_initializer,
                         tf.local_variables_initializer):
        sess.run(make_init_op())

    hem.message('Restoring checkpoint...')
    checkpoint_path = tf.train.latest_checkpoint(args.dir)
    print(checkpoint_path)
    saver.restore(sess, checkpoint_path)
Beispiel #2
0
# Metric keys reported for every evaluation pass, in print order.
_METRIC_NAMES = ('t1', 't2', 't3', 'abs_rel_diff', 'squared_rel_diff',
                 'linear_rmse', 'log_rmse', 'scale_invariant_log_rmse')


def _sum_metric_batches(metric_ops, feed_dict, n_batches, image_ops=()):
    """Run ``n_batches`` session steps and total the metric dicts they return.

    Args:
        metric_ops: fetches whose results are mappings of metric name -> value.
        feed_dict: feed dictionary passed unchanged to every ``sess.run``.
        n_batches: number of ``sess.run`` calls to make.
        image_ops: optional extra fetches whose results are collected, in
            fetch order, batch after batch.

    Returns:
        ``(totals, images)``: a ``Counter`` with the per-key sums over all
        batches and all ``metric_ops``, and the list of collected
        ``image_ops`` results.
    """
    totals = Counter()
    images = []
    n_metric_ops = len(metric_ops)
    fetches = list(metric_ops) + list(image_ops)
    for _ in range(n_batches):
        results = sess.run(fetches, feed_dict=feed_dict)
        for batch_metrics in results[:n_metric_ops]:
            # Counter.update() adds in place. Unlike `Counter + Counter` it
            # does not drop totals that are zero, negative or NaN, so a bad
            # metric stays visible instead of being reported as 0.
            totals.update(batch_metrics)
        images.extend(results[n_metric_ops:])
    return totals, images


def _print_metric_averages(title, totals, n):
    """Print ``totals[k] / n`` for every key in ``_METRIC_NAMES`` under ``title``."""
    hem.message(title)
    for k in _METRIC_NAMES:
        print('\t{}: {:.3f}'.format(k, totals[k] / n))


def calculate_metrics(dataset_name, dataset_handle, n_batches):
    """Evaluate the restored model on one dataset split and print its metrics.

    Three passes of ``n_batches`` session runs are made, all feeding
    ``dataset_handle`` through ``handle_placeholder``:

    1. model metrics (``m1``/``m2``), also collecting the ``y``/``y2``
       outputs to build the mean image of the split;
    2. metrics ``m_mean_1``/``m_mean_2`` with that mean image fed through
       ``mean_image_placeholder``;
    3. metrics ``m_g0_1``/``m_g0_2`` with ``zero_image_batch`` fed through
       ``mean_image_placeholder``.

    The mean image is also written to ``<args.dir>/metrics/`` as
    ``<dataset_name>_mean.png`` and ``<dataset_name>_mean_colorized.png``.

    All tensors, ``sess``, ``args`` and ``zero_image_batch`` are module-level
    names defined elsewhere in the file.

    Args:
        dataset_name: label used in log messages and output file names.
        dataset_handle: value fed to ``handle_placeholder``.
        n_batches: number of batches to evaluate per pass; must be >= 1.

    Raises:
        ValueError: if ``n_batches`` is smaller than 1.
    """
    if n_batches < 1:
        raise ValueError(
            'n_batches must be at least 1, got {}'.format(n_batches))

    reset_session(args)
    hem.message('Calculating metrics for {} set...'.format(dataset_name))

    # Each batch contributes two metric dicts, hence the factor of two.
    n = n_batches * 2

    # accumulate results
    g_metrics, images = _sum_metric_batches(
        [m1, m2], {handle_placeholder: dataset_handle}, n_batches,
        image_ops=[y, y2])
    _print_metric_averages('Model metrics:', g_metrics, n)

    # process mean image (concatenate once instead of once per batch)
    mean_image = np.mean(np.concatenate(images, axis=0), axis=0)
    mean_depth, mean_depth_colorized = colorize_depthmap(mean_image)
    cv2.imwrite(os.path.join(args.dir, 'metrics', '{}_mean.png'.format(dataset_name)), mean_depth)
    cv2.imwrite(os.path.join(args.dir, 'metrics', '{}_mean_colorized.png'.format(dataset_name)), mean_depth_colorized)

    # calculate metrics using mean dataset depth
    mean_image_batch = np.stack([mean_image] * args.batch_size, axis=0)
    mean_metrics, _ = _sum_metric_batches(
        [m_mean_1, m_mean_2],
        {handle_placeholder: dataset_handle,
         mean_image_placeholder: mean_image_batch},
        n_batches)
    _print_metric_averages('Mean metrics:', mean_metrics, n)

    # calculate metrics using g = 0
    zero_metrics, _ = _sum_metric_batches(
        [m_g0_1, m_g0_2],
        {handle_placeholder: dataset_handle,
         mean_image_placeholder: zero_image_batch},
        n_batches)
    _print_metric_averages('Zero metrics:', zero_metrics, n)
Beispiel #3
0
    y_hat = g
    # y_0 = g_0
    return x, y, g, y_hat, y_bar

def cgan_mean_nodes(tower=0):
    """Look up the tensors of one tower in ``graph`` by operation name.

    Each tensor is the first output of the named graph element. The
    ``g_0`` / ``y_0`` lookups (``generator/zeros_like`` and
    ``generator/add_1``) are intentionally not performed.

    Returns:
        The tuple ``(x, y, g, y_hat, y_bar)``.
    """
    def first_output(name_template):
        # Fill in the tower index, resolve the element, take its first output.
        element = graph.as_graph_element(name_template.format(tower))
        return element.outputs[0]

    x = first_output('tower_{}/input_preprocess/Reshape')
    y = first_output('tower_{}/input_preprocess/Reshape_1')
    y_bar = first_output('tower_{}/input_preprocess/Mean')
    g = first_output('tower_{}/generator/decoder/transpose_1')
    y_hat = first_output('tower_{}/generator/add')
    return x, y, g, y_hat, y_bar


# Script setup: parse the command line, import the saved graph and prepare
# the train/validate dataset handles used by the rest of the script.
hem.message('Parsing arguments...')
args = hem.parse_args()

hem.message('Loading metafile and graph data...')
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
# NOTE(review): the meta-graph file name is hard-coded to checkpoint 50;
# a run directory without 'checkpoint-50.meta' cannot be loaded here.
saver = tf.train.import_meta_graph(os.path.join(args.dir, 'checkpoint-50.meta'))
graph = tf.get_default_graph()

hem.message('Loading dataset...')
x, handle, iterators = hem.get_dataset_tensors(args)
# Initialize both iterators before asking them for their handles.
sess.run(iterators['train']['x'].initializer)
sess.run(iterators['validate']['x'].initializer)
train_handle = sess.run(iterators['train']['handle'])
validate_handle = sess.run(iterators['validate']['handle'])
# The handle placeholder is looked up by name in the imported graph; the
# `handle` returned by get_dataset_tensors above is not used for feeding here.
handle_placeholder = graph.as_graph_element('input_pipeline/Placeholder').outputs[0]
# mean_image_placeholder = graph.as_graph_element('Placeholder').outputs[0]
Beispiel #4
0
def parse_args(display=False):
    """Parse command-line arguments in three stages and return the namespace.

    Stage 1 parses the general arguments defined here. Stage 2 adds the
    arguments declared by the selected dataset (``--dataset``) and re-parses
    what was left over. Stage 3 does the same for the selected model
    (``--model``). Anything still unrecognized afterwards is reported with a
    warning rather than treated as an error.

    Arguments may also be read from a file by passing ``@path/to/file``.

    Args:
        display: when true, print every parsed attribute as ``name = value``.

    Returns:
        The populated argument namespace. ``seed`` is always set: either the
        integer given with ``--seed`` or a freshly drawn random integer.
    """
    # parse command line arguments
    ######################################################################
    parser = hem.CustomArgumentParser(
        description='Autoencoder training harness.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        fromfile_prefix_chars='@',
        conflict_handler='resolve',
        epilog="""Example: 
                                      python train.py @path/to/config_file 
                                      --dir workspace/model_test 
                                      --lr 0.1""")
    # Drop the last built-in argument group; presumably this hides argparse's
    # default "optional arguments" heading so that only the groups created
    # below show up in --help (relies on a private attribute).
    parser._action_groups.pop()

    data_args = parser.add_argument_group('Data')
    optimizer_args = parser.add_argument_group('Optimizer')
    train_args = parser.add_argument_group('Training')
    misc_args = parser.add_argument_group('Miscellaneous')

    # TODO add support for specifying additional directories for data and model plugins

    # misc settings
    add = misc_args.add_argument
    add('--seed',
        type=int,
        help="Useful for debugging. Randomized each execution if not set.")
    add('--n_gpus',
        type=int,
        default=1,
        help="""Number of GPUs to use for simultaneous training. Model will be 
        duplicated on each device and results averaged on CPU.""")
    add('--profile',
        default=False,
        action='store_true',
        help="""Enables runtime metadata collection during training that is 
        viewable in TensorBoard.""")
    add('--check_numerics',
        default=False,
        action='store_true',
        help=
        """Enables numeric checks for nan/inf in gradients for more detailed 
        error reporting.""")
    add('--model',
        type=lambda s: s.lower(),
        default='fc',
        help="Name of model to train.")

    # training settings
    add = train_args.add_argument
    # --epochs stays a string on purpose: the `+n` form is interpreted later.
    add('--epochs',
        default='3',
        help="""Number of epochs to train for during this run. Use an integer to
        denote the max number of epochs to train for, or `+n` for an 
        additional n epochs from a saved checkpoint.""")
    add('--batch_size',
        type=int,
        default=256,
        help="Batch size to use, per device.")
    add('--epoch_size',
        type=int,
        default=-1,
        help="""Number of iterations to use per epoch. Defaults to using the 
        entire dataset.""")
    add('--dir',
        type=str,
        default='workspace/{}'.format(uuid.uuid4()),
        help=
        """Location to store checkpoints, logs, etc. If this location is populated 
        by a previous run then training will be continued from last checkpoint."""
        )
    add('--max_to_keep',
        type=int,
        default=0,
        help=
        """Max (most recent) number of saved sessions to keep, once per epoch. 
                    Set to 0 to keep every one.""")
    add('--test_epochs',
        nargs='*',
        default=[],
        type=int,
        help=
        """List of epochs where the model should be run against the Test dataset.
                    Leave blank to run at the end of training (--epochs argument)."""
        )

    # optimizer settings
    add = optimizer_args.add_argument
    add('--optimizer',
        type=lambda s: s.lower(),
        default='rmsprop',
        help="Optimizer to use during training.")
    add('--lr',
        type=float,
        default=0.001,
        help="Learning rate of optimizer (if supported).")
    add('--loss',
        type=lambda s: s.lower(),
        default='l1',
        help="Loss function used by model during training (if supported).")
    add('--momentum',
        type=float,
        default=0.01,
        help="Momentum value used by optimizer (if supported).")
    add('--decay',
        type=float,
        default=0.9,
        help="Decay value used by optimizer (if supported).")
    add('--centered',
        default=False,
        action='store_true',
        help="Enables centering in RMSProp optimizer.")
    add('--beta1',
        type=float,
        default=0.9,
        help="Value for optimizer's beta_1 (if supported).")
    add('--beta2',
        type=float,
        default=0.999,
        help="Value for optimizer's beta_2 (if supported).")

    # data/pipeline settings
    add = data_args.add_argument
    add('--dataset',
        type=lambda s: s.lower(),
        default='floorplan',
        help="Name of dataset to use.")
    # NOTE(review): store_true combined with default=True means this flag can
    # never turn shuffling off; left unchanged to keep the CLI stable.
    add('--shuffle',
        default=True,
        action='store_true',
        help="""Set this to shuffle the dataset every epoch.""")
    add('--buffer_size',
        type=int,
        default=10000,
        help="""Size of the data buffer.""")
    add('--cache_dir',
        default=None,
        help="""Cache dataset to the directory specified. If not provided, 
        will attempt to cache to memory.""")
    add('--raw_dataset_dir',
        default='/tmp',
        help="Location of raw dataset files, if needed")
    add('--dataset_dir',
        default='datasets',
        help="Location of prepared tfrecord files for the requested dataset.")
    add('--n_threads',
        type=int,
        default=multiprocessing.cpu_count(),
        help="""Number of threads to use for processing datasets.""")

    # parse main/general arguments
    args, leftover_args = parser.parse_known_args()
    # parse dataset-specific arguments
    for k, v in hem.get_dataset(args.dataset).arguments().items():
        parser.add_argument(k, **v)
    args, leftover_args = parser.parse_known_args(leftover_args,
                                                  namespace=args)

    # parse model-specific arguments
    model = hem.get_model(args.model)
    for k, v in model.arguments().items():
        parser.add_argument(k, **v)
    args, leftover_args = parser.parse_known_args(leftover_args,
                                                  namespace=args)
    if leftover_args:
        hem.message(
            'WARNING: unknown and unused arguments provided: {}'.format(
                leftover_args),
            format_style=hem.WARNING)

    # set seed (useful for debugging purposes)
    if args.seed is None:
        # --seed is declared as an int, so the fallback must be an int too
        # (raw os.urandom() bytes would give args.seed a different type from
        # a user-supplied seed). SystemRandom draws from the same OS entropy
        # source; the range matches the 4 random bytes used previously.
        args.seed = random.SystemRandom().randint(0, 2 ** 32 - 1)
    random.seed(args.seed)

    if display:
        for name, value in vars(args).items():
            print('    {} = {}'.format(name, value))

    return args
Beispiel #5
0
def train(model, iterators, handle, sv, args, reset=False):
    """Run the training loop, writing summaries and a checkpoint every epoch.

    Args:
        model: object whose ``train(sess, args, feed_dict)`` performs one
            training step and returns a status value for the progress bar.
        iterators: dict keyed by split name (``'train'``, ``'validate'`` and
            optionally ``'test'``); each entry provides ``'x'`` (with an
            ``initializer``), ``'handle'`` and ``'batches'``.
        handle: placeholder that is fed the handle of the split to read from.
        sv: supervisor wrapper providing ``sv.sv`` (managed session and
            saver), the global step/epoch tensors and their increment/reset
            ops, ``summary_op`` and ``summary_writers``.
        args: parsed arguments; ``dir``, ``epochs`` and ``test_epochs`` are
            read here.
        reset: when true, reset the global step and epoch counters first.

    ``args.epochs`` is either ``'n'`` (train until epoch n) or ``'+n'``
    (train n more epochs from the current one).

    Any exception is reported with its traceback and the process exits with
    status -1.
    """
    try:
        checkpoint_path = os.path.join(args.dir, 'checkpoint')
        losses = hem.collection_to_dict(tf.get_collection('losses'))

        with sv.sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:

            def write_train_summary(feed_handle):
                # Evaluate the summary op on `feed_handle` and log it to the
                # training writer at the current global step.
                sv.summary_writers['train'].add_summary(
                    sess.run(sv.summary_op, feed_dict={handle: feed_handle}),
                    global_step=sess.run(sv.global_step))

            # initialize
            start_time = time.time()
            if reset:
                sess.run(sv.reset_global_step)
                sess.run(sv.reset_global_epoch)
            current_step = int(sess.run(sv.global_step))
            current_epoch = int(sess.run(sv.global_epoch))

            # set max epochs based on +n or n format
            if '+' in args.epochs:
                max_epochs = current_epoch + int(args.epochs[1:])
            else:
                max_epochs = int(args.epochs)

            # initialize datasets
            for split in iterators.values():
                sess.run(split['x'].initializer)
            # get handles for datasets
            training_handle = sess.run(iterators['train']['handle'])
            validation_handle = sess.run(iterators['validate']['handle'])
            # Stays None when there is no test split, so the check further
            # down can skip testing instead of hitting an unbound name.
            test_handle = None
            if 'test' in iterators and iterators['test']['handle'] is not None:
                test_handle = sess.run(iterators['test']['handle'])

            # save model params before any training has been done
            if current_step == 0:
                hem.message('Generating baseline summaries and checkpoint...')
                sv.sv.saver.save(sess,
                                 save_path=checkpoint_path,
                                 global_step=sv.global_step)
                write_train_summary(validation_handle)

            n_train_batches = iterators['train']['batches']

            hem.message('Starting training...')
            for epoch in range(current_epoch, max_epochs):
                prog_bar = tqdm(range(n_train_batches),
                                desc='Epoch {:3d}'.format(epoch + 1),
                                unit='batch')
                # NOTE(review): running_total is never reassigned and the
                # return value of update_moving_average is discarded; confirm
                # the helper does not expect its result to be carried over.
                running_total = None

                # 10 extra summaries per epoch during the first 3 epochs,
                # 3 per epoch afterwards. Clamp the interval to at least 1 so
                # that short epochs do not cause a modulo by zero.
                summaries_per_epoch = 10 if epoch < 3 else 3
                summary_interval = max(
                    1, int(n_train_batches / summaries_per_epoch))

                for i in prog_bar:
                    # train and display status
                    status = model.train(sess, args, {handle: training_handle})
                    hem.update_moving_average(status, running_total, prog_bar)
                    if i % summary_interval == 0:
                        write_train_summary(training_handle)
                    sess.run(sv.increment_global_step)

                # update epoch count
                sess.run(sv.increment_global_epoch)
                current_epoch = int(sess.run(sv.global_epoch))
                # generate end-of-epoch summaries
                write_train_summary(training_handle)

                # save checkpoint
                sv.sv.saver.save(sess,
                                 save_path=checkpoint_path,
                                 global_step=sv.global_epoch)
                # perform validation
                hem.inference(sess, losses, sv.summary_op,
                              iterators['validate']['batches'], handle,
                              validation_handle, 'Validation',
                              sv.summary_writers['validate'], sv.global_step)
                # perform testing, if asked
                if (epoch + 1) in args.test_epochs:
                    if test_handle is None:
                        hem.message(
                            'Skipping test for epoch {}: no test dataset '
                            'available.'.format(epoch + 1))
                    else:
                        # Same argument list as the validation call above
                        # (the summary op was previously missing here).
                        hem.inference(sess, losses, sv.summary_op,
                                      iterators['test']['batches'], handle,
                                      test_handle, 'Test',
                                      sv.summary_writers['test'],
                                      sv.global_step)

            hem.message('\nTraining complete! Elapsed time: {}s'.format(
                int(time.time() - start_time)))

    except Exception as e:
        # `e.message` does not exist on Python 3 exceptions; printing the
        # exception itself works everywhere. The traceback is printed too so
        # the failure can be located.
        import traceback
        print('Caught unexpected exception during training:', e)
        traceback.print_exc()
        sys.exit(-1)
Beispiel #6
0
# vargs = {'dir': 'hi',
#          'epochs': 100,
#          'batch_size': 512,
#          'epoch_size': -1,
#          'max_to_keep': 0,
#          'n_gpus': 2,
#          'n_threads': 6,
#          }

# TODO Use tf.tile to duplicate dataset into two branches, one for estimator and one for GAN

# TODO
# 1. argument parsing only parses one model and discards the remaining ones
# 2.

# Script entry: parse arguments, prepare the working directory and dataset,
# then build the estimator model and the sampler model that wraps it.
hem.message('Welcome to Hem')
hem.message('Initializing...')
args = hem.parse_args(display=True)
hem.init_working_dir(args)
# vars() returns the namespace's own attribute dict, so writes to `vargs`
# below are visible through `args` as well.
vargs = vars(args)

hem.message('Initializing dataset...')
x, handle, iterators = hem.get_dataset_tensors(args)

hem.message('Initializing model...')
estimator_model = hem.get_model('mean_depth_estimator')(x, args)

# Set g_arch/d_arch on `args` (via the vargs alias) after the estimator has
# been built and before the sampler model is constructed with the same args.
vargs['g_arch'] = 'E2'
vargs['d_arch'] = 'E2'
sampler_model = hem.get_model('experimental_sampler')(x, estimator_model, args)
Beispiel #7
0
def process_example(scene,
                    frame,
                    g,
                    y_hat,
                    args,
                    x_stride=10,
                    y_stride=10,
                    save_images=True,
                    dataset_root='/mnt/research/datasets/nyuv2/preprocessed/'):
    """Run patch-wise inference on one frame and assemble a result montage.

    The frame's image and depth are read from
    ``<dataset_root>/<scene>/<frame>``, cut into patches, passed through the
    ``g`` and ``y_hat`` tensors, and the patch outputs are stitched back
    together. The RMSE between the original depth and the reconstructed
    ``y_hat`` depth is printed.

    Args:
        scene: scene directory name below ``dataset_root``.
        frame: frame directory name below the scene.
        g: tensor evaluated per patch; its reconstruction is saved as the
            "variance" map after min/max normalization.
        y_hat: tensor evaluated per patch; its reconstruction is saved as the
            depth map.
        args: parsed arguments; ``args.dir`` is the output directory.
        x_stride: horizontal patch stride passed to the batch helpers.
        y_stride: vertical patch stride passed to the batch helpers.
        save_images: when true, write the intermediate images and the
            montage to ``<args.dir>/images/``.
        dataset_root: directory that contains the preprocessed scenes.

    Returns:
        The montage: original image, original depth, colorized reconstructed
        depth and the 3-channel variance map concatenated along axis 1.
    """
    def save(suffix, image):
        # Write `image` to <args.dir>/images/<name><suffix> if saving is on.
        if save_images:
            cv2.imwrite(os.path.join(args.dir, 'images', name + suffix),
                        image)

    path = os.path.join(dataset_root, scene, frame)
    # read in originals
    i, d = read_originals(path)
    name = scene + "_" + frame
    original_image, original_depth = write_to_disk(i, d, name, args,
                                                   save_images)

    # build up the batches to feed in
    hem.message('building patches...')
    image_batch = build_batch(i, x_stride=x_stride, y_stride=y_stride)
    depth_batch = build_batch(d,
                              x_stride=x_stride,
                              y_stride=y_stride,
                              channels=1)

    hem.message('generating results...')
    g_results = forward_inference(g, image_batch, depth_batch)
    y_hat_results = forward_inference(y_hat, image_batch, depth_batch)
    reconstructed_image_g, reconstructed_depth_g = reconstruct(
        image_batch, g_results, x_stride=x_stride, y_stride=y_stride)
    # Only the depth half of the y_hat reconstruction is used.
    _, reconstructed_depth_y_hat = reconstruct(
        image_batch, y_hat_results, x_stride=x_stride, y_stride=y_stride)

    # reconstructed image
    reconstructed_image = reconstructed_image_g * 255.0
    save('_reconstructed_image.png', reconstructed_image)

    # variance map: min/max normalize to [0, 255]. A constant map has zero
    # range; emit zeros then instead of dividing by zero.
    g_min = reconstructed_depth_g.min()
    g_range = reconstructed_depth_g.max() - g_min
    if g_range > 0:
        normalized = (reconstructed_depth_g - g_min) / g_range
    else:
        normalized = np.zeros_like(reconstructed_depth_g)
    reconstructed_var = (normalized * 255.0).astype(np.uint8)
    save('_reconstructed_variance.png', reconstructed_var)

    # depth map: values are scaled as value / 10 * 255 (presumably depth in
    # metres with a 10 m ceiling - confirm). Clip before the uint8 cast so
    # out-of-range values saturate instead of wrapping around.
    reconstructed_depth = np.clip(reconstructed_depth_y_hat / 10.0 * 255.0,
                                  0, 255).astype(np.uint8)
    reconstructed_depth = cv2.applyColorMap(reconstructed_depth,
                                            cv2.COLORMAP_JET)
    save('_reconstructed_depth.png', reconstructed_depth)

    # Repeat the variance map along axis 2 so it can sit next to the
    # 3-channel images in the montage.
    reconstructed_var = np.concatenate(
        (reconstructed_var, reconstructed_var, reconstructed_var), axis=2)
    montage = np.concatenate((original_image, original_depth,
                              reconstructed_depth, reconstructed_var),
                             axis=1)
    save('_montage.png', montage)

    # calculate rmse
    print('\trmse for {}/{}:'.format(scene, frame),
          rmse(d, reconstructed_depth_y_hat))

    return montage
Beispiel #8
0
#     reconstructed_depth = reconstructed_depth.astype(np.uint8)
#     reconstructed_depth = cv2.applyColorMap(reconstructed_depth, cv2.COLORMAP_JET)
#     if save_images:
#         cv2.imwrite(os.path.join(args.dir, 'images', name + '_reconstructed_depth.png'), reconstructed_depth)
#     reconstructed_var = np.concatenate((reconstructed_var, reconstructed_var, reconstructed_var), axis=2)
#     montage = np.concatenate((original_image, original_depth, reconstructed_depth, reconstructed_var), axis=1)
#     if save_images:
#         cv2.imwrite(os.path.join(args.dir, 'images', name + '_montage.png'), montage)
#     # calculate rmse
#     print('\trmse for {}/{}:'.format(scene, frame), rmse(d, reconstructed_depth_y_hat))
#
#     return montage

# Script entry point: parse arguments, open a session and import the saved
# graph with its two input tensors remapped to feedable placeholders.
if __name__ == '__main__':

    hem.message('Parsing arguments...')
    args = hem.parse_args()

    hem.message('Loading metafile and graph data...')
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    # Fixed-shape float32 placeholders: 512 x 3 x 65 x 65 for x and
    # 512 x 1 x 65 x 65 for y.
    # NOTE(review): presumably batch x channels x height x width patches;
    # confirm against the patch builder.
    x_ph = tf.placeholder(tf.float32, (512, 3, 65, 65))
    y_ph = tf.placeholder(tf.float32, (512, 1, 65, 65))
    # load graph, but replace input tensors with placeholders for feeding
    # NOTE(review): the checkpoint number is hard-coded; the matching
    # 'checkpoint-50.meta' must exist in args.dir.
    checkpoint_num = 50
    saver = tf.train.import_meta_graph(
        os.path.join(args.dir, 'checkpoint-{}.meta'.format(checkpoint_num)),
        input_map={
            "tower_0/input_preprocess/tower_0_x:0": x_ph,
            "tower_0/input_preprocess/tower_0_y:0": y_ph
        })