Example #1
def train(H, test_images):
    '''
    Set up the computation graph, start two prefetch data threads, and then
    run the main training loop.
    '''

    if not os.path.exists(H['save_dir']): os.makedirs(H['save_dir'])

    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
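    # One FIFO prefetch queue per phase ('train' / 'test'); feeder threads
    # started inside the session below keep them filled so sess.run() never
    # blocks on data loading.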
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['grid_width'] * H['grid_height']
        shapes = (
            [H['image_height'], H['image_width'], 3],
            [grid_size, H['rnn_len'], H['num_classes']],
            [grid_size, H['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

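    # NOTE: learning_rate is defined later, by build(); that is safe because
    # make_feed() is only called after build() has returned.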
    def make_feed(d):
        return {
            x_in: d['image'],
            confs_in: d['confs'],
            boxes_in: d['boxes'],
            learning_rate: H['solver']['learning_rate']
        }

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

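    # build() (defined elsewhere in the project) consumes the queues and
    # returns the session config plus every op the training loop needs.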
    (config, loss, accuracy, summary_op, train_op, smooth_op, global_step,
     learning_rate) = build(H, q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.summary.FileWriter(logdir=H['save_dir'], flush_secs=10)

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H,
                                            phase,
                                            jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)
        else:
            init_fn = slim.assign_from_checkpoint_fn(
                '%s/data/%s' %
                (os.path.dirname(os.path.realpath(__file__)), H['slim_ckpt']),
                [
                    x for x in tf.global_variables()
                    if x.name.startswith(H['slim_basename'])
                    and H['solver']['opt'] not in x.name
                ])
            init_fn(sess)

        # train model for N iterations
        start = time.time()
        max_iter = H['solver'].get('max_iter', 800000)
        for i in range(max_iter):
            display_iter = H['logging']['display_iter']
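            # LR schedule: hold the base rate for the first
            # 3 * learning_rate_step iterations (the floored quotient stays
            # at or below 2), then halve it once per further
            # learning_rate_step iterations.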
            adjusted_lr = (
                H['solver']['learning_rate'] *
                0.5**max(0, (i // H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (H['batch_size'] *
                                                  display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str, _,
                 _) = sess.run([
                     loss['train'],
                     accuracy['test'],
                     summary_op,
                     train_op,
                     smooth_op,
                 ],
                               feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d', 'lr: %f', 'Train Loss: %.2f',
                    'Softmax Test Accuracy: %.1f%%', 'Time/image (ms): %.1f'
                ])
                print(print_str % (i, adjusted_lr, train_loss, test_accuracy *
                                   100, dt * 1000 if i > 0 else 0))

            if (global_step.eval() % H['logging']['save_iter'] == 0
                    or global_step.eval() == max_iter - 1):
                saver.save(sess, ckpt_file, global_step=global_step)
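
For orientation, here is a minimal sketch of the hypes dict this train() reads. The key names are exactly the ones referenced in the code above; every value is an illustrative placeholder rather than a setting taken from the original project, and test_images=None is only safe because the parameter is never used in the body.

H = {
    'save_dir': 'output/run1',            # checkpoints and hypes.json land here
    'image_width': 640, 'image_height': 480,
    'grid_width': 20, 'grid_height': 15,
    'rnn_len': 1,                         # boxes predicted per grid cell
    'num_classes': 2,
    'batch_size': 16,
    'slim_ckpt': 'inception_v1.ckpt',     # used only when solver.weights == ''
    'slim_basename': 'InceptionV1',
    'solver': {
        'learning_rate': 1e-3,
        'learning_rate_step': 100000,
        'use_jitter': False,
        'rnd_seed': 1,
        'weights': '',                    # '' -> initialize from the slim ckpt
        'opt': 'RMS',                     # variables whose name contains this
                                          # are skipped when restoring
        'max_iter': 800000,
    },
    'logging': {'display_iter': 50, 'save_iter': 5000},
}
train(H, test_images=None)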
Example #2
def train(H, test_images):
    '''
    Set up the computation graph, start two prefetch data threads, and then
    run the main training loop.
    '''

    if not os.path.exists(H['save_dir']): os.makedirs(H['save_dir'])

    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['grid_width'] * H['grid_height']
        shapes = (
            [H['image_height'], H['image_width'], 3],
            [grid_size, H['rnn_len'], H['num_classes']],
            [grid_size, H['rnn_len'], 4],
            )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: H['solver']['learning_rate']}

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op,
     smooth_op, global_step, learning_rate, encoder_net) = build(H, q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.summary.FileWriter(
        logdir=H['save_dir'],
        flush_secs=10
    )

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H, phase, jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)

        # train model for N iterations
        start = time.time()
        max_iter = H['solver'].get('max_iter', 10000000)
        for i in range(max_iter):
            display_iter = H['logging']['display_iter']
            adjusted_lr = (H['solver']['learning_rate'] *
                           0.5 ** max(0, (i // H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op], feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (H['batch_size'] * display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str,
                    _, _) = sess.run([loss['train'], accuracy['test'],
                                      summary_op, train_op, smooth_op,
                                     ], feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f'
                ])
                print(print_str %
                      (i, adjusted_lr, train_loss,
                       test_accuracy * 100, dt * 1000 if i > 0 else 0))

            if (global_step.eval() % H['logging']['save_iter'] == 0
                    or global_step.eval() == max_iter - 1):
                saver.save(sess, ckpt_file, global_step=global_step)
Example #3
def train(H, test_images):
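    '''
    Same training loop as the examples above, but it also draws the predicted
    and ground-truth boxes on a test image and logs them as image summaries.
    '''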
    if not os.path.exists(H['save_dir']): os.makedirs(H['save_dir'])

    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['arch']['grid_width'] * H['arch']['grid_height']
        shapes = (
            [H['arch']['image_height'], H['arch']['image_width'], 3],
            [grid_size, H['arch']['rnn_len'], H['arch']['num_classes']],
            [grid_size, H['arch']['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {
            x_in: d['image'],
            confs_in: d['confs'],
            boxes_in: d['boxes'],
            learning_rate: H['solver']['learning_rate']
        }

    def MyLoop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, W_norm, test_image,
     test_pred_boxes, test_pred_confidences, test_true_boxes,
     test_true_confidences, smooth_op, global_step,
     learning_rate) = build(H, q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.train.SummaryWriter(logdir=H['save_dir'], flush_secs=10)

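    # Reusable image-summary op: both the image and its tag are fed at run
    # time, so one op serves the predicted and the true boxes. This relies on
    # the pre-TF-1.0 tf.image_summary, whose tag argument may be a tensor;
    # tf.summary.image only accepts a fixed string name.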
    test_image_to_log = tf.placeholder(
        tf.uint8, [H['arch']['image_height'], H['arch']['image_width'], 3])
    log_image_name = tf.placeholder(tf.string)
    log_image = tf.image_summary(log_image_name,
                                 tf.expand_dims(test_image_to_log, 0))

    with tf.Session(config=config) as sess:
        threads = []
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H,
                                            phase,
                                            jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            threads.append(
                threading.Thread(target=MyLoop,
                                 args=(sess, enqueue_op, phase, gen)))
            threads[-1].daemon = True
            threads[-1].start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.initialize_all_variables())

        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)

        # train model for N iterations
        for i in range(10000000):
            display_iter = 10
            adjusted_lr = (
                H['solver']['learning_rate'] *
                0.5**max(0, (i // H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}
            if i % display_iter == 0:
                if i > 0:
                    dt = (time.time() - start) / (H['arch']['batch_size'] *
                                                  display_iter)
                start = time.time()
                (batch_loss_train, test_accuracy, weights_norm, summary_str,
                 np_test_image, np_test_pred_boxes, np_test_pred_confidences,
                 np_test_true_boxes, np_test_true_confidences, _,
                 _) = sess.run([
                     loss['train'],
                     accuracy['test'],
                     W_norm,
                     summary_op,
                     test_image,
                     test_pred_boxes,
                     test_pred_confidences,
                     test_true_boxes,
                     test_true_confidences,
                     train_op,
                     smooth_op,
                 ],
                               feed_dict=lr_feed)
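                # Rotate the summary tag through three slots so TensorBoard
                # shows the three most recent prediction/ground-truth pairs
                # under distinct tags.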
                pred_true = [("%d_pred_output" % (i % 3), np_test_pred_boxes,
                              np_test_pred_confidences),
                             ("%d_true_output" % (i % 3), np_test_true_boxes,
                              np_test_true_confidences)]

                for name, boxes, confidences in pred_true:
                    test_output_to_log = train_utils.add_rectangles(
                        np_test_image, confidences, boxes, H["arch"])[0]
                    assert test_output_to_log.shape == (
                        H['arch']['image_height'], H['arch']['image_width'], 3)
                    feed = {
                        test_image_to_log: test_output_to_log,
                        log_image_name: name
                    }
                    test_image_summary_str = sess.run(log_image,
                                                      feed_dict=feed)
                    writer.add_summary(test_image_summary_str,
                                       global_step=global_step.eval())
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d', 'lr: %f', 'Train Loss: %.2f',
                    'Test Accuracy: %.1f%%', 'Time/image (ms): %.1f'
                ])
                print(print_str %
                      (i, adjusted_lr, batch_loss_train, test_accuracy * 100,
                       dt * 1000 if i > 0 else 0))
            else:
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)

            if global_step.eval() % 1000 == 0:
                saver.save(sess, ckpt_file, global_step=global_step)
Example #4
def train(H, test_images):
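    '''
    Functionally the same as the previous example (training loop plus
    prediction/ground-truth image summaries); only the formatting differs.
    '''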
    if not os.path.exists(H['save_dir']): os.makedirs(H['save_dir'])

    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['arch']['grid_width'] * H['arch']['grid_height']
        shapes = (
            [H['arch']['image_height'], H['arch']['image_width'], 3],
            [grid_size, H['arch']['rnn_len'], H['arch']['num_classes']],
            [grid_size, H['arch']['rnn_len'], 4],
            )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: H['solver']['learning_rate']}

    def MyLoop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, W_norm,
     test_image, test_pred_boxes, test_pred_confidences,
     test_true_boxes, test_true_confidences,
     smooth_op, global_step, learning_rate) = build(H, q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.train.SummaryWriter(
        logdir=H['save_dir'], 
        flush_secs=10
    )

    test_image_to_log = tf.placeholder(tf.uint8,
                                       [H['arch']['image_height'], H['arch']['image_width'], 3])
    log_image_name = tf.placeholder(tf.string)
    log_image = tf.image_summary(log_image_name, tf.expand_dims(test_image_to_log, 0))

    with tf.Session(config=config) as sess:
        threads = []
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H, phase, jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            threads.append(threading.Thread(target=MyLoop,
                                            args=(sess, enqueue_op, phase, gen)))
            threads[-1].daemon = True
            threads[-1].start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.initialize_all_variables())

        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)

        # train model for N iterations
        for i in range(10000000):
            display_iter = 10
            adjusted_lr = (H['solver']['learning_rate'] *
                           0.5 ** max(0, (i // H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}
            if i % display_iter == 0:
                if i > 0:
                    dt = (time.time() - start) / (H['arch']['batch_size'] * display_iter)
                start = time.time()
                (batch_loss_train, test_accuracy, weights_norm, 
                    summary_str, np_test_image, np_test_pred_boxes,
                    np_test_pred_confidences, np_test_true_boxes,
                    np_test_true_confidences, _, _) = sess.run([
                         loss['train'], accuracy['test'], W_norm, 
                         summary_op, test_image, test_pred_boxes,
                         test_pred_confidences, test_true_boxes, test_true_confidences,
                         train_op, smooth_op,
                        ], feed_dict=lr_feed)
                pred_true = [("%d_pred_output" % (i % 3), np_test_pred_boxes, np_test_pred_confidences),
                             ("%d_true_output" % (i % 3), np_test_true_boxes, np_test_true_confidences)]

                for name, boxes, confidences in pred_true:
                    test_output_to_log = train_utils.add_rectangles(np_test_image,
                                                                    confidences,
                                                                    boxes,
                                                                    H["arch"])[0]
                    assert test_output_to_log.shape == (H['arch']['image_height'],
                                                        H['arch']['image_width'], 3)
                    feed = {test_image_to_log: test_output_to_log, log_image_name: name}
                    test_image_summary_str = sess.run(log_image, feed_dict=feed)
                    writer.add_summary(test_image_summary_str, global_step=global_step.eval())
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f'
                ])
                print(print_str % 
                      (i, adjusted_lr, batch_loss_train,
                       test_accuracy * 100, dt * 1000 if i > 0 else 0))
            else:
                batch_loss_train, _ = sess.run([loss['train'], train_op], feed_dict=lr_feed)

            if global_step.eval() % 1000 == 0: 
                saver.save(sess, ckpt_file, global_step=global_step)
Example #5
    def train(self, output_model_path):
        train_images = self.train_images
        test_images = self.test_images
        settings = self.settings

        logger.info(
            'Invoking train() with {} training images and {} test images'.
            format(len(train_images), len(test_images)))

        tf.reset_default_graph()

        start_ts = time.time()

        num_steps = settings['num_steps']

        x_in = tf.placeholder(tf.float32)
        confs_in = tf.placeholder(tf.float32)
        boxes_in = tf.placeholder(tf.float32)
        queue = {}
        enqueue_op = {}
        for phase in ['train', 'test']:
            dtypes = [tf.float32, tf.float32, tf.float32]
            grid_size = settings['grid_width'] * settings['grid_height']
            shapes = (
                [settings['image_height'], settings['image_width'], 3],
                [grid_size, settings['rnn_len'], settings['num_classes']],
                [grid_size, settings['rnn_len'], 4],
            )
            queue[phase] = tf.FIFOQueue(capacity=30,
                                        dtypes=dtypes,
                                        shapes=shapes)
            enqueue_op[phase] = queue[phase].enqueue(
                (x_in, confs_in, boxes_in))

        def make_feed(d):
            return {
                x_in: d['image'],
                confs_in: d['confs'],
                boxes_in: d['boxes'],
                learning_rate: settings['solver']['learning_rate']
            }

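        # Unlike the earlier examples, the feeder threads poll a stop event so
        # train() can shut them down cleanly before the model is saved.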
        def thread_loop(sess, enqueue_op, phase, gen, stop_event):
            for d in gen:
                if stop_event.is_set():
                    return
                sess.run(enqueue_op[phase], feed_dict=make_feed(d))

        (config, loss, accuracy, train_op, smooth_op, global_step,
         learning_rate) = build(settings, queue)

        saver = tf.train.Saver(max_to_keep=None)

        logger.info('train() initialization took {}s'.format(time.time() -
                                                             start_ts))

        threads = []
        with tf.Session(config=config) as sess:
            for phase in ['train', 'test']:
                # enqueue once manually to avoid thread start delay
                if phase == 'train':
                    gen = train_utils.load_data_gen(
                        settings,
                        train_images,
                        jitter=settings['solver']['use_jitter'])
                if phase == 'test':
                    gen = train_utils.load_data_gen(settings, test_images)
                d = next(gen)
                sess.run(enqueue_op[phase], feed_dict=make_feed(d))
                thread_stop_event = threading.Event()
                thread = threading.Thread(target=thread_loop,
                                          args=(sess, enqueue_op, phase, gen,
                                                thread_stop_event))
                thread.stop_event = thread_stop_event
                threads.append(thread)
                thread.daemon = True
                thread.start()

            tf.set_random_seed(settings['solver']['rnd_seed'])
            sess.run(tf.global_variables_initializer())
            init_fn = slim.assign_from_checkpoint_fn(
                settings['base_model_ckpt'], [
                    x for x in tf.global_variables()
                    if x.name.startswith(settings['base_name'])
                    and settings['solver']['opt'] not in x.name
                ])
            init_fn(sess)

            # train model for N iterations
            start = time.time()
            for i in range(num_steps):
                display_iter = settings['logging']['display_iter']
                adjusted_lr = (settings['solver']['learning_rate'] * 0.5**max(
                    0, (i // settings['solver']['learning_rate_step']) - 2))
                lr_feed = {learning_rate: adjusted_lr}

                if i % display_iter != 0:
                    # train network
                    batch_loss_train, _ = sess.run([loss['train'], train_op],
                                                   feed_dict=lr_feed)
                else:
                    # test network every N iterations; log additional info
                    if i > 0:
                        dt = (time.time() - start) / (settings['batch_size'] *
                                                      display_iter)
                    start = time.time()
                    (train_loss, test_accuracy, _,
                     _) = sess.run([
                         loss['train'],
                         accuracy['test'],
                         train_op,
                         smooth_op,
                     ],
                                   feed_dict=lr_feed)
                    print_str = ', '.join([
                        'Step: %d', 'lr: %f', 'Train Loss: %.2f',
                        'Softmax Test Accuracy: %.1f%%',
                        'Time/image (ms): %.1f'
                    ])
                    logger.info(print_str %
                                (i, adjusted_lr, train_loss, test_accuracy *
                                 100, dt * 1000 if i > 0 else 0))

            for thread in threads:
                thread.stop_event.set()

            create_empty_model(output_model_path)
            transfer_model_meta(self.scaffold_path, output_model_path)
            save_model_state(sess, output_model_path)

            benchmark_info = {
                'adjusted_lr': float(adjusted_lr),
                'train_loss': float(train_loss),
                'test_accuracy': float(test_accuracy)
            }
            save_model_benchmark_info(output_model_path, benchmark_info)
            return benchmark_info
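
Since this train() returns its final benchmark numbers, a caller might use it as sketched below; the Trainer name and its construction are hypothetical, since the class that defines train() is not shown above.

trainer = Trainer(...)  # hypothetical: stands in for however the class is built
info = trainer.train('models/run1')
print('final loss %.2f, test accuracy %.1f%%' %
      (info['train_loss'], info['test_accuracy'] * 100))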