Example no. 1
0
    def test_process_image_and_label(self):
        """Test DataPreprocessor.process_image_and_label.

        Verifies that a mapping function taking an extra keyword
        argument is applied to both the image and the label tensors.
        """
        source = tf.data.Dataset.from_tensor_slices({
            'image': tf.constant(np.zeros((3, 3, 3))),
            'label': tf.constant(np.zeros((3, 1))),
        })

        dut = DataPreprocessor(source)

        # Mapping function with the external parameter @p a.
        def _shift(image, label, a):
            return image - a - 1, label + a + 1

        dut.process_image_and_label(_shift, a=1)

        batch = dut.dataset.make_one_shot_iterator().get_next()

        expected_image = tf.constant(-np.ones((3, 3)) * 2)
        expected_label = tf.constant(np.ones((1, )) * 2)
        image_equal_op = tf.equal(batch['image'], expected_image)
        label_equal_op = tf.equal(batch['label'], expected_label)

        with self.test_session() as sess:
            for _ in range(3):
                for result in sess.run((image_equal_op, label_equal_op)):
                    self.assertTrue(np.all(result))
Example no. 2
0
def prepare_dataset() -> None:
    """Download dataset and split into training/dev/test data for experiments

    :return: None
    :rtype: None
    """
    logger = logging.getLogger(__name__)
    logger.info('Running {}'.format(prepare_dataset.__name__))

    # Seed all RNGs from the configured value so splits are reproducible.
    config = ConfigLoader.load_config()
    RandUtils.set_random_seed(config['preprocessing']['random_seed'])

    logger.info('Downloading dataset and preparing splits')
    preprocessor = DataPreprocessor(config['paths']['data_dir'])
    downloaded_path = preprocessor.download_dataset()
    preprocessor.split_dataset(downloaded_path)
    logger.info('Done')
Example no. 3
0
    # Resolve optional checkpoint paths from the CLI/options dict.
    # resume_fullpath: checkpoint to resume training from (None if unset).
    resume_fullpath = None
    if options['resume_path']:
        # Expand '~' so a shell-style home path works as given.
        resume_fullpath = os.path.expanduser(options['resume_path'])
    # finetune_from: checkpoint to initialize weights from (None if unset).
    finetune_from = None
    if options['finetune_from']:
        finetune_from = options['finetune_from']

    # Data part should live in cpu.
    with tf.device('/cpu:0'):
        # Read from tfrecord format data made by src.data.tfrecord.TFRecordWriter.
        train_dataset = read_tfrecord(
            os.path.join(options['tfdata_dir'], 'train_*.tfrecord'))
        # Training pipeline: batching/shuffling/prefetching all come from options.
        train_data_processor = DataPreprocessor(
            dataset=train_dataset,
            num_parallel_calls=options['num_parallel_calls'],
            batch_size=options['batch_size'],
            shuffle_buffer_size=options['shuffle_buffer_size'],
            prefetch_buffer_size=options['prefetch_buffer_size'])

        val_dataset = read_tfrecord(
            os.path.join(options['tfdata_dir'], 'val_*.tfrecord'))
        # Validation pipeline uses batch_size=1 and buffer sizes of 1
        # (i.e. effectively no shuffling) — presumably for deterministic
        # evaluation; NOTE(review): confirm against DataPreprocessor.
        val_data_processor = DataPreprocessor(
            dataset=val_dataset,
            num_parallel_calls=options['num_parallel_calls'],
            batch_size=1,
            shuffle_buffer_size=1,
            prefetch_buffer_size=1)

        # Pre-process training data.
        # Add more pre-procesing blocks.
        logging.info('Preprocess train data')
Example no. 4
0
    # Fail fast if the user-supplied save directory does not exist.
    if not os.path.isdir(save_dir_fullpath):
        raise AttributeError('--save_dir should be existing directory path.')
    # Create a timestamped subdirectory so repeated runs never collide.
    save_dir_fullpath = os.path.join(save_dir_fullpath, str(datetime.now()))
    os.makedirs(save_dir_fullpath)
    logging.info('Created save directory to {}'.format(save_dir_fullpath))

    # Checkpoint to evaluate; expand '~' in the given path.
    ckpt_fullpath = os.path.expanduser(options['ckpt_path'])

    # Data part should live in cpu.
    with tf.device('/cpu:0'):
        # Read from tfrecord format data made by src.data.tfrecord.TFRecordWriter.
        test_dataset = read_tfrecord(
            os.path.join(options['tfdata_dir'], 'test_*.tfrecord'))
        # Test pipeline: no shuffling (shuffle_buffer_size=None), minimal
        # prefetching; batch size still taken from options.
        test_data_processor = DataPreprocessor(
            dataset=test_dataset,
            num_parallel_calls=options['num_parallel_calls'],
            batch_size=options['batch_size'],
            shuffle_buffer_size=None,
            prefetch_buffer_size=1)

        # Pre-process test data.
        # Add more pre-procesing blocks.
        logging.info('Preprocess test data')
        # Optionally resize images/labels — only when BOTH dimensions are given.
        if options['test_resized_height'] is not None and options[
                'test_resized_width'] is not None:
            logging.info('Resize image to ({}, {})'.format(
                options['test_resized_height'], options['test_resized_width']))
            test_data_processor.process_image_and_label(
                resize_image_and_label,
                size=tf.constant((options['test_resized_height'],
                                  options['test_resized_width'])))
Example no. 5
0
def main():
    """Parse CLI arguments, build the preprocessor config, and run it."""
    raw_args = args_parser().parse_args()
    params = build_preprocessor_params(raw_args)
    config = DataPreprocessorConfig(*params)
    preprocessor = DataPreprocessor(config)
    preprocessor.preprocess()