Ejemplo n.º 1
0
    def generate_tfrecords(self):
        """
        Generate tensorflow records file

        The train, validation and test index files are processed in that
        order. For each one the three path columns are read and passed to
        ``tf_io_pipline_tools.write_example_tfrecords`` together with the
        output path ``tusimple_<split>.tfrecords`` inside
        ``self._tfrecords_save_dir`` (created if it does not exist).
        :return:
        """
        def _read_training_example_index_file(_index_file_path):
            """
            Read an index file holding three space separated paths per line
            :param _index_file_path: path of the index file
            :return: dict with the lists 'gt_path_info', 'gt_binary_path_info'
                and 'gt_instance_path_info', all in file order
            """
            assert ops.exists(_index_file_path), \
                '{} does not exist'.format(_index_file_path)

            _example_gt_path_info = []
            _example_gt_binary_path_info = []
            _example_gt_instance_path_info = []

            with open(_index_file_path, 'r') as _file:
                for _line in _file:
                    # strip the line terminator in one go, whatever its form
                    _line = _line.rstrip('\r\n')
                    if not _line.strip():
                        # blank (typically trailing) lines carry no example
                        # and would otherwise fail with an IndexError below
                        continue
                    _example_info = _line.split(' ')
                    _example_gt_path_info.append(_example_info[0])
                    _example_gt_binary_path_info.append(_example_info[1])
                    _example_gt_instance_path_info.append(_example_info[2])

            return {
                'gt_path_info': _example_gt_path_info,
                'gt_binary_path_info': _example_gt_binary_path_info,
                'gt_instance_path_info': _example_gt_instance_path_info
            }

        def _write_split_tfrecords(_index_file_path, _tfrecords_name):
            """
            Read one index file and write its examples into a single file
            :param _index_file_path: index file of the split
            :param _tfrecords_name: file name created in the save dir
            :return:
            """
            _paths_info = _read_training_example_index_file(_index_file_path)
            tf_io_pipline_tools.write_example_tfrecords(
                _paths_info['gt_path_info'],
                _paths_info['gt_binary_path_info'],
                _paths_info['gt_instance_path_info'],
                ops.join(self._tfrecords_save_dir, _tfrecords_name))

        # make save dirs
        os.makedirs(self._tfrecords_save_dir, exist_ok=True)

        # training examples
        LOG.info('Start generating training example tfrecords')
        _write_split_tfrecords(
            self._train_example_index_file_path, 'tusimple_train.tfrecords')
        LOG.info('Generating training example tfrecords complete')

        # validation examples
        LOG.info('Start generating validation example tfrecords')
        _write_split_tfrecords(
            self._val_example_index_file_path, 'tusimple_val.tfrecords')
        LOG.info('Generating validation example tfrecords complete')

        # test examples
        LOG.info('Start generating testing example tfrecords')
        _write_split_tfrecords(
            self._test_example_index_file_path, 'tusimple_test.tfrecords')
        LOG.info('Generating testing example tfrecords complete')

        return
    def generate_tfrecords(self, save_dir, step_size=10000):
        """
        Generate tensorflow records file

        The train, validation and test index files are processed in that
        order. The examples of each split are handed to
        ``tf_io_pipline_tools.write_example_tfrecords`` in consecutive chunks
        of at most step_size examples, one output file per chunk, named
        ``<split>_<start>_<end>.tfrecords`` inside save_dir.
        :param save_dir: directory receiving the files, created if missing
        :param step_size: generate a tfrecord every step_size examples
        :return:
        :raises ValueError: if step_size is smaller than 1
        """
        if step_size < 1:
            # range() rejects a zero step and yields nothing for a negative
            # one, which would log success without writing a single file
            raise ValueError(
                'step_size must be a positive integer, got {!r}'.format(step_size))

        def _read_training_example_index_file(_index_file_path):
            """
            Read an index file holding three space separated paths per line
            :param _index_file_path: path of the index file
            :return: dict with the lists 'gt_path_info', 'gt_binary_path_info'
                and 'gt_instance_path_info', all in file order
            """
            assert ops.exists(_index_file_path), \
                '{} does not exist'.format(_index_file_path)

            _example_gt_path_info = []
            _example_gt_binary_path_info = []
            _example_gt_instance_path_info = []

            with open(_index_file_path, 'r') as _file:
                for _line in _file:
                    # strip the line terminator in one go, whatever its form
                    _line = _line.rstrip('\r\n')
                    if not _line.strip():
                        # blank (typically trailing) lines carry no example
                        # and would otherwise fail with an IndexError below
                        continue
                    _example_info = _line.split(' ')
                    _example_gt_path_info.append(_example_info[0])
                    _example_gt_binary_path_info.append(_example_info[1])
                    _example_gt_instance_path_info.append(_example_info[2])

            return {
                'gt_path_info': _example_gt_path_info,
                'gt_binary_path_info': _example_gt_binary_path_info,
                'gt_instance_path_info': _example_gt_instance_path_info
            }

        def _write_split_tfrecords(_index_file_path, _flags):
            """
            Read one index file and write its examples chunk by chunk
            :param _index_file_path: index file of the split
            :param _flags: split name used as file name prefix
            :return:
            """
            _paths_info = _read_training_example_index_file(_index_file_path)
            _gt_paths = _paths_info['gt_path_info']
            _gt_binary_paths = _paths_info['gt_binary_path_info']
            _gt_instance_paths = _paths_info['gt_instance_path_info']

            _total = len(_gt_paths)
            for _start in range(0, _total, step_size):
                # the last chunk may be shorter than step_size
                _end = min(_start + step_size, _total)
                tf_io_pipline_tools.write_example_tfrecords(
                    _gt_paths[_start:_end],
                    _gt_binary_paths[_start:_end],
                    _gt_instance_paths[_start:_end],
                    ops.join(save_dir, '{:s}_{:d}_{:d}.tfrecords'.format(_flags, _start, _end))
                )

        # make save dirs
        os.makedirs(save_dir, exist_ok=True)

        # training examples
        log.info('Start generating training example tfrecords')
        _write_split_tfrecords(self._train_example_index_file_path, 'train')
        log.info('Generating training example tfrecords complete')

        # validation examples
        log.info('Start generating validation example tfrecords')
        _write_split_tfrecords(self._val_example_index_file_path, 'val')
        log.info('Generating validation example tfrecords complete')

        # test examples
        log.info('Start generating testing example tfrecords')
        _write_split_tfrecords(self._test_example_index_file_path, 'test')
        log.info('Generating testing example tfrecords complete')

        return
    def generate_tfrecords(self, save_dir, step_size=10000):
        """
        Generate tensorflow records file

        The train, validation and test index files are processed in that
        order. The rain / clean path pairs of each split are handed to
        ``tf_io_pipline_tools.write_example_tfrecords`` in consecutive chunks
        of at most step_size examples, one output file per chunk, named
        ``<split>_<start>_<end>.tfrecords`` inside save_dir.
        :param save_dir: directory receiving the files, created if missing
        :param step_size: generate a tfrecord every step_size examples
        :return:
        :raises ValueError: if step_size is smaller than 1
        """
        if step_size < 1:
            # range() rejects a zero step and yields nothing for a negative
            # one, which would log success without writing a single file
            raise ValueError(
                'step_size must be a positive integer, got {!r}'.format(step_size))

        def _read_training_example_index_file(_index_file_path):
            """
            Read an index file holding two space separated paths per line
            :param _index_file_path: path of the index file
            :return: tuple (first column paths, second column paths), both
                lists in file order
            """
            assert ops.exists(_index_file_path), \
                '{} does not exist'.format(_index_file_path)

            _example_rain_path_info = []
            _example_clean_path_info = []

            with open(_index_file_path, 'r') as _file:
                for _line in _file:
                    # strip the line terminator in one go, whatever its form
                    _line = _line.rstrip('\r\n')
                    if not _line.strip():
                        # blank (typically trailing) lines carry no example
                        # and would otherwise fail with an IndexError below
                        continue
                    _example_info = _line.split(' ')
                    _example_rain_path_info.append(_example_info[0])
                    _example_clean_path_info.append(_example_info[1])

            return _example_rain_path_info, _example_clean_path_info

        def _write_split_tfrecords(_index_file_path, _flags):
            """
            Read one index file and write its examples chunk by chunk
            :param _index_file_path: index file of the split
            :param _flags: split name used as file name prefix
            :return:
            """
            _rain_paths, _clean_paths = _read_training_example_index_file(
                _index_file_path)

            _total = len(_rain_paths)
            for _start in range(0, _total, step_size):
                # the last chunk may be shorter than step_size
                _end = min(_start + step_size, _total)
                tf_io_pipline_tools.write_example_tfrecords(
                    _rain_paths[_start:_end],
                    _clean_paths[_start:_end],
                    ops.join(
                        save_dir, '{:s}_{:d}_{:d}.tfrecords'.format(
                            _flags, _start, _end)))

        # make save dirs
        os.makedirs(save_dir, exist_ok=True)

        # generate training example tfrecords
        log.info('Generating training example tfrecords')
        _write_split_tfrecords(self._train_example_index_file_path, 'train')
        log.info('Generate training example tfrecords complete')

        # generate val example tfrecords
        log.info('Generating validation example tfrecords')
        _write_split_tfrecords(self._val_example_index_file_path, 'val')
        log.info('Generate validation example tfrecords complete')

        # generate test example tfrecords
        log.info('Generating testing example tfrecords')
        _write_split_tfrecords(self._test_example_index_file_path, 'test')
        log.info('Generate testing example tfrecords complete')

        return
    def generate_tfrecords(self, save_dir, step_size=10000):
        """
        Generate tensorflow records file

        The train, validation and test index files are processed in that
        order. The image / binary label path pairs of each split are handed
        to ``tf_io_pipline_tools.write_example_tfrecords`` in consecutive
        chunks of at most step_size examples, one output file per chunk,
        named ``<split>_<start>_<end>.tfrecords`` inside save_dir.
        :param save_dir: directory receiving the files, created if missing
        :param step_size: generate a tfrecord every step_size examples
        :return:
        :raises ValueError: if step_size is smaller than 1
        """
        if step_size < 1:
            # range() rejects a zero step and yields nothing for a negative
            # one, which would log success without writing a single file
            raise ValueError(
                'step_size must be a positive integer, got {!r}'.format(step_size))

        def _read_training_example_index_file(_index_file_path):
            """
            Read the data of train.txt, val.txt or test.txt into a dict
            :param _index_file_path: train.txt, val.txt or test.txt
            :return: dict with the lists 'gt_path_info' and
                'gt_binary_path_info', both in file order
            """
            assert ops.exists(_index_file_path), \
                '{} does not exist'.format(_index_file_path)

            _example_gt_path_info = []
            _example_gt_binary_path_info = []

            with open(_index_file_path, 'r') as _file:
                for _line in _file:
                    # strip the line terminator in one go, whatever its form
                    _line = _line.rstrip('\r\n')
                    if not _line.strip():
                        # blank (typically trailing) lines carry no example
                        # and would otherwise fail with an IndexError below
                        continue
                    _example_info = _line.split(' ')
                    _example_gt_path_info.append(_example_info[0])
                    _example_gt_binary_path_info.append(_example_info[1])

            return {
                'gt_path_info': _example_gt_path_info,
                'gt_binary_path_info': _example_gt_binary_path_info,
            }

        def _write_split_tfrecords(_index_file_path, _flags):
            """
            Read one index file and write its examples chunk by chunk, so
            that no single tfrecords file grows too large
            :param _index_file_path: index file of the split
            :param _flags: 'train', 'val' or 'test', used as file name prefix
            :return:
            """
            _paths_info = _read_training_example_index_file(_index_file_path)
            _gt_paths = _paths_info['gt_path_info']
            _gt_binary_paths = _paths_info['gt_binary_path_info']

            _total = len(_gt_paths)
            for _start in range(0, _total, step_size):
                # the last chunk may be shorter than step_size
                _end = min(_start + step_size, _total)
                tf_io_pipline_tools.write_example_tfrecords(
                    _gt_paths[_start:_end],
                    _gt_binary_paths[_start:_end],
                    ops.join(
                        save_dir, '{:s}_{:d}_{:d}.tfrecords'.format(
                            _flags, _start, _end)))

        os.makedirs(save_dir, exist_ok=True)

        log.info('Start generating training example tfrecords')
        _write_split_tfrecords(self._train_example_index_file_path, 'train')
        log.info('Generating training example tfrecords complete')

        log.info('Start generating validation example tfrecords')
        _write_split_tfrecords(self._val_example_index_file_path, 'val')
        log.info('Generating validation example tfrecords complete')

        log.info('Start generating testing example tfrecords')
        _write_split_tfrecords(self._test_example_index_file_path, 'test')
        log.info('Generating testing example tfrecords complete')

        return