Example #1
def _get_processed_image(img_store):
    if isinstance(img_store, str):
        res_single = read_image_and_resize(
            get_abs_path(src_path, img_store))
    else:
        res_single = read_image_and_resize(img_store)
    return res_single if res_single is not None else default_image
Example #2
def _get_processed_image(img_store):
    if isinstance(img_store, str):
        return read_image_and_resize(
            get_abs_path(src_path, img_store)
        )
    else:
        return read_image_and_resize(img_store)
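Examples #1 and #2 differ only in what happens when the read fails: the first substitutes a default image when read_image_and_resize returns None, while the second returns whatever the reader produced. A minimal, self-contained sketch of the fallback variant follows; src_path, default_image, get_abs_path and read_image_and_resize are stand-ins chosen for illustration, not the library's definitions.

import os

import numpy as np

src_path = "/data/images"  # assumed dataset directory
default_image = np.zeros((32, 32, 3), dtype=np.uint8)  # assumed fallback image


def get_abs_path(src, entry):
    # Stand-in helper: join relative entries onto the source directory.
    return os.path.join(src, entry) if src else entry


def read_image_and_resize(entry):
    # Stand-in reader: returns None to signal an unreadable image.
    return None


def _get_processed_image(img_store):
    # Strings are resolved to absolute paths first; in-memory arrays pass through.
    if isinstance(img_store, str):
        res = read_image_and_resize(get_abs_path(src_path, img_store))
    else:
        res = read_image_and_resize(img_store)
    # Fall back to the default image when reading fails.
    return res if res is not None else default_image


print(_get_processed_image("cat.png").shape)  # (32, 32, 3) via the fallback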
Example #3
def is_image(src_path: str, img_entry: Union[bytes, str], column: str) -> bool:
    if not isinstance(img_entry, str):
        return False
    try:
        import imghdr

        path = get_abs_path(src_path, img_entry)
        bytes_obj = get_bytes_obj_from_path(path)
        if isinstance(bytes_obj, bytes):
            return imghdr.what(None, bytes_obj) is not None
        return imghdr.what(bytes_obj) is not None
    except Exception as e:
        logger.warning(
            f"While assessing potential image in is_image() for column {column}, encountered exception: {e}"
        )
        return False
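The two imghdr calls above are the whole check: imghdr.what(path) sniffs a file on disk, while imghdr.what(None, data) sniffs an in-memory header. A small, hedged illustration of the bytes form follows; "photo.jpg" is a placeholder file name, not a path from the examples.

import imghdr


def looks_like_image(data: bytes) -> bool:
    # When the second argument is bytes, imghdr ignores the first argument
    # and inspects the magic bytes directly.
    return imghdr.what(None, data) is not None


with open("photo.jpg", "rb") as f:  # placeholder local file
    print(looks_like_image(f.read()))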
Example #4
def get_image_from_path(src_path, img_entry, ret_bytes=False):
    """
    skimage.io.imread() can read filenames or urls
    imghdr.what() can read filenames or bytes
    """
    if not isinstance(img_entry, str):
        return img_entry
    if is_http(img_entry):
        if ret_bytes:
            return get_image_from_http_bytes(img_entry)
        return img_entry
    if src_path or os.path.isabs(img_entry):
        return get_abs_path(src_path, img_entry)
    with open_file(img_entry, 'rb') as f:
        if ret_bytes:
            return f.read()
        return f
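For reference, a few illustrative calls covering the branches above. They assume the Example #4 function is in scope; the directory, file name and URL are placeholders.

# String entry plus a source directory: resolved to an absolute path string.
entry_path = get_image_from_path("/data/images", "cat.png")

# Relative local path with ret_bytes=True: the file is opened and its bytes returned.
entry_bytes = get_image_from_path(None, "cat.png", ret_bytes=True)

# HTTP entry without ret_bytes: the URL is returned unchanged.
entry_url = get_image_from_path(None, "http://example.com/cat.png")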
Example #5
def get_image_from_path(
        src_path: Union[str, torch.Tensor],
        img_entry: Union[str, bytes],
        ret_bytes: bool = False
) -> Union[BytesIO, BinaryIO, TextIO, bytes, str]:
    if not isinstance(img_entry, str):
        return img_entry
    if is_http(img_entry):
        if ret_bytes:
            # Returns BytesIO.
            return get_image_from_http_bytes(img_entry)
        return img_entry
    if src_path or os.path.isabs(img_entry):
        return get_abs_path(src_path, img_entry)
    with open_file(img_entry, "rb") as f:
        if ret_bytes:
            return f.read()
        return f
Example #6
    def add_feature_data(feature, dataset_df, dataset, metadata,
                         preprocessing_parameters):
        set_default_value(feature['preprocessing'], 'in_memory',
                          preprocessing_parameters['in_memory'])

        if not 'audio_feature' in preprocessing_parameters:
            raise ValueError(
                'audio_feature dictionary has to be present in preprocessing '
                'for audio.')
        if not TYPE in preprocessing_parameters['audio_feature']:
            raise ValueError(
                'type has to be present in audio_feature dictionary '
                'for audio.')

        src_path = None
        # this is not super nice, but works on both DFs and lists
        first_path = '.'
        for first_path in dataset_df[feature[NAME]]:
            break
        if hasattr(dataset_df, 'src'):
            src_path = os.path.dirname(os.path.abspath(dataset_df.src))
        if src_path is None and not os.path.isabs(first_path):
            raise ValueError('Audio file paths must be absolute')

        num_audio_utterances = len(dataset_df)
        padding_value = preprocessing_parameters['padding_value']
        normalization_type = preprocessing_parameters['norm']
        feature_name = feature[NAME]

        feature_dim = metadata[feature_name]['feature_dim']
        max_length = metadata[feature_name]['max_length']
        audio_feature_dict = preprocessing_parameters['audio_feature']
        audio_file_length_limit_in_s = preprocessing_parameters[
            'audio_file_length_limit_in_s']

        if num_audio_utterances == 0:
            raise ValueError(
                'There are no audio files in the dataset provided.')
        audio_stats = {
            'count': 0,
            'mean': 0,
            'var': 0,
            'std': 0,
            'max': 0,
            'min': float('inf'),
            'cropped': 0,
            'max_length_in_s': audio_file_length_limit_in_s
        }

        if feature['preprocessing']['in_memory']:
            dataset[feature[NAME]] = np.empty(
                (num_audio_utterances, max_length, feature_dim),
                dtype=np.float32)
            for i, path in enumerate(dataset_df[feature[NAME]]):
                filepath = get_abs_path(src_path, path)
                audio_feature = AudioFeatureMixin._read_audio_and_transform_to_feature(
                    filepath, audio_feature_dict, feature_dim, max_length,
                    padding_value, normalization_type, audio_stats)

                dataset[feature[NAME]][i, :, :] = audio_feature

            audio_stats['std'] = np.sqrt(audio_stats['var'] /
                                         float(audio_stats['count']))
            print_statistics = ("{} audio files loaded.\n"
                                "Statistics of audio file lengths:\n"
                                "- mean: {:.4f}\n"
                                "- std: {:.4f}\n"
                                "- max: {:.4f}\n"
                                "- min: {:.4f}\n"
                                "- cropped audio_files: {}\n"
                                "Max length was given as {}s").format(
                                    audio_stats['count'], audio_stats['mean'],
                                    audio_stats['std'], audio_stats['max'],
                                    audio_stats['min'], audio_stats['cropped'],
                                    audio_stats['max_length_in_s'])
            logger.debug(print_statistics)
Example #7
    def add_feature_data(feature, input_df, proc_df, metadata,
                         preprocessing_parameters, backend,
                         skip_save_processed_input):
        in_memory = preprocessing_parameters['in_memory']
        if PREPROCESSING in feature and 'in_memory' in feature[PREPROCESSING]:
            in_memory = feature[PREPROCESSING]['in_memory']

        num_processes = preprocessing_parameters['num_processes']
        if PREPROCESSING in feature and 'num_processes' in feature[
                PREPROCESSING]:
            num_processes = feature[PREPROCESSING]['num_processes']

        src_path = None
        if SRC in metadata:
            src_path = os.path.dirname(os.path.abspath(metadata.get(SRC)))

        num_images = len(input_df[feature[COLUMN]])
        if num_images == 0:
            raise ValueError('There are no images in the dataset provided.')

        first_img_entry = next(iter(input_df[feature[COLUMN]]))
        logger.debug('Detected image feature type is {}'.format(
            type(first_img_entry)))

        if not isinstance(first_img_entry, str) \
                and not isinstance(first_img_entry, np.ndarray):
            raise ValueError(
                'Invalid image feature data type.  Detected type is {}, '
                'expected either string for file path or numpy array.'.format(
                    type(first_img_entry)))

        first_img_entry = get_image_from_path(src_path, first_img_entry)

        (should_resize, width, height, num_channels,
         user_specified_num_channels,
         first_image) = ImageFeatureMixin._finalize_preprocessing_parameters(
             preprocessing_parameters, first_img_entry, src_path,
             input_df[feature[COLUMN]])

        metadata[feature[NAME]][PREPROCESSING]['height'] = height
        metadata[feature[NAME]][PREPROCESSING]['width'] = width
        metadata[feature[NAME]][PREPROCESSING]['num_channels'] = num_channels

        read_image_and_resize = partial(
            ImageFeatureMixin._read_image_and_resize,
            img_width=width,
            img_height=height,
            should_resize=should_resize,
            num_channels=num_channels,
            resize_method=preprocessing_parameters['resize_method'],
            user_specified_num_channels=user_specified_num_channels)

        # TODO: alternatively use get_average_image() for unreachable images
        default_image = get_gray_default_image(height, width, num_channels)

        # check to see if the active backend can support lazy loading of
        # image features from the hdf5 cache.
        backend.check_lazy_load_supported(feature)

        if in_memory or skip_save_processed_input:
            # Number of processes to run in parallel for preprocessing
            metadata[
                feature[NAME]][PREPROCESSING]['num_processes'] = num_processes
            metadata[feature[NAME]]['reshape'] = (height, width, num_channels)

            # Split the dataset into pools only if we have an explicit request to use
            # multiple processes. In case we have multiple input images use the
            # standard code anyway.
            if backend.supports_multiprocessing and (num_processes > 1
                                                     or num_images > 1):
                all_img_entries = [
                    get_abs_path(src_path, img_entry) if isinstance(
                        img_entry, str) else img_entry
                    for img_entry in input_df[feature[COLUMN]]
                ]

                with Pool(num_processes) as pool:
                    logger.debug(
                        'Using {} processes for preprocessing images'.format(
                            num_processes))
                    res = pool.map(read_image_and_resize, all_img_entries)
                    proc_df[feature[PROC_COLUMN]] = [
                        x if x is not None else default_image for x in res
                    ]
            else:
                # If we're not running multiple processes and we are only processing one
                # image just use this faster shortcut, bypassing multiprocessing.Pool.map
                logger.debug(
                    'No process pool initialized. Using internal process for preprocessing images'
                )

                # helper function for handling single image
                def _get_processed_image(img_store):
                    if isinstance(img_store, str):
                        res_single = read_image_and_resize(
                            get_abs_path(src_path, img_store))
                    else:
                        res_single = read_image_and_resize(img_store)
                    return res_single if res_single is not None else default_image

                proc_df[feature[PROC_COLUMN]] = backend.df_engine.map_objects(
                    input_df[feature[COLUMN]], _get_processed_image)
        else:

            all_img_entries = [
                get_abs_path(src_path, img_entry) if isinstance(
                    img_entry, str) else img_entry
                for img_entry in input_df[feature[COLUMN]]
            ]

            data_fp = backend.cache.get_cache_path(metadata.get(SRC),
                                                   metadata.get(CHECKSUM),
                                                   TRAINING)
            with upload_h5(data_fp) as h5_file:
                # todo future add multiprocessing/multithreading
                image_dataset = h5_file.create_dataset(
                    feature[PROC_COLUMN] + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8)
                for i, img_entry in enumerate(all_img_entries):
                    res = read_image_and_resize(img_entry)
                    image_dataset[i, :height, :width, :] = (
                        res if res is not None else default_image)
                h5_file.flush()

            proc_df[feature[PROC_COLUMN]] = np.arange(num_images)
        return proc_df
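A compact, self-contained sketch of the pooled branch in Example #7: map a reader over all entries with multiprocessing.Pool and substitute a default value wherever the reader returned None. The reader and default image below are stand-ins for read_image_and_resize and get_gray_default_image, not the library's implementations.

from multiprocessing import Pool

import numpy as np

DEFAULT_IMAGE = np.zeros((32, 32, 3), dtype=np.uint8)  # stand-in default image


def read_or_none(path):
    # Stand-in reader: None signals an unreadable file, mirroring the
    # contract of read_image_and_resize above.
    try:
        with open(path, "rb"):
            pass
        return np.ones((32, 32, 3), dtype=np.uint8)  # pretend-decoded image
    except OSError:
        return None


if __name__ == "__main__":
    paths = ["a.png", "missing.png"]  # placeholder inputs
    with Pool(2) as pool:
        results = pool.map(read_or_none, paths)
    processed = [r if r is not None else DEFAULT_IMAGE for r in results]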
Example #8
def read_audio(path):
    filepath = get_abs_path(src_path, path)
    return soundfile.read(filepath)
    def add_feature_data(feature, dataset_df, data, metadata,
                         preprocessing_parameters):
        set_default_value(feature['preprocessing'], 'in_memory',
                          preprocessing_parameters['in_memory'])
        set_default_value(feature['preprocessing'], 'num_processes',
                          preprocessing_parameters['num_processes'])
        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))

        num_images = len(dataset_df)
        if num_images == 0:
            raise ValueError('There are no images in the dataset provided.')

        first_image_path = dataset_df[feature['name']][0]
        if csv_path is None and not os.path.isabs(first_image_path):
            raise ValueError('Image file paths must be absolute')

        first_image_path = get_abs_path(csv_path, first_image_path)

        (should_resize, width, height, num_channels,
         user_specified_num_channels,
         first_image) = ImageBaseFeature._finalize_preprocessing_parameters(
             preprocessing_parameters, first_image_path)

        metadata[feature['name']]['preprocessing']['height'] = height
        metadata[feature['name']]['preprocessing']['width'] = width
        metadata[
            feature['name']]['preprocessing']['num_channels'] = num_channels

        read_image_and_resize = partial(
            ImageBaseFeature._read_image_and_resize,
            img_width=width,
            img_height=height,
            should_resize=should_resize,
            num_channels=num_channels,
            resize_method=preprocessing_parameters['resize_method'],
            user_specified_num_channels=user_specified_num_channels)
        all_file_paths = [
            get_abs_path(csv_path, file_path)
            for file_path in dataset_df[feature['name']]
        ]

        if feature['preprocessing']['in_memory']:
            # Number of processes to run in parallel for preprocessing
            num_processes = feature['preprocessing']['num_processes']
            metadata[feature['name']]['preprocessing'][
                'num_processes'] = num_processes

            data[feature['name']] = np.empty(
                (num_images, height, width, num_channels), dtype=np.uint8)
            # Split the dataset into pools only if we have an explicit request to use
            # multiple processes. In case we have multiple input images use the
            # standard code anyway.
            if num_processes > 1 or num_images > 1:
                with Pool(num_processes) as pool:
                    logger.warning(
                        'Using {} processes for preprocessing images'.format(
                            num_processes))
                    data[feature['name']] = np.array(
                        pool.map(read_image_and_resize, all_file_paths))
            # If we're not running multiple processes and we are only processing one
            # image just use this faster shortcut, bypassing multiprocessing.Pool.map
            else:
                logger.warning(
                    'No process pool initialized. Using one process for preprocessing images'
                )
                img = read_image_and_resize(all_file_paths[0])
                data[feature['name']] = np.array([img])
        else:
            data_fp = os.path.splitext(dataset_df.csv)[0] + '.hdf5'
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'

            with h5py.File(data_fp, mode) as h5_file:
                # TODO add multiprocessing/multithreading
                image_dataset = h5_file.create_dataset(
                    feature['name'] + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8)
                for i, filepath in enumerate(all_file_paths):
                    image_dataset[i, :height, :width, :] = (
                        read_image_and_resize(filepath))

            data[feature['name']] = np.arange(num_images)
Example #10
def test_get_abs_path():
    assert get_abs_path("a", "b.jpg") == "a/b.jpg"
    assert get_abs_path(None, "b.jpg") == "b.jpg"
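A minimal implementation sketch consistent with the assertions above; this is a guess at the contract, not necessarily the library's actual code.

import os


def get_abs_path(src_path, file_path):
    # Resolve the file against the source directory when one is given,
    # otherwise return the path untouched. On POSIX, join("a", "b.jpg")
    # yields "a/b.jpg", matching the test above.
    if src_path is not None:
        return os.path.join(src_path, file_path)
    return file_path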
Example #11
def get_image_from_path(
    src_path: Union[str, torch.Tensor], img_entry: Union[str, bytes], ret_bytes: bool = False
) -> Union[BytesIO, BinaryIO, TextIO, bytes, str]:
    if not isinstance(img_entry, str):
        return img_entry
    if is_http(img_entry):
        if ret_bytes:
            # Returns BytesIO.
            return get_image_from_http_bytes(img_entry)
        return img_entry
    if src_path or os.path.isabs(img_entry):
        return get_abs_path(src_path, img_entry)
    with open_file(img_entry, "rb") as f:
        if ret_bytes:
            return f.read()
        return f


def is_image(src_path: str, img_entry: Union[bytes, str]) -> bool:
    if not isinstance(img_entry, str):
        return False
    try:
        import imghdr

        img = get_image_from_path(src_path, img_entry, True)
        if isinstance(img, bytes):
            return imghdr.what(None, img) is not None
        return imghdr.what(img) is not None
    except Exception:
        return False
Example #12
    def add_feature_data(feature_config, input_df, proc_df, metadata,
                         preprocessing_parameters, backend,
                         skip_save_processed_input):
        set_default_value(feature_config["preprocessing"], "in_memory",
                          preprocessing_parameters["in_memory"])

        name = feature_config[NAME]
        column = input_df[feature_config[COLUMN]]

        num_audio_files = len(column)
        if num_audio_files == 0:
            raise ValueError(
                "There are no audio files in the dataset provided.")

        first_audio_entry = next(iter(column))
        logging.debug(
            f"Detected audio feature type is {type(first_audio_entry)}")

        if not isinstance(first_audio_entry, str) and not isinstance(
                first_audio_entry, torch.Tensor):
            raise ValueError(
                "Invalid audio feature data type.  Detected type is {}, "
                "expected either string for local/remote file path or Torch Tensor."
                .format(type(first_audio_entry)))

        src_path = None
        if SRC in metadata:
            if isinstance(first_audio_entry,
                          str) and not has_remote_protocol(first_audio_entry):
                src_path = os.path.dirname(os.path.abspath(metadata.get(SRC)))
        abs_path_column = backend.df_engine.map_objects(
            column, lambda row: get_abs_path(src_path, row)
            if isinstance(row, str) else row)

        num_audio_utterances = len(input_df[feature_config[COLUMN]])
        padding_value = preprocessing_parameters["padding_value"]
        normalization_type = preprocessing_parameters["norm"]

        feature_dim = metadata[name]["feature_dim"]
        max_length = metadata[name]["max_length"]
        audio_feature_dict = {
            key: value
            for key, value in preprocessing_parameters.items()
            if key in AUDIO_FEATURE_KEYS and value is not None
        }
        audio_file_length_limit_in_s = preprocessing_parameters[
            "audio_file_length_limit_in_s"]

        if num_audio_utterances == 0:
            raise ValueError(
                "There are no audio files in the dataset provided.")

        if feature_config[PREPROCESSING]["in_memory"]:
            audio_features = AudioFeatureMixin._process_in_memory(
                abs_path_column,
                audio_feature_dict,
                feature_dim,
                max_length,
                padding_value,
                normalization_type,
                audio_file_length_limit_in_s,
                backend,
            )
            proc_df[feature_config[PROC_COLUMN]] = audio_features
        else:
            backend.check_lazy_load_supported(feature_config)

        return proc_df
Example #13
def test_get_abs_path():
    assert get_abs_path('a', 'b.jpg') == 'a/b.jpg'
    assert get_abs_path(None, 'b.jpg') == 'b.jpg'
Example #14
    def add_feature_data(feature, input_df, proc_df, metadata,
                         preprocessing_parameters, backend):
        set_default_value(feature[PREPROCESSING], 'in_memory',
                          preprocessing_parameters['in_memory'])
        set_default_value(feature[PREPROCESSING], 'num_processes',
                          preprocessing_parameters['num_processes'])
        src_path = None
        if hasattr(input_df, 'src'):
            src_path = os.path.dirname(os.path.abspath(input_df.src))

        num_images = len(input_df)
        if num_images == 0:
            raise ValueError('There are no images in the dataset provided.')

        first_path = next(iter(input_df[feature[COLUMN]]))

        if src_path is None and not os.path.isabs(first_path):
            raise ValueError('Image file paths must be absolute')

        first_path = get_abs_path(src_path, first_path)

        (should_resize, width, height, num_channels,
         user_specified_num_channels,
         first_image) = ImageFeatureMixin._finalize_preprocessing_parameters(
             preprocessing_parameters, first_path)

        metadata[feature[NAME]][PREPROCESSING]['height'] = height
        metadata[feature[NAME]][PREPROCESSING]['width'] = width
        metadata[feature[NAME]][PREPROCESSING]['num_channels'] = num_channels

        read_image_and_resize = partial(
            ImageFeatureMixin._read_image_and_resize,
            img_width=width,
            img_height=height,
            should_resize=should_resize,
            num_channels=num_channels,
            resize_method=preprocessing_parameters['resize_method'],
            user_specified_num_channels=user_specified_num_channels)

        if feature[PREPROCESSING]['in_memory']:
            # Number of processes to run in parallel for preprocessing
            num_processes = feature[PREPROCESSING]['num_processes']
            metadata[
                feature[NAME]][PREPROCESSING]['num_processes'] = num_processes

            # Split the dataset into pools only if we have an explicit request to use
            # multiple processes. In case we have multiple input images use the
            # standard code anyway.
            if backend.supports_multiprocessing and (num_processes > 1
                                                     or num_images > 1):
                all_file_paths = [
                    get_abs_path(src_path, file_path)
                    for file_path in input_df[feature[NAME]]
                ]

                with Pool(num_processes) as pool:
                    logger.debug(
                        'Using {} processes for preprocessing images'.format(
                            num_processes))
                    proc_df[feature[PROC_COLUMN]] = pool.map(
                        read_image_and_resize, all_file_paths)
            else:
                # If we're not running multiple processes and we are only processing one
                # image just use this faster shortcut, bypassing multiprocessing.Pool.map
                logger.debug(
                    'No process pool initialized. Using internal process for preprocessing images'
                )

                proc_df[feature[PROC_COLUMN]] = backend.df_engine.map_objects(
                    input_df[feature[COLUMN]],
                    lambda file_path: read_image_and_resize(
                        get_abs_path(src_path, file_path)))
        else:
            backend.check_lazy_load_supported(feature)

            all_file_paths = [
                get_abs_path(src_path, file_path)
                for file_path in input_df[feature[NAME]]
            ]

            data_fp = os.path.splitext(input_df.src)[0] + '.hdf5'
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'

            with h5py.File(data_fp, mode) as h5_file:
                # todo future add multiprocessing/multithreading
                image_dataset = h5_file.create_dataset(
                    feature[PROC_COLUMN] + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8)
                for i, filepath in enumerate(all_file_paths):
                    image_dataset[i, :height, :width, :] = (
                        read_image_and_resize(filepath))
                h5_file.flush()

            proc_df[feature[PROC_COLUMN]] = np.arange(num_images)
        return proc_df
Example #15
    def add_feature_data(feature, dataset_df, data, metadata,
                         preprocessing_parameters):
        set_default_value(feature['preprocessing'], 'in_memory',
                          preprocessing_parameters['in_memory'])

        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))

        num_images = len(dataset_df)
        if num_images == 0:
            raise ValueError('There are no images in the dataset provided.')

        height = 0
        width = 0
        should_resize = False
        if ('height' in preprocessing_parameters
                or 'width' in preprocessing_parameters):
            should_resize = True
            try:
                height = int(preprocessing_parameters[HEIGHT])
                width = int(preprocessing_parameters[WIDTH])
            except ValueError as e:
                raise ValueError('Image height and width must be set and have '
                                 'positive integer values: ' + str(e))
            if height <= 0 or width <= 0:
                raise ValueError(
                    'Image height and width must be positive integers')

        # here if a width and height have not been specified
        # we assume that all images have the same width and height
        # thus the width and height of the first one are the same
        # of all the other ones
        if (csv_path is None
                and not os.path.isabs(dataset_df[feature['name']][0])):
            raise ValueError('Image file paths must be absolute')

        first_image = imread(
            get_abs_path(csv_path, dataset_df[feature['name']][0]))

        first_img_height = first_image.shape[0]
        first_img_width = first_image.shape[1]
        first_img_num_channels = num_channels_in_image(first_image)

        if height == 0 or width == 0:
            # User hasn't specified height and width
            height = first_img_height
            width = first_img_width

        # User specified num_channels in the model/feature definition
        user_specified_num_channels = False
        num_channels = first_img_num_channels
        if NUM_CHANNELS in preprocessing_parameters:
            user_specified_num_channels = True
            num_channels = preprocessing_parameters[NUM_CHANNELS]

        assert isinstance(
            num_channels,
            int), ValueError('Number of image channels needs to be an integer')

        metadata[feature['name']]['preprocessing']['height'] = height
        metadata[feature['name']]['preprocessing']['width'] = width
        metadata[
            feature['name']]['preprocessing']['num_channels'] = num_channels

        if feature['preprocessing']['in_memory']:
            data[feature['name']] = np.empty(
                (num_images, height, width, num_channels), dtype=np.uint8)
            for i in range(len(dataset_df)):
                filepath = get_abs_path(csv_path,
                                        dataset_df[feature['name']][i])

                img = ImageBaseFeature._read_image_and_resize(
                    filepath, width, height, should_resize, num_channels,
                    preprocessing_parameters['resize_method'],
                    user_specified_num_channels)
                try:
                    data[feature['name']][i, :, :, :] = img
                except:
                    logger.error(
                        "Images are not of the same size. "
                        "Expected size is {}, "
                        "current image size is {}."
                        "Images are expected to be all of the same size"
                        "or explicit image width and height are expected"
                        "to be provided. "
                        "Additional information: https://uber.github.io/ludwig/user_guide/#image-features-preprocessing"
                        .format(first_image.shape, img.shape))
                    raise
        else:
            data_fp = os.path.splitext(dataset_df.csv)[0] + '.hdf5'
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'
            with h5py.File(data_fp, mode) as h5_file:
                image_dataset = h5_file.create_dataset(
                    feature['name'] + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8)
                for i in range(len(dataset_df)):
                    filepath = get_abs_path(csv_path,
                                            dataset_df[feature['name']][i])

                    img = ImageBaseFeature._read_image_and_resize(
                        filepath, width, height, should_resize, num_channels,
                        preprocessing_parameters['resize_method'],
                        user_specified_num_channels)

                    image_dataset[i, :height, :width, :] = img

            data[feature['name']] = np.arange(num_images)
Example #16
    def add_feature_data(
            feature,
            dataset_df,
            data,
            metadata,
            preprocessing_parameters
    ):
        set_default_value(
            feature['preprocessing'],
            'in_memory',
            preprocessing_parameters['in_memory']
        )

        if not 'audio_feature' in preprocessing_parameters:
            raise ValueError(
                'audio_feature dictionary has to be present in preprocessing '
                'for audio.')
        if not 'type' in preprocessing_parameters['audio_feature']:
            raise ValueError(
                'type has to be present in audio_feature dictionary '
                'for audio.')

        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))
        if (csv_path is None and
                not os.path.isabs(dataset_df[feature['name']][0])):
            raise ValueError(
                'Audio file paths must be absolute'
            )

        num_audio_utterances = len(dataset_df)
        padding_value = preprocessing_parameters['padding_value']
        normalization_type = preprocessing_parameters['norm']
        feature_name = feature['name']

        feature_dim = metadata[feature_name]['feature_dim']
        max_length = metadata[feature_name]['max_length']
        audio_feature_dict = preprocessing_parameters['audio_feature']
        audio_file_length_limit_in_s = preprocessing_parameters[
            'audio_file_length_limit_in_s']

        if num_audio_utterances == 0:
            raise ValueError(
                'There are no audio files in the dataset provided.')
        audio_stats = {
            'count': 0,
            'mean': 0,
            'var': 0,
            'std': 0,
            'max': 0,
            'min': float('inf'),
            'cropped': 0,
            'max_length_in_s': audio_file_length_limit_in_s
        }

        if feature['preprocessing']['in_memory']:
            data[feature['name']] = np.empty(
                (num_audio_utterances, max_length, feature_dim),
                dtype=np.float32
            )
            for i in range(len(dataset_df)):
                filepath = get_abs_path(
                    csv_path,
                    dataset_df[feature['name']][i]
                )
                audio_feature = AudioBaseFeature._read_audio_and_transform_to_feature(
                    filepath, audio_feature_dict, feature_dim, max_length,
                    padding_value, normalization_type, audio_stats
                )

                data[feature['name']][i, :, :] = audio_feature

            audio_stats['std'] = np.sqrt(
                audio_stats['var'] / float(audio_stats['count']))
            print_statistics = """
            {} audio files loaded.
            Statistics of audio file lengths:
            - mean: {:.4f}
            - std: {:.4f}
            - max: {:.4f}
            - min: {:.4f}
            - cropped audio_files: {}
            Max length was given as {}.
            """.format(audio_stats['count'], audio_stats['mean'],
                       audio_stats['std'], audio_stats['max'],
                       audio_stats['min'], audio_stats['cropped'],
                       audio_stats['max_length_in_s'])
            print(print_statistics)
Example #17
    def add_feature_data(feature_config, input_df, proc_df, metadata,
                         preprocessing_parameters, backend,
                         skip_save_processed_input):
        set_default_value(feature_config[PREPROCESSING], "in_memory",
                          preprocessing_parameters["in_memory"])

        name = feature_config[NAME]
        column = input_df[feature_config[COLUMN]]

        src_path = None
        if SRC in metadata:
            src_path = os.path.dirname(os.path.abspath(metadata.get(SRC)))
        abs_path_column = backend.df_engine.map_objects(
            column,
            lambda row: get_abs_path(src_path, row)
            if isinstance(row, str) and not has_remote_protocol(row) else row,
        )

        (
            should_resize,
            width,
            height,
            num_channels,
            user_specified_num_channels,
        ) = ImageFeatureMixin._finalize_preprocessing_parameters(
            preprocessing_parameters, abs_path_column)

        metadata[name][PREPROCESSING]["height"] = height
        metadata[name][PREPROCESSING]["width"] = width
        metadata[name][PREPROCESSING]["num_channels"] = num_channels

        read_image_if_bytes_obj_and_resize = partial(
            ImageFeatureMixin._read_image_if_bytes_obj_and_resize,
            img_width=width,
            img_height=height,
            should_resize=should_resize,
            num_channels=num_channels,
            resize_method=preprocessing_parameters["resize_method"],
            user_specified_num_channels=user_specified_num_channels,
        )

        # TODO: alternatively use get_average_image() for unreachable images
        default_image = get_gray_default_image(num_channels, height, width)

        # check to see if the active backend can support lazy loading of
        # image features from the hdf5 cache.
        backend.check_lazy_load_supported(feature_config)

        in_memory = feature_config[PREPROCESSING]["in_memory"]
        if in_memory or skip_save_processed_input:
            metadata[name]["reshape"] = (num_channels, height, width)

            proc_col = backend.read_binary_files(
                abs_path_column, map_fn=read_image_if_bytes_obj_and_resize)
            proc_col = backend.df_engine.map_objects(
                proc_col, lambda row: row
                if row is not None else default_image)
            proc_df[feature_config[PROC_COLUMN]] = proc_col
        else:
            num_images = len(abs_path_column)

            data_fp = backend.cache.get_cache_path(wrap(metadata.get(SRC)),
                                                   metadata.get(CHECKSUM),
                                                   TRAINING)
            with upload_h5(data_fp) as h5_file:
                # todo future add multiprocessing/multithreading
                image_dataset = h5_file.create_dataset(
                    feature_config[PROC_COLUMN] + "_data",
                    (num_images, num_channels, height, width),
                    dtype=np.uint8)
                for i, img_entry in enumerate(abs_path_column):
                    res = read_image_if_bytes_obj_and_resize(img_entry)
                    image_dataset[i, :height, :width, :] = (
                        res if res is not None else default_image)
                h5_file.flush()

            proc_df[feature_config[PROC_COLUMN]] = np.arange(num_images)
        return proc_df