Example #1
    def add_feature_data(feature, dataset_df, data, metadata,
                         preprocessing_parameters):
        set_default_value(feature['preprocessing'], 'in_memory',
                          preprocessing_parameters['in_memory'])

        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))

        num_images = len(dataset_df)
        if num_images == 0:
            raise ValueError('There are no images in the dataset provided.')

        height = 0
        width = 0
        should_resize = False
        if (HEIGHT in preprocessing_parameters
                or WIDTH in preprocessing_parameters):
            should_resize = True
            try:
                height = int(preprocessing_parameters[HEIGHT])
                width = int(preprocessing_parameters[WIDTH])
            except ValueError as e:
                raise ValueError('Image height and width must be set and have '
                                 'positive integer values: ' + str(e))
            if height <= 0 or width <= 0:
                raise ValueError(
                    'Image height and width must be positive integers')

        # If a width and height have not been specified,
        # we assume that all images have the same width and height,
        # so the width and height of the first image are used
        # for all of the others.
        if (csv_path is None
                and not os.path.isabs(dataset_df[feature['name']][0])):
            raise ValueError('Image file paths must be absolute')

        first_image = imread(
            get_abs_path(csv_path, dataset_df[feature['name']][0]))

        first_img_height = first_image.shape[0]
        first_img_width = first_image.shape[1]
        first_img_num_channels = num_channels_in_image(first_image)

        if height == 0 or width == 0:
            # User hasn't specified height and width
            height = first_img_height
            width = first_img_width

        # User specified num_channels in the model/feature definition
        user_specified_num_channels = False
        num_channels = first_img_num_channels
        if NUM_CHANNELS in preprocessing_parameters:
            user_specified_num_channels = True
            num_channels = preprocessing_parameters[NUM_CHANNELS]

        if not isinstance(num_channels, int):
            raise ValueError(
                'Number of image channels needs to be an integer')

        metadata[feature['name']]['preprocessing']['height'] = height
        metadata[feature['name']]['preprocessing']['width'] = width
        metadata[
            feature['name']]['preprocessing']['num_channels'] = num_channels

        if feature['preprocessing']['in_memory']:
            data[feature['name']] = np.empty(
                (num_images, height, width, num_channels), dtype=np.uint8)
            for i in range(len(dataset_df)):
                filepath = get_abs_path(csv_path,
                                        dataset_df[feature['name']][i])

                img = ImageBaseFeature._read_image_and_resize(
                    filepath, width, height, should_resize, num_channels,
                    preprocessing_parameters['resize_method'],
                    user_specified_num_channels)
                data[feature['name']][i, :, :, :] = img
        else:
            data_fp = os.path.splitext(dataset_df.csv)[0] + '.hdf5'
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'
            with h5py.File(data_fp, mode) as h5_file:
                image_dataset = h5_file.create_dataset(
                    feature['name'] + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8)
                for i in range(len(dataset_df)):
                    filepath = get_abs_path(csv_path,
                                            dataset_df[feature['name']][i])

                    img = ImageBaseFeature._read_image_and_resize(
                        filepath, width, height, should_resize, num_channels,
                        preprocessing_parameters['resize_method'],
                        user_specified_num_channels)

                    image_dataset[i, :height, :width, :] = img

            data[feature['name']] = np.arange(num_images)
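
Note: Example #1 relies on a num_channels_in_image helper whose behavior is only implied here; Example #2 below inlines the same check (a 2-D array counts as a single grayscale channel, otherwise the third axis gives the channel count). A minimal sketch under that assumption:

def num_channels_in_image(img):
    # Assumed behavior, mirroring the inline check in Example #2:
    # a 2-D array is a single-channel (grayscale) image, otherwise
    # the size of the third axis is the channel count.
    if img.ndim == 2:
        return 1
    return img.shape[2]

# e.g. num_channels_in_image(np.zeros((32, 32))) == 1
#      num_channels_in_image(np.zeros((32, 32, 3))) == 3
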
Example #2
    def add_feature_data(
            feature,
            dataset_df,
            data,
            metadata,
            preprocessing_parameters
    ):
        set_default_value(
            feature['preprocessing'],
            'in_memory',
            preprocessing_parameters['in_memory']
        )

        if (HEIGHT in preprocessing_parameters or
                WIDTH in preprocessing_parameters):
            should_resize = True
            try:
                provided_height = int(preprocessing_parameters[HEIGHT])
                provided_width = int(preprocessing_parameters[WIDTH])
            except ValueError as e:
                raise ValueError(
                    'Image height and width must be set and have '
                    'positive integer values: ' + str(e)
                )
            if provided_height <= 0 or provided_width <= 0:
                raise ValueError(
                    'Image height and width must be positive integers'
                )
        else:
            should_resize = False

        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))

        num_images = len(dataset_df)

        height = 0
        width = 0
        num_channels = 1

        if num_images > 0:
            # If a width and height have not been specified,
            # we assume that all images have the same width and height,
            # so the width and height of the first image are used
            # for all of the others.
            if (csv_path is None and
                    not os.path.isabs(dataset_df[feature['name']][0])):
                raise ValueError(
                    'Image file paths must be absolute'
                )

            first_image = imread(
                get_abs_path(
                    csv_path,
                    dataset_df[feature['name']][0]
                )
            )

            height = first_image.shape[0]
            width = first_image.shape[1]

            if first_image.ndim == 2:
                num_channels = 1
            else:
                num_channels = first_image.shape[2]

        if should_resize:
            height = provided_height
            width = provided_width

        metadata[feature['name']]['preprocessing']['height'] = height
        metadata[feature['name']]['preprocessing']['width'] = width
        metadata[feature['name']]['preprocessing'][
            'num_channels'] = num_channels

        if feature['preprocessing']['in_memory']:
            data[feature['name']] = np.empty(
                (num_images, height, width, num_channels),
                dtype=np.uint8
            )
            for i in range(len(dataset_df)):
                img = imread(
                    get_abs_path(
                        csv_path,
                        dataset_df[feature['name']][i]
                    )
                )
                if img.ndim == 2:
                    img = img.reshape((img.shape[0], img.shape[1], 1))
                if should_resize:
                    img = resize_image(
                        img,
                        (height, width),
                        preprocessing_parameters['resize_method']
                    )
                # TODO: temporary workaround for images with an alpha channel; replace
                if img.ndim == 3 and img.shape[2] != num_channels:
                    img = img[:, :, :num_channels]
                data[feature['name']][i, :, :, :] = img
        else:
            data_fp = os.path.splitext(dataset_df.csv)[0] + '.hdf5'
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'
            with h5py.File(data_fp, mode) as h5_file:
                image_dataset = h5_file.create_dataset(
                    feature['name'] + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8
                )
                for i in range(len(dataset_df)):
                    img = imread(
                        get_abs_path(
                            csv_path,
                            dataset_df[feature['name']][i]
                        )
                    )
                    if img.ndim == 2:
                        img = img.reshape((img.shape[0], img.shape[1], 1))
                    if should_resize:
                        img = resize_image(
                            img,
                            (height, width),
                            preprocessing_parameters['resize_method'],
                        )

                    image_dataset[i, :height, :width, :] = img

            data[feature['name']] = np.arange(num_images)
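
In both examples, when in_memory is false the images are written to an HDF5 dataset named '<feature name>_data' next to the CSV, and data[feature['name']] holds only the row indices. A minimal sketch of reading one image back from that layout (the file path, feature name, and helper name below are illustrative):

import h5py
import numpy as np


def read_image_row(hdf5_path, feature_name, row_index):
    # Open the HDF5 file written above and index into the
    # (num_images, height, width, num_channels) dataset.
    with h5py.File(hdf5_path, 'r') as h5_file:
        return np.asarray(h5_file[feature_name + '_data'][row_index])

# e.g. img = read_image_row('dataset.hdf5', 'image_path', 0)
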
def test_get_abs_path():
    assert get_abs_path('a', 'b.jpg') == 'a/b.jpg'
    assert get_abs_path(None, 'b.jpg') == 'b.jpg'
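
The two assertions above pin down get_abs_path's contract: join the CSV directory with a relative file path, and pass the path through unchanged when no directory is given (the expected 'a/b.jpg' assumes a POSIX path separator). A minimal sketch consistent with the test:

import os


def get_abs_path(csv_path, file_path):
    # Join the directory of the CSV with the (possibly relative) file path;
    # when no directory is known, return the path unchanged.
    if csv_path is not None:
        return os.path.join(csv_path, file_path)
    return file_path
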
    def add_feature_data(feature, dataset_df, data, metadata,
                         preprocessing_parameters):
        set_default_value(feature['preprocessing'], 'in_memory',
                          preprocessing_parameters['in_memory'])

        if 'audio_feature' not in preprocessing_parameters:
            raise ValueError(
                'audio_feature dictionary has to be present in preprocessing for audio.'
            )
        if 'type' not in preprocessing_parameters['audio_feature']:
            raise ValueError(
                'type key has to be present in audio_feature dictionary for audio.'
            )

        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))
        if (csv_path is None
                and not os.path.isabs(dataset_df[feature['name']][0])):
            raise ValueError('Audio file paths must be absolute')

        num_audio_utterances = len(dataset_df)
        padding_value = preprocessing_parameters['padding_value']
        normalization_type = preprocessing_parameters['norm']
        feature_name = feature['name']

        feature_dim = metadata[feature_name]['feature_dim']
        max_length = metadata[feature_name]['max_length']
        audio_feature_dict = preprocessing_parameters['audio_feature']
        audio_file_length_limit_in_s = preprocessing_parameters[
            'audio_file_length_limit_in_s']

        if num_audio_utterances == 0:
            raise ValueError(
                'There are no audio files in the dataset provided.')
        audio_stats = {
            'count': 0,
            'mean': 0,
            'var': 0,
            'std': 0,
            'max': 0,
            'min': float('inf'),
            'cropped': 0,
            'max_length_in_s': audio_file_length_limit_in_s
        }

        if feature['preprocessing']['in_memory']:
            data[feature['name']] = np.empty(
                (num_audio_utterances, max_length, feature_dim),
                dtype=np.float32)
            for i in range(len(dataset_df)):
                filepath = get_abs_path(csv_path,
                                        dataset_df[feature['name']][i])
                audio_feature = AudioBaseFeature._read_audio_and_transform_to_feature(
                    filepath, audio_feature_dict, feature_dim, max_length,
                    padding_value, normalization_type, audio_stats)

                if normalization_type == 'per_file':
                    mean = np.mean(audio_feature, axis=0)
                    std = np.std(audio_feature, axis=0)
                    data[feature['name']][i, :, :] = np.divide(
                        (audio_feature - mean), std)
                elif normalization_type == 'global':
                    raise ValueError('not implemented yet')
                else:
                    data[feature['name']][i, :, :] = audio_feature

            audio_stats['std'] = np.sqrt(audio_stats['var'] /
                                         float(audio_stats['count']))
            print_statistics = """
            {} audio files loaded. 
            Statistics of audio file lengths:
            - mean: {:.4f}
            - std: {:.4f}
            - max: {:.4f}
            - min: {:.4f}
            - cropped audio_files: {}
            Max length was given as {}.
            """.format(audio_stats['count'], audio_stats['mean'],
                       audio_stats['std'], audio_stats['max'],
                       audio_stats['min'], audio_stats['cropped'],
                       audio_stats['max_length_in_s'])
            print(print_statistics)
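
The audio example accumulates length statistics incrementally in audio_stats and derives the standard deviation only at the end as sqrt(var / count), which implies that 'var' holds a running sum of squared deviations rather than a finished variance. A minimal Welford-style update consistent with that layout (the helper name is illustrative; in the example the accumulation happens inside _read_audio_and_transform_to_feature):

def update_audio_stats(audio_stats, length_in_s):
    # Welford-style running update: 'var' accumulates the sum of squared
    # deviations, so std can later be computed as sqrt(var / count).
    audio_stats['count'] += 1
    delta = length_in_s - audio_stats['mean']
    audio_stats['mean'] += delta / audio_stats['count']
    audio_stats['var'] += delta * (length_in_s - audio_stats['mean'])
    audio_stats['max'] = max(audio_stats['max'], length_in_s)
    audio_stats['min'] = min(audio_stats['min'], length_in_s)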