Example #1
    def __init__(self, *args, **kwargs):
        """Constructor"""

        # Run ProcessorMixin init
        ProcessorMixin.__init__(self, *args, **kwargs)

        # Run FeatureContainer init
        FeatureContainer.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(FeatureReadingProcessor, self).__init__(*args, **kwargs)
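
For reference, a minimal sketch of how this constructor is reached (FeatureReadingProcessor lives in dcase_util.processors; the argument-free call is an assumption):

from dcase_util.processors import FeatureReadingProcessor

# Instantiating the processor runs the ProcessorMixin / FeatureContainer
# init chain shown above.
processor = FeatureReadingProcessor()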
Example #2
    def process(self, data=None, store_processing_chain=False, **kwargs):
        """Extract features

        Parameters
        ----------
        data : AudioContainer
            Audio data to extract features from.

        store_processing_chain : bool
            Store the processing chain in the returned data container.
            Default value False

        Returns
        -------
        FeatureContainer

        """

        from dcase_util.containers import FeatureContainer, AudioContainer

        if isinstance(data, AudioContainer):
            if store_processing_chain:
                if hasattr(data, 'processing_chain') and data.processing_chain.chain_item_exists(
                        processor_name='dcase_util.processors.' + self.__class__.__name__):
                    # Current processor is already in the processing chain, get that
                    processing_chain_item = data.processing_chain.chain_item(
                        processor_name='dcase_util.processors.' + self.__class__.__name__
                    )

                else:
                    # Create a new processing chain item
                    processing_chain_item = self.get_processing_chain_item()

                processing_chain_item.update({
                    'process_parameters': kwargs
                })

                if hasattr(data, 'processing_chain'):
                    data.processing_chain.push_processor(**processing_chain_item)
                    processing_chain = data.processing_chain

                else:
                    processing_chain = ProcessingChain().push_processor(**processing_chain_item)

            else:
                processing_chain = None

            return FeatureContainer(
                data=self.extract(y=data.get_focused()),
                time_resolution=self.hop_length_seconds,
                processing_chain=processing_chain
            )

        else:
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__,
                input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)
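
A hedged usage sketch for this method, going through one concrete extractor processor from dcase_util (the audio filename is a placeholder):

from dcase_util.containers import AudioContainer
from dcase_util.processors import MelExtractorProcessor

# Load audio and run it through the processor; a FeatureContainer comes back,
# with the processing chain attached when requested.
audio = AudioContainer(filename='audio.wav').load()
features = MelExtractorProcessor().process(data=audio, store_processing_chain=True)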
Example #3
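The Aggregator tests in Examples #3-#5, #8 and #11 rely on module-level imports and a shared data array that the snippets omit. A minimal reconstruction, inferred from the expected outputs (the exact fixture in dcase_util's test suite may differ):

import os
import tempfile

import numpy
from dcase_util.containers import FeatureContainer
from dcase_util.data import Aggregator

# 11 frames, 2 feature dimensions; frame i carries the value i in both
# dimensions, giving a 2 x 11 matrix after the transpose.
data = numpy.array([[i, i] for i in range(11)]).T
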
def test_aggregate_skew():
    data_target = numpy.array(
        [
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
            [0.0, 0.0],
        ]
    ).T

    container = FeatureContainer(
        data=data
    )

    agg = Aggregator(
        win_length_frames=2,
        hop_length_frames=1,
        recipe=['skew']
    )
    data_aggregated = agg.aggregate(data=container)

    numpy.testing.assert_array_equal(data_target, data_aggregated.data)
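
The all-zeros target is expected: each length-2 window holds two values, which are always symmetric about their mean, so the third central moment, and with it the skewness, is 0. A quick check with scipy.stats (assuming, plausibly but not verified here, that this is what the aggregator delegates to):

from scipy.stats import skew

print(skew([3.0, 7.0]))  # 0.0 for any window of two distinct values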
Example #4
def test_aggregate_mean():
    data_target = numpy.array(
        [
            [0,   0],
            [0.5, 0.5],
            [1.5, 1.5],
            [2.5, 2.5],
            [3.5, 3.5],
            [4.5, 4.5],
            [5.5, 5.5],
            [6.5, 6.5],
            [7.5, 7.5],
            [8.5, 8.5],
            [9.5, 9.5],
        ]
    ).T

    container = FeatureContainer(
        data=data
    )

    agg = Aggregator(
        win_length_frames=2,
        hop_length_frames=1,
        recipe=['mean']
    )
    data_aggregated = agg.aggregate(data=container)

    numpy.testing.assert_array_equal(data_target, data_aggregated.data)
Example #5
def test_aggregate_flatten():
    data_target = numpy.array(
        [
            [0, 0, 0, 0],
            [0, 0, 1, 1],
            [1, 1, 2, 2],
            [2, 2, 3, 3],
            [3, 3, 4, 4],
            [4, 4, 5, 5],
            [5, 5, 6, 6],
            [6, 6, 7, 7],
            [7, 7, 8, 8],
            [8, 8, 9, 9],
            [9, 9, 10, 10],
        ]
    ).T
    container = FeatureContainer(
        data=data
    )

    agg = Aggregator(
        win_length_frames=2,
        hop_length_frames=1,
        recipe=['flatten']
    )
    data_aggregated = agg.aggregate(data=container)

    numpy.testing.assert_array_equal(data_target, data_aggregated.data)
Example #6
    def process(self,
                data=None,
                output_filename=None,
                store_processing_chain=False,
                **kwargs):
        """Data writing.

        Parameters
        ----------
        data : FeatureContainer
            Input feature data.
            Default value None

        output_filename : str
            Filename of the feature container to save.
            Default value None

        store_processing_chain : bool
            Store the processing chain in the data container.
            Default value False

        Returns
        -------
        FeatureContainer

        """

        if data:
            container = FeatureContainer(data=data)

            if store_processing_chain:
                container.processing_chain = data.processing_chain

            if output_filename:
                # Save features to disk
                container.save(filename=output_filename)

            return container

        else:
            message = '{name}: No input data.'.format(
                name=self.__class__.__name__)

            self.logger.exception(message)
            raise ValueError(message)
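
A hedged usage sketch, assuming this method belongs to dcase_util's FeatureWritingProcessor (the output filename is a placeholder, and features is a FeatureContainer produced earlier, e.g. by the sketch after Example #2):

from dcase_util.processors import FeatureWritingProcessor

# Wrap the data in a FeatureContainer and write it to disk in one call.
FeatureWritingProcessor().process(data=features, output_filename='features.cpickle')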
Example #7
    @classmethod
    def feature_container(cls, filename=None):
        if filename is None:
            filename = cls.audio_filename()

        from dcase_util.containers import AudioContainer, FeatureContainer
        from dcase_util.features import MelExtractor
        audio_container = AudioContainer(filename=filename).load().mixdown()
        mel_extractor = MelExtractor(fs=audio_container.fs)
        feature_data = mel_extractor.extract(audio_container.data)
        feature_container = FeatureContainer(
            data=feature_data,
            time_resolution=mel_extractor.hop_length_seconds)

        return feature_container
Example #8
def test_save():
    data_target = numpy.array(
        [
            [0, 0, 0, 0],
            [0, 0, 1, 1],
            [1, 1, 2, 2],
            [2, 2, 3, 3],
            [3, 3, 4, 4],
            [4, 4, 5, 5],
            [5, 5, 6, 6],
            [6, 6, 7, 7],
            [7, 7, 8, 8],
            [8, 8, 9, 9],
            [9, 9, 10, 10],
        ]
    ).T
    container = FeatureContainer(
        data=data
    )

    tmp = tempfile.NamedTemporaryFile('r+', suffix='.cpickle', dir=tempfile.gettempdir(), delete=False)
    try:
        agg = Aggregator(
            win_length_frames=2,
            hop_length_frames=1,
            recipe=['flatten']
        ).save(filename=tmp.name).load()
        data_aggregated = agg.aggregate(data=container)

        numpy.testing.assert_array_equal(data_target, data_aggregated.data)
    finally:
        try:
            tmp.close()
            os.unlink(tmp.name)
        except Exception:
            pass
Example #9
    def stack(self, repository, **kwargs):
        """Vector creation based on recipe

        Parameters
        ----------
        repository : RepositoryContainer
            Repository with needed data

        Returns
        -------
        FeatureContainer

        """

        # Check that all data matrices have the same number of frames
        frame_count = []
        time_resolution = []
        for recipe_part in self.recipe:
            label = recipe_part['label']
            stream_id = 0  # Default value
            if 'vector-index' in recipe_part:
                stream_id = recipe_part['vector-index']['stream']

            if repository.get_container(label=label,
                                        stream_id=stream_id).time_resolution:
                time_resolution.append(
                    repository.get_container(
                        label=label, stream_id=stream_id).time_resolution)

            frame_count.append(
                repository.get_container(label=label,
                                         stream_id=stream_id).length)

        if len(set(frame_count)) != 1:
            message = '{name}: Data matrices should have same number of frames {frame_count}'.format(
                name=self.__class__.__name__,
                frame_count=frame_count,
            )

            self.logger.exception(message)
            raise AssertionError(message)

        if len(set(time_resolution)) != 1:
            message = '{name}: Data matrices should have same time resolution {time_resolution}'.format(
                name=self.__class__.__name__,
                time_resolution=time_resolution,
            )

            self.logger.exception(message)
            raise AssertionError(message)

        # Stack data
        data_matrix = []
        for recipe_part in self.recipe:
            label = recipe_part['label']

            # Default values
            stream_id = 0
            if 'vector-index' in recipe_part:
                stream_id = recipe_part['vector-index']['stream']

            if ('vector-index' not in recipe_part
                    or ('vector-index' in recipe_part
                        and 'full' in recipe_part['vector-index']
                        and recipe_part['vector-index']['full'])):

                # Full matrix
                data_matrix.append(
                    repository.get_container(
                        label=label,
                        stream_id=stream_id).get_frames(frame_hop=self.hop))

            elif ('vector-index' in recipe_part
                  and 'vector' in recipe_part['vector-index']
                  and 'selection' in recipe_part['vector-index']
                  and recipe_part['vector-index']['selection']):

                index = numpy.array(recipe_part['vector-index']['vector'])

                # Selector vector
                data_matrix.append(
                    repository.get_container(label=label,
                                             stream_id=stream_id).get_frames(
                                                 vector_ids=index,
                                                 frame_hop=self.hop))

            elif ('vector-index' in recipe_part
                  and 'start' in recipe_part['vector-index']
                  and 'stop' in recipe_part['vector-index']):

                index = numpy.arange(recipe_part['vector-index']['start'],
                                     recipe_part['vector-index']['stop'])

                # Start and end index
                data_matrix.append(
                    repository.get_container(label=label,
                                             stream_id=stream_id).get_frames(
                                                 vector_ids=index,
                                                 frame_hop=self.hop))

        from dcase_util.containers import FeatureContainer

        return FeatureContainer(data=numpy.vstack(data_matrix),
                                time_resolution=time_resolution[0],
                                processing_chain=repository.processing_chain)
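
A hedged usage sketch, assuming this is dcase_util's Stacker and that repository is a FeatureRepository already holding 'mel' and 'mfcc' containers; the 'label=start-stop' recipe syntax is taken from the library's documentation:

from dcase_util.data import Stacker

# Stack the full mel matrix with MFCC coefficients 1-10 into one FeatureContainer.
stacked = Stacker(recipe='mel;mfcc=1-10').stack(repository)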
Example #10
    def process(self,
                data=None,
                filename=None,
                focus_start=None,
                focus_stop=None,
                focus_duration=None,
                focus_start_seconds=None,
                focus_stop_seconds=None,
                focus_duration_seconds=None,
                store_processing_chain=False,
                **kwargs):
        """Data reading.

        Parameters
        ----------
        data : FeatureContainer
            Input feature data.
            Default value None

        filename : str
            Filename of the feature container to load.
            Default value None

        focus_start : int, optional
            Segment start, frame index of focus segment start.
            Default value None

        focus_stop : int, optional
            Segment end, frame index of focus segment stop.
            Default value None

        focus_duration : int, optional
            Segment duration, frame count of focus segment.
            Default value None

        focus_start_seconds : float > 0.0
            Segment start, seconds.
            Default value None

        focus_stop_seconds : float > 0.0
            Segment end, seconds.
            Default value None

        focus_duration_seconds : float
            Segment duration, seconds.
            Default value None

        store_processing_chain : bool
            Store the processing chain in the returned data container.
            Default value False

        Returns
        -------
        FeatureContainer

        """

        if data is None and self.input_type == ProcessingChainItemType.NONE:
            container = FeatureContainer()

            if filename:
                # Load features from disk
                container.load(filename=filename)

            if focus_start is not None and focus_duration is not None:
                # Set focus segment and channel
                container.set_focus(start=focus_start, duration=focus_duration)

            elif focus_start is not None and focus_stop is not None:
                # Set focus segment and channel
                container.set_focus(start=focus_start, stop=focus_stop)

            elif focus_start_seconds is not None and focus_duration_seconds is not None:
                # Set focus segment and channel
                container.set_focus(start_seconds=focus_start_seconds,
                                    duration_seconds=focus_duration_seconds)

            elif focus_start_seconds is not None and focus_stop_seconds is not None:
                # Set focus segment and channel
                container.set_focus(start_seconds=focus_start_seconds,
                                    stop_seconds=focus_stop_seconds)

            if store_processing_chain and not container.processing_chain:
                # Insert Reader processor only if processing chain is empty
                processing_chain_item = self.get_processing_chain_item()

                if 'process_parameters' not in processing_chain_item:
                    processing_chain_item['process_parameters'] = {}

                processing_chain_item['process_parameters'].update({
                    'filename': filename,
                    'focus_start': focus_start,
                    'focus_stop': focus_stop,
                    'focus_duration': focus_duration,
                    'focus_start_seconds': focus_start_seconds,
                    'focus_stop_seconds': focus_stop_seconds,
                    'focus_duration_seconds': focus_duration_seconds
                })

                container.push_processing_chain_item(**processing_chain_item)

            return container

        else:
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__, input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)
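
A hedged usage sketch matching the signature above (the filename is a placeholder):

from dcase_util.processors import FeatureReadingProcessor

# Load stored features and focus on a 2-second segment starting at 1.0 s.
features = FeatureReadingProcessor().process(
    filename='features.cpickle',
    focus_start_seconds=1.0,
    focus_duration_seconds=2.0
)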
Example #11
def test_aggregate():
    data_target = numpy.array(
        [
            [0.5, 0.5],
            [1.5, 1.5],
            [2.5, 2.5],
            [3.5, 3.5],
            [4.5, 4.5],
            [5.5, 5.5],
            [6.5, 6.5],
            [7.5, 7.5],
            [8.5, 8.5],
            [9.5, 9.5],
        ]
    ).T

    container = FeatureContainer(
        data=data
    )

    agg = Aggregator(
        win_length_frames=2,
        hop_length_frames=1,
        recipe=['mean'],
        center=False,
        padding=False,
    )
    data_aggregated = agg.aggregate(data=container)

    numpy.testing.assert_array_equal(data_target, data_aggregated.data)

    data_target = numpy.array(
        [
            [5.0, 5.0]
        ]
    ).T
    container = FeatureContainer(
        data=data
    )
    agg = Aggregator(
        win_length_frames=11,
        hop_length_frames=11,
        recipe=['mean'],
        center=False,
        padding=False,
    )
    data_aggregated = agg.aggregate(data=container)
    numpy.testing.assert_array_equal(data_target, data_aggregated.data)

    data_target = numpy.array(
        [
            [1.5, 1.5],
            [5.5, 5.5],
        ]
    ).T
    container = FeatureContainer(
        data=data
    )
    agg = Aggregator(
        win_length_frames=4,
        hop_length_frames=4,
        recipe=['mean'],
        center=False,
        padding=False,
    )
    data_aggregated = agg.aggregate(data=container)
    numpy.testing.assert_array_equal(data_target, data_aggregated.data)

    data_target = numpy.array(
        [
            [3.5, 3.5],
            [7.5, 7.5],
        ]
    ).T
    container = FeatureContainer(
        data=data
    )
    agg = Aggregator(
        win_length_frames=4,
        hop_length_frames=4,
        recipe=['mean'],
        center=True,
        padding=False,
    )
    data_aggregated = agg.aggregate(data=container)
    numpy.testing.assert_array_equal(data_target, data_aggregated.data)

    data_target = numpy.array(
        [
            [0.25, 0.25],
            [3.5, 3.5],
            [7.5, 7.5],
        ]
    ).T
    container = FeatureContainer(
        data=data
    )
    agg = Aggregator(
        win_length_frames=4,
        hop_length_frames=4,
        recipe=['mean'],
        center=True,
        padding=True,
    )
    data_aggregated = agg.aggregate(data=container)
    numpy.testing.assert_array_equal(data_target, data_aggregated.data)
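
A quick sanity check of the win_length_frames=4, hop_length_frames=4, no-centering, no-padding case above, reusing the numpy import and frame values from the shared setup: frames 0-3 and 4-7 average to 1.5 and 5.5, and the trailing three frames are dropped because there is no padding.

frames = numpy.arange(11.0)

print(frames[0:4].mean(), frames[4:8].mean())  # 1.5 5.5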