def __init__(self, *args, **kwargs):
    """Constructor

    Initializes every base class: the mixin and container bases are run
    explicitly first, then ``super`` is called so the remaining classes
    in the MRO are initialized as well.
    """

    # Run ProcessorMixin init
    ProcessorMixin.__init__(self, *args, **kwargs)

    # Run FeatureContainer init
    FeatureContainer.__init__(self, **kwargs)

    # Run super init to call init of mixins too
    super(FeatureReadingProcessor, self).__init__(*args, **kwargs)
def process(self, data=None, store_processing_chain=False, **kwargs):
    """Extract features

    Parameters
    ----------
    data : AudioContainer
        Audio data to extract features

    store_processing_chain : bool
        Store processing chain to data container returned
        Default value False

    Returns
    -------
    FeatureContainer

    """

    from dcase_util.containers import FeatureContainer, AudioContainer

    if not isinstance(data, AudioContainer):
        # Guard clause: only audio containers are accepted as input.
        message = '{name}: Wrong input data type, type required [{input_type}].'.format(
            name=self.__class__.__name__,
            input_type=self.input_type)

        self.logger.exception(message)
        raise ValueError(message)

    chain = None
    if store_processing_chain:
        processor_name = 'dcase_util.processors.' + self.__class__.__name__
        has_chain = hasattr(data, 'processing_chain')

        if has_chain and data.processing_chain.chain_item_exists(processor_name=processor_name):
            # Current processor is already in the processing chain, reuse that item
            chain_item = data.processing_chain.chain_item(processor_name=processor_name)

        else:
            # Create a new processing chain item
            chain_item = self.get_processing_chain_item()

        chain_item.update({
            'process_parameters': kwargs
        })

        if has_chain:
            data.processing_chain.push_processor(**chain_item)
            chain = data.processing_chain

        else:
            chain = ProcessingChain().push_processor(**chain_item)

    return FeatureContainer(
        data=self.extract(y=data.get_focused()),
        time_resolution=self.hop_length_seconds,
        processing_chain=chain
    )
def test_aggregate_skew():
    """The 'skew' recipe produces an all-zero (2, 11) matrix for the shared test data."""

    # Expected output: zeros, two channels by eleven frames
    data_target = numpy.zeros((2, 11))

    feature_container = FeatureContainer(
        data=data
    )

    aggregated = Aggregator(
        win_length_frames=2,
        hop_length_frames=1,
        recipe=['skew']
    ).aggregate(data=feature_container)

    numpy.testing.assert_array_equal(data_target, aggregated.data)
def test_aggregate_mean():
    """The 'mean' recipe with a 2-frame window and 1-frame hop yields running means."""

    # Both feature channels are expected to hold the same values,
    # so build one column and stack it twice into shape (2, 11).
    expected_column = numpy.array([0.0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5])
    data_target = numpy.vstack((expected_column, expected_column))

    feature_container = FeatureContainer(
        data=data
    )

    aggregated = Aggregator(
        win_length_frames=2,
        hop_length_frames=1,
        recipe=['mean']
    ).aggregate(data=feature_container)

    numpy.testing.assert_array_equal(data_target, aggregated.data)
def test_aggregate_flatten():
    """The 'flatten' recipe concatenates each window's frames into one vector per frame."""

    # Each output frame i is [prev, prev, curr, curr] where prev is the value of
    # frame i-1 (clamped to 0 for the first frame) and curr the value of frame i.
    previous_frame = numpy.concatenate(([0], numpy.arange(0, 10)))
    current_frame = numpy.arange(0, 11)
    data_target = numpy.vstack((previous_frame, previous_frame, current_frame, current_frame))

    feature_container = FeatureContainer(
        data=data
    )

    aggregated = Aggregator(
        win_length_frames=2,
        hop_length_frames=1,
        recipe=['flatten']
    ).aggregate(data=feature_container)

    numpy.testing.assert_array_equal(data_target, aggregated.data)
def process(self, data=None, output_filename=None, store_processing_chain=False, **kwargs):
    """Data writing.

    Parameters
    ----------
    data : FeatureContainer
        Input feature data.
        Default value None

    output_filename : str
        Filename of the feature container to save.
        Default value None

    store_processing_chain : bool
        Store processing chain to data container
        Default value False

    Returns
    -------
    FeatureContainer

    """

    if not data:
        message = '{name}: No input data.'.format(
            name=self.__class__.__name__)

        self.logger.exception(message)
        raise ValueError(message)

    container = FeatureContainer(data=data)

    if store_processing_chain:
        # Carry the incoming chain over to the new container.
        # NOTE(review): assumes the input data exposes ``processing_chain`` — confirm callers.
        container.processing_chain = data.processing_chain

    if output_filename:
        # Save features to disk (the old comment wrongly said "Load")
        container.save(filename=output_filename)

    return container
def feature_container(cls, filename=None):
    """Return a FeatureContainer with mel-band features extracted from the given audio file.

    Parameters
    ----------
    filename : str, optional
        Audio file to extract features from; defaults to ``cls.audio_filename()``.

    Returns
    -------
    FeatureContainer

    """

    if filename is None:
        filename = cls.audio_filename()

    from dcase_util.containers import AudioContainer, FeatureContainer
    from dcase_util.features import MelExtractor

    # Load audio and collapse it to a single channel before extraction
    audio = AudioContainer(filename=filename).load().mixdown()
    extractor = MelExtractor(fs=audio.fs)

    return FeatureContainer(
        data=extractor.extract(audio.data),
        time_resolution=extractor.hop_length_seconds
    )
def test_save():
    """Aggregator settings survive a save/load round trip and still aggregate correctly."""

    # Expected 'flatten' output: each frame is [prev, prev, curr, curr]
    data_target = numpy.array(
        [
            [0, 0, 0, 0],
            [0, 0, 1, 1],
            [1, 1, 2, 2],
            [2, 2, 3, 3],
            [3, 3, 4, 4],
            [4, 4, 5, 5],
            [5, 5, 6, 6],
            [6, 6, 7, 7],
            [7, 7, 8, 8],
            [8, 8, 9, 9],
            [9, 9, 10, 10],
        ]
    ).T

    container = FeatureContainer(
        data=data
    )

    tmp = tempfile.NamedTemporaryFile('r+', suffix='.cpickle', dir=tempfile.gettempdir(), delete=False)
    try:
        # Save the aggregator to disk and reload it before using it
        agg = Aggregator(
            win_length_frames=2,
            hop_length_frames=1,
            recipe=['flatten']
        ).save(filename=tmp.name).load()

        data_aggregated = agg.aggregate(data=container)

        numpy.testing.assert_array_equal(data_target, data_aggregated.data)

    finally:
        try:
            tmp.close()
            os.unlink(tmp.name)

        except OSError:
            # Fix: the original bare ``except:`` also swallowed SystemExit and
            # KeyboardInterrupt; only best-effort file cleanup errors are ignored.
            pass
def stack(self, repository, **kwargs):
    """Vector creation based on recipe

    Parameters
    ----------
    repository : RepositoryContainer
        Repository with needed data

    Returns
    -------
    FeatureContainer

    """

    # First pass: collect frame counts and time resolutions of all recipe parts
    # to verify the data matrices are compatible before stacking.
    frame_count = []
    time_resolution = []
    for recipe_part in self.recipe:
        if 'vector-index' in recipe_part:
            stream_id = recipe_part['vector-index']['stream']
        else:
            stream_id = 0  # Default value

        current = repository.get_container(label=recipe_part['label'], stream_id=stream_id)

        if current.time_resolution:
            time_resolution.append(current.time_resolution)

        frame_count.append(current.length)

    if len(set(frame_count)) != 1:
        message = '{name}: Data matrices should have same number of frames {frame_count}'.format(
            name=self.__class__.__name__,
            frame_count=frame_count,
        )
        self.logger.exception(message)
        raise AssertionError(message)

    if len(set(time_resolution)) != 1:
        message = '{name}: Data matrices should have same time resolution {time_resolution}'.format(
            name=self.__class__.__name__,
            time_resolution=time_resolution,
        )
        self.logger.exception(message)
        raise AssertionError(message)

    # Second pass: collect the selected frames / vectors and stack them.
    data_matrix = []
    for recipe_part in self.recipe:
        if 'vector-index' in recipe_part:
            vector_index = recipe_part['vector-index']
            stream_id = vector_index['stream']
        else:
            vector_index = None
            stream_id = 0  # Default value

        current = repository.get_container(label=recipe_part['label'], stream_id=stream_id)

        if vector_index is None or vector_index.get('full'):
            # Full matrix
            data_matrix.append(
                current.get_frames(frame_hop=self.hop)
            )

        elif 'vector' in vector_index and vector_index.get('selection'):
            # Selector vector
            selected = numpy.array(vector_index['vector'])
            data_matrix.append(
                current.get_frames(vector_ids=selected, frame_hop=self.hop)
            )

        elif 'start' in vector_index and 'stop' in vector_index:
            # Start and end index
            selected = numpy.arange(vector_index['start'], vector_index['stop'])
            data_matrix.append(
                current.get_frames(vector_ids=selected, frame_hop=self.hop)
            )

    from dcase_util.containers import FeatureContainer
    return FeatureContainer(
        data=numpy.vstack(data_matrix),
        time_resolution=time_resolution[0],
        processing_chain=repository.processing_chain
    )
def process(self, data=None, filename=None,
            focus_start=None, focus_stop=None, focus_duration=None,
            focus_start_seconds=None, focus_stop_seconds=None, focus_duration_seconds=None,
            store_processing_chain=False,
            **kwargs):
    """Data reading.

    Parameters
    ----------
    data : FeatureContainer
        Input feature data.
        Default value None

    filename : str
        Filename of the feature container to load.
        Default value None

    focus_start : int, optional
        Segment start, frame index of focus segment start.
        Default value None

    focus_stop : int, optional
        Segment end, Frame index of focus segment stop.
        Default value None

    focus_duration : int, optional
        Segment duration, Frame count of focus segment.
        Default value None

    focus_start_seconds : float > 0.0
        Segment start, seconds.
        Default value None

    focus_stop_seconds : float > 0.0
        Segment end, seconds.
        Default value None

    focus_duration_seconds : float
        Segment duration, seconds.
        Default value None

    store_processing_chain : bool
        Store processing chain to data container returned.
        Default value False

    Returns
    -------
    FeatureContainer

    """

    if data is None and self.input_type == ProcessingChainItemType.NONE:
        container = FeatureContainer()

        if filename:
            # Load features from disk
            container.load(filename=filename)

        # Set focus segment; frame-based parameters take precedence over
        # second-based ones, and (start, duration) over (start, stop).
        if focus_start is not None and focus_duration is not None:
            container.set_focus(
                start=focus_start,
                duration=focus_duration
            )

        elif focus_start is not None and focus_stop is not None:
            container.set_focus(
                start=focus_start,
                stop=focus_stop
            )

        elif focus_start_seconds is not None and focus_duration_seconds is not None:
            container.set_focus(
                start_seconds=focus_start_seconds,
                duration_seconds=focus_duration_seconds
            )

        elif focus_start_seconds is not None and focus_stop_seconds is not None:
            container.set_focus(
                start_seconds=focus_start_seconds,
                stop_seconds=focus_stop_seconds
            )

        if store_processing_chain and not container.processing_chain:
            # Insert Reader processor only if processing chain is empty
            processing_chain_item = self.get_processing_chain_item()
            processing_chain_item.setdefault('process_parameters', {})

            # Record the call parameters once each. (Fix: the original assigned
            # 'focus_start' and 'focus_start_seconds' twice, redundantly.)
            processing_chain_item['process_parameters'].update({
                'filename': filename,
                'focus_start': focus_start,
                'focus_duration': focus_duration,
                'focus_stop': focus_stop,
                'focus_start_seconds': focus_start_seconds,
                'focus_duration_seconds': focus_duration_seconds,
                'focus_stop_seconds': focus_stop_seconds,
            })

            container.push_processing_chain_item(**processing_chain_item)

        return container

    else:
        message = '{name}: Wrong input data type, type required [{input_type}].'.format(
            name=self.__class__.__name__,
            input_type=self.input_type)

        self.logger.exception(message)
        raise ValueError(message)
def test_aggregate():
    """Mean aggregation across several window/hop/center/padding combinations.

    Both feature channels are identical, so each expected output is one column
    of per-window means stacked into two rows.
    """

    cases = [
        # (aggregator parameters, expected per-channel values)
        (dict(win_length_frames=2, hop_length_frames=1, recipe=['mean'], center=False, padding=False),
         [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]),
        (dict(win_length_frames=11, hop_length_frames=11, recipe=['mean'], center=False, padding=False),
         [5.0]),
        (dict(win_length_frames=4, hop_length_frames=4, recipe=['mean'], center=False, padding=False),
         [1.5, 5.5]),
        (dict(win_length_frames=4, hop_length_frames=4, recipe=['mean'], center=True, padding=False),
         [3.5, 7.5]),
        (dict(win_length_frames=4, hop_length_frames=4, recipe=['mean'], center=True, padding=True),
         [0.25, 3.5, 7.5]),
    ]

    for parameters, expected_column in cases:
        data_target = numpy.vstack((expected_column, expected_column))

        feature_container = FeatureContainer(
            data=data
        )

        aggregated = Aggregator(**parameters).aggregate(data=feature_container)

        numpy.testing.assert_array_equal(data_target, aggregated.data)