def __init__(self, data=None, stats=None, metadata=None, time_resolution=None, processing_chain=None, **kwargs):
    kwargs.update({
        'data': data,
        'stats': stats,
        'metadata': metadata,
        'time_resolution': time_resolution,
        'processing_chain': processing_chain
    })

    # Run DataMatrix2DContainer init
    DataMatrix2DContainer.__init__(self, **kwargs)

    # Run super init
    super(FeatureContainer, self).__init__(**kwargs)
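# Usage sketch (illustrative values): a FeatureContainer can be constructed
# directly from a feature matrix. The 40x501 shape (mel bands x frames) and
# the 20 ms time resolution below are arbitrary example values.
import numpy
import dcase_util

features = dcase_util.containers.FeatureContainer(
    data=numpy.random.rand(40, 501),
    time_resolution=0.02
)
features.show()  # print container overview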
def process(self, data=None, store_processing_chain=False, **kwargs):
    """Process

    Parameters
    ----------
    data : AudioContainer
        Audio data to be sequenced

    store_processing_chain : bool
        Store processing chain to data container returned
        Default value False

    Returns
    -------
    DataMatrix3DContainer

    """

    from dcase_util.containers import AudioContainer, DataMatrix2DContainer

    if isinstance(data, AudioContainer):
        audio_data = data.data
        if data.channels == 1:
            # Make single-channel audio 2D (1 x samples) for the sequencer
            audio_data = audio_data[numpy.newaxis, :]

        # Do processing
        container = self.sequencer.sequence(
            data=DataMatrix2DContainer(audio_data, time_resolution=1 / float(data.fs)),
            **kwargs
        )

        if store_processing_chain:
            # Get processing chain item
            processing_chain_item = self.get_processing_chain_item()

            # Update current processing parameters into chain item
            processing_chain_item.update({'process_parameters': kwargs})

            # Push chain item into processing chain stored in the container
            container.processing_chain.push_processor(**processing_chain_item)

        return container

    else:
        message = '{name}: Wrong input data type, type required [{input_type}].'.format(
            name=self.__class__.__name__,
            input_type=self.input_type
        )
        self.logger.exception(message)
        raise ValueError(message)
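# Usage sketch: feed raw audio through this processor. The owning class is not
# shown in this excerpt, so `AudioSequencingProcessor` and its init parameters
# below are hypothetical stand-ins; the AudioContainer setup follows the
# documented dcase_util pattern.
import numpy
import dcase_util

audio = dcase_util.containers.AudioContainer(fs=16000)
audio.data = numpy.random.randn(16000)  # one second of mono noise

processor = AudioSequencingProcessor(sequence_length=1024, hop_length=512)  # hypothetical name/parameters
sequenced = processor.process(data=audio)  # -> DataMatrix3DContainer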
def process(self, data=None, store_processing_chain=False, **kwargs):
    """Process the data with processing chain

    Parameters
    ----------
    data : DataContainer
        Data

    store_processing_chain : bool
        Store processing chain to data container returned
        Default value False

    Returns
    -------
    data : DataContainer
        Processed data

    """

    for step_id, step in enumerate(self):
        # Loop through steps in the processing chain
        if isinstance(step, ProcessingChainItem):
            if step_id == 0 and data is None:
                # Inject data for the first item in the chain
                if step.processor_class.input_type == ProcessingChainItemType.DATA_CONTAINER:
                    from dcase_util.containers import DataMatrix2DContainer
                    data = DataMatrix2DContainer(**kwargs).load()

                elif step.processor_class.input_type == ProcessingChainItemType.DATA_REPOSITORY:
                    from dcase_util.containers import DataRepository
                    data = DataRepository(**kwargs).load()

            if 'preprocessing_callbacks' in step and isinstance(step['preprocessing_callbacks'], list):
                # Handle pre-processing callbacks assigned to current processor
                for method in step['preprocessing_callbacks']:
                    if isinstance(method, dict):
                        method_name = method.get('method_name')
                        method_parameters = method.get('parameters')
                        if hasattr(step.processor_class, method_name):
                            getattr(step.processor_class, method_name)(**method_parameters)

            if hasattr(step.processor_class, 'process'):
                # Call process method of the processor if it exists

                # Get process parameters from step
                process_parameters = step.get('process_parameters', {})

                # Update parameters with current parameters given
                process_parameters.update(kwargs)

                # Do actual processing
                data = step.processor_class.process(
                    data=data,
                    store_processing_chain=store_processing_chain,
                    **process_parameters
                )

    return data
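# Usage sketch, following the documented dcase_util chaining pattern: build a
# chain and let process() run the input through each processor in order. The
# processor choices, 'audio.wav' placeholder, and parameters are example values.
import dcase_util

chain = dcase_util.processors.ProcessingChain()
chain.push_processor(
    processor_name='dcase_util.processors.MonoAudioReadingProcessor',
    init_parameters={'fs': 44100}
)
chain.push_processor(processor_name='dcase_util.processors.MelExtractorProcessor')

# The first processor loads the file; later processors transform the container.
features = chain.process(filename='audio.wav')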
def as_matrix(self, label_list=None, filename=None, file_list=None, default_value=0):
    """Get probabilities as data matrix.
    If items have an index defined, the index is used to order columns; otherwise, if items
    have a filename, the filename is used to order columns.

    Parameters
    ----------
    label_list : list of str
        List of labels. If none given, labels in the container are used in alphabetical order.
        Default value None

    filename : str
        Filename to filter content. If none given, one given for class constructor is used.
        Default value None

    file_list : list of str
        List of filenames to be included in the matrix.
        Default value None

    default_value : numerical
        Default value of the element in the matrix. Used in case there is no data for the
        element in the container.

    Returns
    -------
    DataMatrix2DContainer

    """

    data = self.filter(filename=filename, file_list=file_list)

    if label_list is None:
        label_list = data.unique_labels

    indices = data.unique_indices

    if file_list is None:
        file_list = data.unique_files

    if indices:
        # Order columns by item index
        matrix = numpy.ones((len(label_list), len(indices))) * default_value

        for index in indices:
            current_column = data.filter(index=index)
            for item in current_column:
                if item.label in label_list:
                    matrix[label_list.index(item.label), index] = item.probability

        from dcase_util.containers import DataMatrix2DContainer
        return DataMatrix2DContainer(data=matrix)

    elif file_list:
        # Order columns by filename
        matrix = numpy.ones((len(label_list), len(file_list))) * default_value

        for file_id, filename in enumerate(file_list):
            current_column = data.filter(filename=filename)
            for item in current_column:
                if item.label in label_list:
                    matrix[label_list.index(item.label), file_id] = item.probability

        from dcase_util.containers import DataMatrix2DContainer
        return DataMatrix2DContainer(data=matrix)
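# Usage sketch (illustrative data): collect per-file classification
# probabilities and turn them into a (labels x files) matrix. Assumes
# ProbabilityContainer accepts a list of item dictionaries, as other
# list containers in dcase_util do.
import dcase_util

probabilities = dcase_util.containers.ProbabilityContainer([
    {'filename': 'audio_01.wav', 'label': 'cat', 'probability': 0.8},
    {'filename': 'audio_01.wav', 'label': 'dog', 'probability': 0.2},
    {'filename': 'audio_02.wav', 'label': 'cat', 'probability': 0.3},
    {'filename': 'audio_02.wav', 'label': 'dog', 'probability': 0.7},
])
matrix = probabilities.as_matrix(label_list=['cat', 'dog'])  # shape (2, 2)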
def sequence(self, data, shift=None, **kwargs):
    """Convert 2D data matrix into sequence of specified length 2D matrices

    Parameters
    ----------
    data : DataContainer or numpy.ndarray
        Data

    shift : int
        Sequencing grid shift in frames. If none given, one given for class initializer is
        used. Value is kept inside data size. Parameter value is stored as new class stored
        value.
        Default value None

    Returns
    -------
    DataMatrix3DContainer

    """

    if shift:
        self.shift = shift

    from dcase_util.containers import DataContainer, DataMatrix2DContainer, DataMatrix3DContainer

    # Make copy of the data to prevent modifications to the original data
    data = copy.deepcopy(data)

    if isinstance(data, numpy.ndarray):
        if len(data.shape) == 2:
            data = DataMatrix2DContainer(data)

    if isinstance(data, DataContainer):
        # Make sure shift index is within data
        self.shift = self.shift % data.length

        # Not the most efficient way as numpy stride_tricks would produce
        # faster code, however, opted for cleaner presentation this time.
        processed_data = []

        if self.shift_border == 'shift':
            segment_indexes = numpy.arange(self.shift, data.length, self.hop_length)

        elif self.shift_border == 'roll':
            segment_indexes = numpy.arange(0, data.length, self.hop_length)

            if self.shift != 0:
                # Roll data
                data.data = numpy.roll(data.data, shift=-self.shift, axis=data.time_axis)

        else:
            message = '{name}: Unknown type for sequence border handling when doing temporal ' \
                      'shifting [{shift_border}].'.format(
                          name=self.__class__.__name__,
                          shift_border=self.shift_border
                      )
            self.logger.exception(message)
            raise ValueError(message)

        if self.padding:
            if len(segment_indexes) == 0:
                # Have at least one segment
                segment_indexes = numpy.array([0])

        else:
            # Remove segments which are not full
            segment_indexes = segment_indexes[(segment_indexes + self.sequence_length - 1) < data.length]

        for segment_start_frame in segment_indexes:
            segment_end_frame = segment_start_frame + self.sequence_length

            frame_ids = numpy.array(range(segment_start_frame, segment_end_frame))

            valid_frames = numpy.where(numpy.logical_and(frame_ids >= 0, frame_ids < data.length))[0]

            if len(valid_frames) / float(self.sequence_length) > self.required_data_amount_per_segment:
                # Process segment only if it has minimum amount of valid frames

                if self.padding == 'repeat':
                    # Handle boundaries with repeated boundary vectors

                    # If start of matrix, pad with first frame
                    frame_ids[frame_ids < 0] = 0

                    # If end of the matrix, pad with last frame
                    frame_ids[frame_ids > data.length - 1] = data.length - 1

                    # Append the segment
                    processed_data.append(data.get_frames(frame_ids=frame_ids))

                elif self.padding == 'zero':
                    # Handle boundaries with zero padding

                    # Initialize current segment with zero content
                    current_segment = numpy.zeros((data.vector_length, self.sequence_length))

                    # Copy data into correct position within the segment
                    current_segment[:, valid_frames] = data.get_frames(frame_ids=frame_ids[valid_frames])

                    # Append the segment
                    processed_data.append(current_segment)

                else:
                    # Append the segment
                    processed_data.append(data.get_frames(frame_ids=frame_ids))

        if len(processed_data) == 0:
            message = '{name}: Cannot create valid segment, adjust segment length and hop size, ' \
                      'or use padding flag. (Data length was {length})'.format(
                          name=self.__class__.__name__,
                          length=data.length
                      )
            self.logger.exception(message)
            raise IOError(message)

        return DataMatrix3DContainer(
            data=numpy.moveaxis(numpy.array(processed_data), 0, 2),
            time_resolution=None,
            processing_chain=data.processing_chain
        )

    else:
        message = '{name}: Unknown data container type.'.format(
            name=self.__class__.__name__,
        )
        self.logger.exception(message)
        raise ValueError(message)
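# Usage sketch (example values): split a (vector_length x frames) feature
# matrix into fixed-length segments. Parameter names follow the attributes
# referenced in the method above. With 100 frames, sequence_length=20 and
# hop_length=20, five full segments fit, giving a (20, 20, 5) data cube
# (bands x frames x sequences).
import numpy
import dcase_util

sequencer = dcase_util.data.Sequencer(sequence_length=20, hop_length=20)
data = dcase_util.containers.DataMatrix2DContainer(numpy.random.rand(20, 100))
sequences = sequencer.sequence(data=data)
print(sequences.data.shape)  # (20, 20, 5)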
def process(self, data=None, label=None, focus_field=None, length_frames=None,
            length_seconds=None, store_processing_chain=False, **kwargs):
    """Encode metadata

    Parameters
    ----------
    data : MetaDataContainer
        Meta data to encode. Give data either through a meta data container or directly
        with the label parameter.

    label : str
        Class label to be encoded

    focus_field : str
        Field from the meta data item to be used in encoding. If None, one given as
        parameter for class constructor is used.

    length_frames : int
        Length of encoded segment in frames. If None, one given as parameter for class
        constructor is used.

    length_seconds : float > 0.0
        Length of encoded segment in seconds. If None, one given as parameter for class
        constructor is used.

    store_processing_chain : bool
        Store processing chain to data container returned
        Default value False

    Returns
    -------
    DataMatrix2DContainer

    """

    if data is None and label is None:
        message = '{name}: Give data or label parameter.'.format(name=self.__class__.__name__)
        self.logger.exception(message)
        raise ValueError(message)

    from dcase_util.containers import MetaDataContainer

    if data is not None and not isinstance(data, MetaDataContainer):
        message = '{name}: Wrong input data type, type required [{input_type}].'.format(
            name=self.__class__.__name__,
            input_type=self.input_type
        )
        self.logger.exception(message)
        raise ValueError(message)

    if focus_field is None:
        focus_field = self.focus_field

    if data is not None and len(data) > 0 and label is None:
        label = data[0].get(focus_field)

    # Do processing
    self.encoder.encode(
        label=label,
        length_frames=length_frames,
        length_seconds=length_seconds
    )

    if store_processing_chain:
        # Get processing chain item
        processing_chain_item = self.get_processing_chain_item()

        if 'process_parameters' not in processing_chain_item:
            processing_chain_item['process_parameters'] = {}

        processing_chain_item['process_parameters']['focus_field'] = focus_field
        processing_chain_item['process_parameters']['length_frames'] = length_frames

        # Create processing chain to be stored in the container, and push chain item into it
        if hasattr(data, 'processing_chain'):
            data.processing_chain.push_processor(**processing_chain_item)
            processing_chain = data.processing_chain

        else:
            processing_chain = ProcessingChain().push_processor(**processing_chain_item)

    else:
        processing_chain = None

    from dcase_util.containers import DataMatrix2DContainer

    container = DataMatrix2DContainer(
        data=self.encoder.data,
        label_list=self.encoder.label_list,
        time_resolution=self.encoder.time_resolution,
        processing_chain=processing_chain
    )

    return container
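# Usage sketch: one-hot encode a scene label into a (labels x frames) matrix.
# Assumes this method belongs to a one-hot style encoding processor such as
# dcase_util.processors.OneHotEncodingProcessor; the label list, resolution,
# and segment length below are example values.
import dcase_util

processor = dcase_util.processors.OneHotEncodingProcessor(
    label_list=['home', 'office', 'street'],
    time_resolution=0.02,
    focus_field='scene_label'
)
onehot = processor.process(label='street', length_frames=10)
print(onehot.data.shape)  # (3, 10)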