def test_save():
    # Empty content
    DictContainer({}).save(filename=os.path.join(tempfile.gettempdir(), 'saved.yaml'))

    # Content
    data2 = {
        'section1': {
            'field1': 1,
            'field2': [1, 2, 3, 4]
        },
        'section2': {
            'field1': {
                'field1': [1, 2, 3, 4]
            },
            'field2': [1, 2, 3, 4]
        }
    }
    DictContainer(data2).save(filename=os.path.join(tempfile.gettempdir(), 'saved.yaml'))

    d = DictContainer().load(filename=os.path.join(tempfile.gettempdir(), 'saved.yaml'))

    nose.tools.assert_dict_equal(d, data2)
def test_empty():
    # Test #1
    d = DictContainer({})
    nose.tools.eq_(d.empty(), True)

    # Test #2
    d = DictContainer({'sec': 1})
    nose.tools.eq_(d.empty(), False)
def test_load():
    # YAML
    tmp = tempfile.NamedTemporaryFile('r+', suffix='.yaml', dir='/tmp', delete=False)
    try:
        tmp.write('section:\n')
        tmp.write('  field1: 1\n')
        tmp.write('  field2: 2\n')
        tmp.close()

        m = DictContainer().load(filename=tmp.name)

        nose.tools.assert_dict_equal(m, {'section': {'field1': 1, 'field2': 2}})
    finally:
        os.unlink(tmp.name)

    # Json
    tmp = tempfile.NamedTemporaryFile('r+', suffix='.json', dir='/tmp', delete=False)
    try:
        tmp.write('{"section":{"field1":1,"field2":2}}\n')
        tmp.close()

        m = DictContainer().load(filename=tmp.name)

        nose.tools.assert_dict_equal(m, {'section': {'field1': 1, 'field2': 2}})
    finally:
        os.unlink(tmp.name)

    # pickle
    tmp = tempfile.NamedTemporaryFile('rb+', suffix='.pickle', dir='/tmp', delete=False)
    try:
        data2 = {
            'section': {
                'field1': 1,
                'field2': 2
            }
        }
        pickle.dump(data2, tmp, protocol=pickle.HIGHEST_PROTOCOL)
        tmp.close()

        m = DictContainer().load(filename=tmp.name)

        nose.tools.assert_dict_equal(m, {'section': {'field1': 1, 'field2': 2}})
    finally:
        os.unlink(tmp.name)

    # msgpack
    tmp = tempfile.NamedTemporaryFile('rb+', suffix='.msgpack', dir='/tmp', delete=False)
    try:
        data2 = {
            'section': {
                'field1': 1,
                'field2': 2
            }
        }
        msgpack.dump(data2, tmp)
        tmp.close()

        m = DictContainer().load(filename=tmp.name)

        nose.tools.assert_dict_equal(m, {'section': {'field1': 1, 'field2': 2}})
    finally:
        os.unlink(tmp.name)

    # Txt
    tmp = tempfile.NamedTemporaryFile('r+', suffix='.txt', dir='/tmp', delete=False)
    try:
        tmp.write('line1\n')
        tmp.write('line2\n')
        tmp.write('line3\n')
        tmp.close()

        m = DictContainer().load(filename=tmp.name)

        nose.tools.assert_dict_equal(m, {0: 'line1\n', 1: 'line2\n', 2: 'line3\n'})
    finally:
        os.unlink(tmp.name)
def model_summary_string(keras_model, mode='keras'): """Model summary in a formatted string, similar to Keras model summary function. Parameters ---------- keras_model : keras model Keras model mode : str Summary mode ['extended', 'keras']. In case 'keras', standard Keras summary is returned. Default value keras Returns ------- str Model summary """ ui = FancyStringifier() output = '' output += ui.line('Model summary') + '\n' if mode == 'extended': layer_name_map = { 'BatchNormalization': 'BatchNorm', } import keras from distutils.version import LooseVersion import keras.backend as keras_backend output += ui.row('Layer type', 'Output', 'Param', 'Name', 'Connected to', 'Activ.', 'Init', widths=[15, 25, 10, 20, 25, 10, 10], indent=4) + '\n' output += ui.row('-', '-', '-', '-', '-', '-', '-') + '\n' for layer in keras_model.layers: connections = [] if LooseVersion(keras.__version__) >= LooseVersion('2.1.3'): for node_index, node in enumerate(layer._inbound_nodes): for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i].name inbound_node_index = node.node_indices[i] inbound_tensor_index = node.tensor_indices[i] connections.append(inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']') else: for node_index, node in enumerate(layer.inbound_nodes): for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i].name inbound_node_index = node.node_indices[i] inbound_tensor_index = node.tensor_indices[i] connections.append(inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']') config = DictContainer(layer.get_config()) layer_name = layer.__class__.__name__ if layer_name in layer_name_map: layer_name = layer_name_map[layer_name] if config.get_path( 'kernel_initializer.class_name') == 'VarianceScaling': init = str( config.get_path('kernel_initializer.config.distribution', '---')) elif config.get_path( 'kernel_initializer.class_name') == 'RandomUniform': init = 'uniform' else: init = '---' output += ui.row( layer_name, str(layer.output_shape), str(layer.count_params()), str(layer.name), str(connections[0]) if len(connections) > 0 else '---', str(config.get('activation', '---')), init) + '\n' trainable_count = int( numpy.sum([ keras_backend.count_params(p) for p in set(keras_model.trainable_weights) ])) non_trainable_count = int( numpy.sum([ keras_backend.count_params(p) for p in set(keras_model.non_trainable_weights) ])) output += ui.line('') + '\n' output += ui.line( 'Parameters', indent=4, ) + '\n' output += ui.data(indent=6, field='Total', value=trainable_count + non_trainable_count) + '\n' output += ui.data(indent=6, field='Trainable', value=trainable_count) + '\n' output += ui.data( indent=6, field='Non-Trainable', value=non_trainable_count) + '\n' else: output_buffer = [] keras_model.summary(print_fn=output_buffer.append) for line in output_buffer: output += ui.line(line, indent=4) + '\n' output += ui.line('') + '\n' output += ui.data( indent=4, field='Input shape', value=keras_model.input_shape) + '\n' output += ui.data( indent=4, field='Output shape', value=keras_model.output_shape) + '\n' return output
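# Usage sketch for model_summary_string() above (not part of the library code):
# builds a tiny Keras model and prints both summary modes. The layer sizes are
# arbitrary illustration values; Keras must be installed.
from keras.models import Sequential
from keras.layers import Dense

example_model = Sequential()
example_model.add(Dense(32, input_shape=(40,), kernel_initializer='uniform', activation='relu'))
example_model.add(Dense(10, kernel_initializer='uniform', activation='softmax'))

# Standard Keras-style summary returned as a string
print(model_summary_string(example_model, mode='keras'))

# Extended per-layer table with connections, activations and initializers
print(model_summary_string(example_model, mode='extended'))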
def __init__(self, filename=None, content_type=None, local_md5=None, remote_file=None, remote_md5=None, remote_bytes=None, **kwargs): """Constructor Parameters ---------- filename : str Local filename. content_type : str or list of str Content type, valid labels ['code', 'documentation', 'meta', 'audio', 'features']. local_md5 : str Checksum of local file (MD5). remote_file : str URL to remote filename. remote_md5 : str Checksum of remote file (MD5). remote_bytes : int Remote file size in bytes """ self.socket_timeout = 120 # Local self.filename = filename self.content_type = content_type self._local_md5 = local_md5 self._local_bytes = None self._local_modified = None # Remote self._remote_file = None self.remote_file = remote_file self.remote_md5 = remote_md5 self._remote_bytes = remote_bytes self._remote_status = None self._remote_modified = None # Run DictContainer init DictContainer.__init__(self, **kwargs) # Check remote url if self.remote_file is not None and validators.url(self.remote_file) is not True: message = '{name}: Remote file URL not valid [{url}]'.format( name=self.__class__.__name__, url=self.remote_file, ) self.logger.exception(message) raise ValueError(message) # Check local filename if self.filename is None: message = '{name}: Local file not set.'.format( name=self.__class__.__name__) self.logger.exception(message) raise ValueError(message) # Check content types if self.content_type is not None: # Validate content type if isinstance(self.content_type, str): self.content_type = [self.content_type] if isinstance(self.content_type, list): for content_type in self.content_type: if content_type not in self.valid_content_types: message = '{name}: Invalid content type given for file [{filename}], type [{content_type}]'.format( name=self.__class__.__name__, content_type=content_type, filename=self.remote_file ) self.logger.exception(message) raise ValueError(message)
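# Usage sketch (not part of the library code): constructing a remote file object.
# The class name RemoteFile is assumed from the attributes used above, and the
# URL / MD5 / byte count are placeholder values, not a real resource.
remote_package = RemoteFile(
    filename='data/audio.zip',                       # local target path
    remote_file='https://example.org/audio.zip',     # must be a valid URL
    remote_md5='0123456789abcdef0123456789abcdef',   # checksum of the remote file
    remote_bytes=1024,
    content_type='audio'                             # one of the valid content types
)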
def test_wrong_path2():
    with dcase_util.utils.DisableLogger():
        DictContainer(data).set_path(path=9, new_value=1)
def test_load_wrong_type2():
    with dcase_util.utils.DisableLogger():
        DictContainer().load(filename=os.path.join(tempfile.gettempdir(), 'wrong.abc'))
class DCASE2018_Task4_DevelopmentSet(AudioTaggingDataset): """DCASE 2018 Large-scale weakly labeled semi-supervised sound event detection in domestic environments """ def __init__(self, storage_name='DCASE2018-task4-development', data_path=None, local_path=None, included_content_types=None, **kwargs): """ Constructor Parameters ---------- storage_name : str Name to be used when storing dataset on disk data_path : str Root path where the dataset is stored. If None, os.path.join(tempfile.gettempdir(), 'dcase_util_datasets') is used. local_path : str Direct storage path setup for the dataset. If None, data_path and storage_name are used to create one. """ kwargs['included_content_types'] = included_content_types kwargs['data_path'] = data_path kwargs['storage_name'] = storage_name kwargs['local_path'] = local_path kwargs['dataset_group'] = 'event' kwargs['dataset_meta'] = { 'authors': 'Nicolas Turpault, Romain Serizel, Hamid Eghbal-zadeh, Ankit Parag Shah', 'title': 'Task 4 Large-scale weakly labeled semi-supervised sound event detection in domestic environments', 'url': 'https://github.com/DCASE-REPO/dcase2018_baseline/tree/master/task4/', 'audio_source': 'Field recording', 'audio_type': 'Natural', 'recording_device_model': None, 'microphone_model': None, 'licence': 'MIT' } kwargs['crossvalidation_folds'] = 2 kwargs['default_audio_extension'] = 'wav' kwargs['package_list'] = [ { 'content_type': 'meta', 'filename': 'dataset/metadata/train/weak.csv' }, { 'content_type': 'meta', 'filename': 'dataset/metadata/train/unlabel_in_domain.csv' }, { 'content_type': 'meta', 'filename': 'dataset/metadata/test/test.csv' }, { 'content_type': 'meta', 'filename': 'dataset/metadata/train/unlabel_out_of_domain.csv', }, ] super(DCASE2018_Task4_DevelopmentSet, self).__init__(**kwargs) def extract_packages(self): """Extract the dataset packages Raises ------ IOError Local package was not found. 
Returns ------- self """ # Make sure evaluation_setup directory exists Path().makedirs( path=os.path.join(self.local_path, self.evaluation_setup_folder)) log = FancyLogger() item_access_log_filename = os.path.join(self.local_path, 'item_access_error.log.csv') if 'audio' in self.included_content_types or self.included_content_types == [ 'all' ]: # mean process audio log.title("Download_data") log.info( "Once database is downloaded, do not forget to check your missing_files" ) non_existing_videos = pandas.DataFrame( columns=["filename", "error"]) log.line("check files exist or download data") # Collect file ids for package in self.package_list: if package.get('content_type') == "meta": base_filepath = os.path.splitext( package.get('filename').split('/')[-1])[0] if 'train' in package.get('filename'): result_audio_directory = os.path.join( self.local_path, 'dataset/audio/train', base_filepath) else: result_audio_directory = os.path.join( self.local_path, 'dataset/audio/test') missing_files = download(package.get('filename'), result_audio_directory, n_jobs=3) if not missing_files.empty: non_existing_videos = non_existing_videos.append( missing_files, ignore_index=True) # Save list of non-accessible videos ListDictContainer(non_existing_videos.to_dict(orient="records"), filename=item_access_log_filename).save( fields=['filename', 'error']) # Evaluation setup filenames train_filename_fold1 = self.evaluation_setup_filename( setup_part='train', fold=1, file_extension='csv') test_filename_fold1 = self.evaluation_setup_filename( setup_part='test', fold=1, file_extension='csv') train_filename_fold2 = self.evaluation_setup_filename( setup_part='train', fold=2, file_extension='csv') test_filename_fold2 = self.evaluation_setup_filename( setup_part='test', fold=2, file_extension='csv') evaluate_filename = self.evaluation_setup_filename( setup_part='evaluate', fold=2, file_extension='csv') # Check that evaluation setup exists evaluation_setup_exists = True if not os.path.isfile(train_filename_fold1) or not os.path.isfile(test_filename_fold1) \ or not os.path.isfile(train_filename_fold2) or not os.path.isfile(test_filename_fold2) \ or not os.path.isfile(evaluate_filename) or not self.meta_container.exists(): evaluation_setup_exists = False if not evaluation_setup_exists: # Evaluation setup was not found, generate one item_access_log_filename = os.path.join( self.local_path, 'item_access_error.log.csv') non_existing_videos = ListDictContainer().load( filename=item_access_log_filename, delimiter=',').get_field_unique('filename') train_meta_weak_fold1 = MetaDataContainer() audio_path = 'dataset/audio/train/weak' for item in MetaDataContainer().load(os.path.join( self.local_path, 'dataset/metadata/train/' 'weak.csv'), fields=["filename", "tags"], csv_header=True): if item.filename not in non_existing_videos: if not item.filename.endswith( self.default_audio_extension): item.filename = os.path.join( audio_path, os.path.splitext(item.filename)[0] + '.' 
+ self.default_audio_extension) else: item.filename = Path(path=item.filename).modify( path_base=audio_path) # Only collect items which exists if audio present if 'audio' in self.included_content_types or 'all' in self.included_content_types: if os.path.isfile( os.path.join(self.local_path, item.filename)): train_meta_weak_fold1.append(item) else: train_meta_weak_fold1.append(item) train_meta_weak_fold1.save(filename=train_filename_fold1, csv_header=True, file_format="CSV") test_meta_unlabel_fold1 = MetaDataContainer() audio_path = 'dataset/audio/train/unlabel_in_domain' for item in MetaDataContainer().load(os.path.join( self.local_path, 'dataset/metadata/train/' 'unlabel_in_domain.csv'), csv_header=True): if item.filename not in non_existing_videos: # If not the right extension, change it if not item.filename.endswith( self.default_audio_extension): item.filename = os.path.join( audio_path, os.path.splitext(item.filename)[0] + '.' + self.default_audio_extension) else: item.filename = Path(path=item.filename).modify( path_base=audio_path) # Only collect items which exists if audio present if 'audio' in self.included_content_types or 'all' in self.included_content_types: if os.path.isfile( os.path.join(self.local_path, item.filename)): test_meta_unlabel_fold1.append(item) else: test_meta_unlabel_fold1.append(item) test_meta_unlabel_fold1.save(filename=test_filename_fold1, csv_header=True, file_format="CSV") # Fold 2 train is all the data used in fold 1 train_meta_weak_fold2 = MetaDataContainer() train_meta_weak_fold2 += MetaDataContainer().load( train_filename_fold1, csv_header=True, file_format="CSV") for item in MetaDataContainer().load(test_filename_fold1, csv_header=True, file_format="CSV"): item.tags = [] train_meta_weak_fold2.append(item) train_meta_weak_fold2.save(filename=train_filename_fold2, csv_header=True) # Evaluate meta is the groundtruth file with test annotations test.csv evaluate_meta = MetaDataContainer() audio_path = 'dataset/audio/test' for item in MetaDataContainer().load(os.path.join( self.local_path, 'dataset/metadata/test/test.csv'), csv_header=True): if item.filename not in non_existing_videos: if not item.filename.endswith( self.default_audio_extension): item.filename = os.path.join( audio_path, os.path.splitext(item.filename)[0] + '.' 
+ self.default_audio_extension) else: item.filename = Path(path=item.filename).modify( path_base=audio_path) # Only collect items which exists if 'audio' in self.included_content_types or 'all' in self.included_content_types: if os.path.isfile( os.path.join(self.local_path, item.filename)): evaluate_meta.append(item) else: evaluate_meta.append(item) evaluate_meta.save(filename=evaluate_filename, csv_header=True, file_format="CSV") # Test meta is filenames of evaluation, labels will be predicted test_meta_strong_fold2 = MetaDataContainer() for filename in evaluate_meta.unique_files: test_meta_strong_fold2.append( MetaDataItem({'filename': filename})) test_meta_strong_fold2.save(filename=test_filename_fold2, csv_header=True, file_format="CSV") # meta_data is the default meta container containing all files of the dataset meta_data = MetaDataContainer() meta_data += MetaDataContainer().load(train_filename_fold1, csv_header=True, file_format="CSV") meta_data += MetaDataContainer().load(test_filename_fold1, csv_header=True, file_format="CSV") meta_data += MetaDataContainer().load(test_filename_fold2, csv_header=True, file_format="CSV") # Save meta meta_data.save(filename=self.meta_file) log.foot() return self def load_crossvalidation_data(self): """Load cross-validation into the container. Returns ------- self """ # Reset cross validation data and insert 'all_data' self.crossvalidation_data = DictContainer({ 'train': { 'all_data': self.meta_container }, 'test': { 'all_data': self.meta_container }, 'evaluate': { 'all_data': self.meta_container }, }) for crossvalidation_set in list(self.crossvalidation_data.keys()): for item in self.crossvalidation_data[crossvalidation_set][ 'all_data']: self.process_meta_item(item=item) # Load cross validation folds for fold in self.folds(): # Initialize data self.crossvalidation_data['train'][fold] = MetaDataContainer() self.crossvalidation_data['test'][fold] = MetaDataContainer() self.crossvalidation_data['evaluate'][fold] = MetaDataContainer() # Get filenames train_filename = self.evaluation_setup_filename( setup_part='train', fold=fold, file_extension="csv") test_filename = self.evaluation_setup_filename( setup_part='test', fold=fold, file_extension="csv") evaluate_filename = self.evaluation_setup_filename( setup_part='evaluate', fold=fold, file_extension="csv") if os.path.isfile(train_filename): # Training data for fold exists, load and process it self.crossvalidation_data['train'][fold] += MetaDataContainer( filename=train_filename).load() if os.path.isfile(test_filename): # Testing data for fold exists, load and process it self.crossvalidation_data['test'][fold] += MetaDataContainer( filename=test_filename).load() if os.path.isfile(evaluate_filename): # Evaluation data for fold exists, load and process it self.crossvalidation_data['evaluate'][ fold] += MetaDataContainer( filename=evaluate_filename).load() # Process items for item in self.crossvalidation_data['train'][fold]: self.process_meta_item(item=item) for item in self.crossvalidation_data['test'][fold]: self.process_meta_item(item=item) for item in self.crossvalidation_data['evaluate'][fold]: self.process_meta_item(item=item) return self
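# Usage sketch for the dataset class above (not part of the library code),
# assuming the base class provides the usual initialize()/folds()/train()/test()
# helpers. 'datasets' is a placeholder path; downloading the real data needs
# network access and substantial disk space, so only metadata is requested here.
db = DCASE2018_Task4_DevelopmentSet(
    data_path='datasets',
    included_content_types=['meta']
)
db.initialize()   # download packages, extract them and build the evaluation setup

for fold in db.folds():
    training_material = db.train(fold=fold)   # MetaDataContainer for training
    testing_material = db.test(fold=fold)     # MetaDataContainer for testing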
def load_crossvalidation_data(self): """Load cross-validation into the container. Returns ------- self """ # Reset cross validation data and insert 'all_data' self.crossvalidation_data = DictContainer({ 'train': { 'all_data': self.meta_container }, 'test': { 'all_data': self.meta_container }, 'evaluate': { 'all_data': self.meta_container }, }) for crossvalidation_set in list(self.crossvalidation_data.keys()): for item in self.crossvalidation_data[crossvalidation_set][ 'all_data']: self.process_meta_item(item=item) # Load cross validation folds for fold in self.folds(): # Initialize data self.crossvalidation_data['train'][fold] = MetaDataContainer() self.crossvalidation_data['test'][fold] = MetaDataContainer() self.crossvalidation_data['evaluate'][fold] = MetaDataContainer() # Get filenames train_filename = self.evaluation_setup_filename( setup_part='train', fold=fold, file_extension="csv") test_filename = self.evaluation_setup_filename( setup_part='test', fold=fold, file_extension="csv") evaluate_filename = self.evaluation_setup_filename( setup_part='evaluate', fold=fold, file_extension="csv") if os.path.isfile(train_filename): # Training data for fold exists, load and process it self.crossvalidation_data['train'][fold] += MetaDataContainer( filename=train_filename).load() if os.path.isfile(test_filename): # Testing data for fold exists, load and process it self.crossvalidation_data['test'][fold] += MetaDataContainer( filename=test_filename).load() if os.path.isfile(evaluate_filename): # Evaluation data for fold exists, load and process it self.crossvalidation_data['evaluate'][ fold] += MetaDataContainer( filename=evaluate_filename).load() # Process items for item in self.crossvalidation_data['train'][fold]: self.process_meta_item(item=item) for item in self.crossvalidation_data['test'][fold]: self.process_meta_item(item=item) for item in self.crossvalidation_data['evaluate'][fold]: self.process_meta_item(item=item) return self
def system_meta(self, results, task=None, check_development_dataset=True, check_evaluation_dataset=True): """Check system result scores given in the meta data Parameters ---------- results : dict Result meta data task : str, optional Temporal override for the task parameter given to class constructor. check_development_dataset : bool Check development dataset results check_evaluation_dataset : bool Check evaluation dataset results Returns ------- self """ if task is None: task = self.task if results is None: self.error_log.append(u'No results section') else: results = DictContainer(results) if check_development_dataset: # Check development dataset results if results.get('development_dataset') is None: self.error_log.append(u'No development results given') else: if task == 'ASC': if results.get_path( 'development_dataset.overall.accuracy' ) is None: self.error_log.append( u'No development overall result given ') elif task == 'SED_event': if results.get_path( 'development_dataset.event_based.overall.er' ) is None: self.error_log.append( u'No development overall result given [event_based.overall.er]' ) if results.get_path( 'development_dataset.event_based.overall.f1' ) is None: self.error_log.append( u'No development overall result given [event_based.overall.f1]' ) elif task == 'SED_segment': if results.get_path( 'development_dataset.segment_based.overall.er' ) is None: self.error_log.append( u'No development overall result given [segment_based.overall.er]' ) if results.get_path( 'development_dataset.segment_based.overall.f1' ) is None: self.error_log.append( u'No development overall result given [segment_based.overall.f1]' ) elif task == 'task4': pass # Check development dataset / class wise results if task == 'ASC': if results.get_path( 'development_dataset.class_wise') is None: self.error_log.append( u'No class_wise development results given') else: if len( results.get_path( 'development_dataset.class_wise') ) != len(self.class_labels): self.error_log.append( u'Incorrect number class-wise development results given [{class_wise:d}/{target:d}]' .format(class_wise=len( results.get_path( 'development_dataset.class_wise')), target=len(self.class_labels))) for class_label, class_data in iteritems( results.get_path( 'development_dataset.class_wise')): if 'accuracy' not in class_data or class_data[ 'accuracy'] is None: self.error_log.append( u'Incorrect class-wise development results given for [{class_label:s}]' .format(class_label=class_label)) elif task == 'SED_event': if results.get_path( 'development_dataset.event_based.class_wise' ) is not None: if len( results.get_path( 'development_dataset.event_based.class_wise' )) != len(self.class_labels): self.error_log.append( u'Incorrect number class-wise development results given [{class_wise:d}/{target:d}]' .format(class_wise=len( results.get_path( 'development_dataset.event_based.class_wise' )), target=len(self.class_labels))) for class_label, class_data in iteritems( results.get_path( 'development_dataset.event_based.class_wise' )): if class_data.get('er') is None: self.error_log.append( u'Incorrect class-wise development results given for [{class_label:s} / er]' .format(class_label=class_label)) if class_data.get('f1') is None: self.error_log.append( u'Incorrect class-wise development results given for [{class_label:s} / f1]' .format(class_label=class_label)) else: self.error_log.append( u'No class_wise development results given') elif task == 'SED_segment': if results.get_path( 'development_dataset.segment_based.class_wise' ) is not None: if 
len( results.get_path( 'development_dataset.segment_based.class_wise' )) != len(self.class_labels): self.error_log.append( u'Incorrect number class-wise development results given [{class_wise:d}/{target:d}]' .format(class_wise=len( results.get_path( 'development_dataset.segment_based.class_wise' )), target=len(self.class_labels))) for class_label, class_data in iteritems( results.get_path( 'development_dataset.segment_based.class_wise' )): if class_data.get('er') is None: self.error_log.append( u'Incorrect class-wise development results given for [{class_label:s} / er]' .format(class_label=class_label)) if class_data.get('f1') is None: self.error_log.append( u'Incorrect class-wise development results given for [{class_label:s} / f1]' .format(class_label=class_label)) else: self.error_log.append( u'No class_wise development results given') elif task == 'task4': pass if check_evaluation_dataset: # Check evaluation dataset results if 'evaluation_dataset' not in results: self.error_log.append(u'No evaluation results given') else: if task == 'ASC': if results.get_path( 'evaluation_dataset.overall') is None: self.error_log.append( u'No evaluation results given') if results.get_path( 'evaluation_dataset.class_wise') is not None: if len( results.get_path( 'evaluation_dataset.class_wise') ) != len(self.class_labels): self.error_log.append( u'Incorrect number class-wise evaluation results given [{class_wise:d}/{target:d}]' .format(class_wise=len( results.get_path( 'evaluation_dataset.class_wise')), target=len(self.class_labels))) for class_label, class_data in iteritems( results.get_path( 'evaluation_dataset.class_wise')): if class_data.get('accuracy') is None: self.error_log.append( u'Incorrect class-wise evaluation results given for [{class_label:s}]' .format(class_label=class_label)) else: self.error_log.append( u'No class_wise development results given') elif task == 'SED_event': if results.get_path( 'evaluation_dataset.event_based.overall.er' ) is None: self.error_log.append( u'No evaluation results given [event_based.overall.er]' ) if results.get_path( 'evaluation_dataset.event_based.overall.f1' ) is None: self.error_log.append( u'No evaluation results given [event_based.overall.f1]' ) if results.get_path( 'evaluation_dataset.event_based.class_wise' ) is not None: if len( results.get_path( 'evaluation_dataset.event_based.class_wise' )) != len(self.class_labels): self.error_log.append( u'Incorrect number class-wise evaluation results given [{class_wise:d}/{target:d}]' .format(class_wise=len( results.get_path( 'evaluation_dataset.event_based.class_wise' )), target=len(self.class_labels))) for class_label, class_data in iteritems( results.get_path( 'evaluation_dataset.event_based.class_wise' )): if class_data.get('er') is None: self.error_log.append( u'Incorrect class-wise evaluation results given for [{class_label:s} / er]' .format(class_label=class_label)) if class_data.get('f1') is None: self.error_log.append( u'Incorrect class-wise evaluation results given for [{class_label:s} / f1]' .format(class_label=class_label)) else: self.error_log.append( u'No class_wise evaluation results given') elif task == 'SED_segment': if results.get_path( 'evaluation_dataset.segment_based.overall.er' ) is None: self.error_log.append( u'No evaluation results given [segment_based.overall.er]' ) if results.get_path( 'evaluation_dataset.segment_based.overall.f1' ) is None: self.error_log.append( u'No evaluation results given [segment_based.overall.f1]' ) if results.get_path( 
'evaluation_dataset.segment_based.class_wise' ) is not None: if len( results.get_path( 'evaluation_dataset.segment_based.class_wise' )) != len(self.class_labels): self.error_log.append( u'Incorrect number class-wise evaluation results given [{class_wise:d}/{target:d}]' .format(class_wise=len( results.get_path( 'evaluation_dataset.segment_based.class_wise' )), target=len(self.class_labels))) for class_label, class_data in iteritems( results.get_path( 'evaluation_dataset.segment_based.class_wise' )): if class_data.get('er') is None: self.error_log.append( u'Incorrect class-wise evaluation results given for [{class_label:s} / er]' .format(class_label=class_label)) if class_data.get('f1') is None: self.error_log.append( u'Incorrect class-wise evaluation results given for [{class_label:s} / f1]' .format(class_label=class_label)) else: self.error_log.append( u'No class_wise evaluation results given') elif task == 'task4': pass return self
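# Usage sketch (not part of the library code): feeding minimal, made-up ASC
# result metadata to system_meta(). `checker` is a placeholder for an instance
# of the submission-checker class that owns the method above.
class_labels = checker.class_labels
results = {
    'development_dataset': {
        'overall': {'accuracy': 0.75},
        'class_wise': {label: {'accuracy': 0.70} for label in class_labels}
    },
    'evaluation_dataset': {
        'overall': {'accuracy': 0.72},
        'class_wise': {label: {'accuracy': 0.68} for label in class_labels}
    }
}

checker.system_meta(results=results, task='ASC')
if checker.error_log:
    print('\n'.join(checker.error_log))   # empty when all required fields are present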
def setup_logging(parameters=None, coloredlogs=False, logging_file=None, default_setup_file='logging.yaml', default_level=logging.INFO, environmental_variable='LOG_CFG'): """Setup logging configuration Parameters ---------- parameters : dict Parameters in dict Default value None coloredlogs : bool Use coloredlogs Default value False logging_file : str Log filename for file based logging, if none given no file logging is used. Default value None environmental_variable : str Environmental variable to get the logging setup filename, if set will override default_setup_file Default value 'LOG_CFG' default_setup_file : str Default logging parameter file, used if one is not set in given ParameterContainer Default value 'logging.yaml' default_level : logging.level Default logging level, used if one is not set in given ParameterContainer Default value 'logging.INFO' Returns ------- nothing """ class LoggerFilter(object): def __init__(self, level): self.__level = level def filter(self, log_record): return log_record.levelno <= self.__level formatters = { 'simple': "[%(levelname).1s] %(message)s", 'normal': "%(asctime)s\t[%(name)-20s]\t[%(levelname)-8s]\t%(message)s", 'extended': "[%(asctime)s] [%(name)s]\t [%(levelname)-8s]\t %(message)s \t(%(filename)s:%(lineno)s)", 'extended2': "[%(levelname).1s] %(message)s \t(%(filename)s:%(lineno)s)", 'file_extended': "[%(levelname).1s] [%(asctime)s] %(message)s", } if not parameters: logging_parameter_file = default_setup_file value = os.getenv(environmental_variable, None) if value: # If environmental variable set logging_parameter_file = value if os.path.exists(logging_parameter_file): with open(logging_parameter_file, 'rt') as f: config = yaml.safe_load(f.read()) logging.config.dictConfig(config) try: # Check if coloredlogs is available import coloredlogs coloredlogs.install( level=config['handlers']['console']['level'], fmt=config['formatters'][config['handlers']['console']['formatter']]['format'] ) except ImportError: pass else: if coloredlogs: try: # Check if coloredlogs is available import coloredlogs coloredlogs.install( level=logging.INFO, fmt=formatters['simple'], reconfigure=True ) except ImportError: logger = logging.getLogger() logger.setLevel(default_level) console_info = logging.StreamHandler() console_info.setLevel(logging.INFO) console_info.setFormatter(logging.Formatter(formatters['simple'])) console_info.addFilter(LoggerFilter(logging.INFO)) logger.addHandler(console_info) console_debug = logging.StreamHandler() console_debug.setLevel(logging.DEBUG) console_debug.setFormatter(logging.Formatter(formatters['simple'])) console_debug.addFilter(LoggerFilter(logging.DEBUG)) logger.addHandler(console_debug) console_warning = logging.StreamHandler() console_warning.setLevel(logging.WARNING) console_warning.setFormatter(logging.Formatter(formatters['simple'])) console_warning.addFilter(LoggerFilter(logging.WARNING)) logger.addHandler(console_warning) console_critical = logging.StreamHandler() console_critical.setLevel(logging.CRITICAL) console_critical.setFormatter(logging.Formatter(formatters['extended2'])) console_critical.addFilter(LoggerFilter(logging.CRITICAL)) logger.addHandler(console_critical) console_error = logging.StreamHandler() console_error.setLevel(logging.ERROR) console_error.setFormatter(logging.Formatter(formatters['extended2'])) console_error.addFilter(LoggerFilter(logging.ERROR)) logger.addHandler(console_error) if logging_file: file_info = logging.handlers.RotatingFileHandler( filename=logging_file, maxBytes=10485760, 
backupCount=20, encoding='utf8' ) file_info.setLevel(logging.INFO) file_info.setFormatter(logging.Formatter(formatters['file_extended'])) logger.addHandler(file_info) else: logger = logging.getLogger() logger.setLevel(default_level) console_info = logging.StreamHandler() console_info.setLevel(logging.INFO) console_info.setFormatter(logging.Formatter(formatters['simple'])) console_info.addFilter(LoggerFilter(logging.INFO)) logger.addHandler(console_info) console_debug = logging.StreamHandler() console_debug.setLevel(logging.DEBUG) console_debug.setFormatter(logging.Formatter(formatters['simple'])) console_debug.addFilter(LoggerFilter(logging.DEBUG)) logger.addHandler(console_debug) console_warning = logging.StreamHandler() console_warning.setLevel(logging.WARNING) console_warning.setFormatter(logging.Formatter(formatters['simple'])) console_warning.addFilter(LoggerFilter(logging.WARNING)) logger.addHandler(console_warning) console_critical = logging.StreamHandler() console_critical.setLevel(logging.CRITICAL) console_critical.setFormatter(logging.Formatter(formatters['extended2'])) console_critical.addFilter(LoggerFilter(logging.CRITICAL)) logger.addHandler(console_critical) console_error = logging.StreamHandler() console_error.setLevel(logging.ERROR) console_error.setFormatter(logging.Formatter(formatters['extended2'])) console_error.addFilter(LoggerFilter(logging.ERROR)) logger.addHandler(console_error) if logging_file: file_info = logging.handlers.RotatingFileHandler( filename=logging_file, maxBytes=10485760, backupCount=20, encoding='utf8' ) file_info.setLevel(logging.INFO) file_info.setFormatter(logging.Formatter(formatters['file_extended'])) logger.addHandler(file_info) else: from dcase_util.containers import DictContainer parameters = DictContainer(parameters) logging.config.dictConfig(parameters.get('parameters')) if (parameters.get('colored', False) and 'console' in parameters.get_path('parameters.handlers')): try: # Check if coloredlogs is available import coloredlogs coloredlogs.install( level=parameters.get_path('parameters.handlers.console.level'), fmt=parameters.get_path('parameters.formatters')[ parameters.get_path('parameters.handlers.console.formatter') ].get('format') ) except ImportError: pass # Function to handle uncaught expections def handle_exception(exc_type, exc_value, exc_traceback): if issubclass(exc_type, KeyboardInterrupt): sys.__excepthook__(exc_type, exc_value, exc_traceback) return logger.error('Uncaught exception', exc_info=(exc_type, exc_value, exc_traceback)) sys.excepthook = handle_exception
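# Usage sketch for setup_logging() above (not part of the library code).
import logging

# Console-only logging with the built-in handler setup
setup_logging()

# Colored console output (silently falls back to plain handlers when the
# coloredlogs package is not installed) plus a rotating log file
setup_logging(coloredlogs=True, logging_file='system.log')

logging.getLogger(__name__).info('Logging configured')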
def submission_authors(self, authors, check_email=True, check_affiliation=True, check_affiliation_abbreviation=True, check_affiliation_department=True): """Check submission authors Parameters ---------- authors : list of dict List of authors dicts. check_email : bool Check that author email exists. check_affiliation : bool Check author affiliation. check_affiliation_abbreviation : bool Check that affiliation abbreviation exists. check_affiliation_department : bool Check that affiliation has department defined. Returns ------- self """ if not isinstance(authors, list): self.error_log.append( u'Authors not given in list format for the submission') for author in authors: author = DictContainer(author) if author.get('lastname') is None: self.error_log.append( u'No lastname given for author ({last_name:s}, {first_name:s})' .format(last_name=author['lastname'], first_name=author['firstname'])) if author.get('firstname') is None: self.error_log.append( u'No firstname given for author ({last_name:s}, {first_name:s})' .format(last_name=author['lastname'], first_name=author['firstname'])) if check_email: if author.get('email') is None: self.error_log.append( u'No email given for author ({last_name:s}, {first_name:s})' .format(last_name=author['lastname'], first_name=author['firstname'])) if check_affiliation: if author.get('affiliation') is None: self.error_log.append( u'No affiliation given for author ({last_name:s}, {first_name:s})' .format(last_name=author['lastname'], first_name=author['firstname'])) else: if isinstance(author.get('affiliation'), list): for a in author.get('affiliation'): affiliation = ', '.join( filter(None, list(a.values()))) if check_affiliation_abbreviation: if a.get('abbreviation') is None: self.error_log.append( u'No abbreviation given ({last_name:s}, {first_name:s}, {affiliation:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) if check_affiliation_department: if a.get('department') is None: self.error_log.append( u'No department given ({last_name:s}, {first_name:s}, {affiliation:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) if a.get('institute') is None: self.error_log.append( u'No institute given ({last_name:s}, {first_name:s}, {affiliation:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) if a.get('location') is None: self.error_log.append( u'No location given ({last_name:s}, {first_name:s}, {affiliation:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) else: affiliation = ', '.join( filter(None, list(author['affiliation'].values()))) if check_affiliation_abbreviation: if author.get_path( 'affiliation.abbreviation') is None: self.error_log.append( u'No abbreviation given ({last_name:s}, {first_name:s}, {affiliation:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) if check_affiliation_department: if author.get_path( 'affiliation.department') is None: self.error_log.append( u'No department given ({last_name:s}, {first_name:s}, {affiliation:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) if author.get_path('affiliation.institute') is None: self.error_log.append( u'No institute given ({last_name:s}, {first_name:s}, {affiliation:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) if 
author.get_path('affiliation.location') is None: self.error_log.append( u'No location given ({last_name:s}, {first_name:s})' .format(last_name=author['lastname'], first_name=author['firstname'], affiliation=affiliation)) return self
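# Usage sketch (not part of the library code): the author structure expected by
# submission_authors(). All names and affiliation fields are made-up placeholders;
# `checker` stands for an instance of the submission-checker class above.
authors = [
    {
        'lastname': 'Doe',
        'firstname': 'Jane',
        'email': 'jane.doe@example.org',
        'affiliation': {
            'abbreviation': 'EU',
            'department': 'Department of Signal Processing',
            'institute': 'Example University',
            'location': 'Example City, Exampleland'
        }
    }
]

checker.submission_authors(authors=authors)
print(checker.error_log)   # empty list when all required fields are present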
def model_summary_string(keras_model, mode='keras', show_parameters=True, display=False): """Model summary in a formatted string, similar to Keras model summary function. Parameters ---------- keras_model : keras model Keras model mode : str Summary mode ['extended', 'keras']. In case 'keras', standard Keras summary is returned. Default value keras show_parameters : bool Show model parameter count and input / output shapes Default value True display : bool Display summary immediately, otherwise return string Default value False Returns ------- str Model summary """ if is_jupyter(): ui = FancyHTMLStringifier() html_mode = True else: ui = FancyStringifier() html_mode = False output = '' output += ui.line('Model summary') + '\n' if mode == 'extended' or mode == 'extended_wide': layer_name_map = { 'BatchNormalization': 'BatchNorm', } layer_type_html_tags = { 'InputLayer': '<span class="label label-default">{0:s}</span>', 'Dense': '<span class="label label-primary">{0:s}</span>', 'TimeDistributed': '<span class="label label-primary">{0:s}</span>', 'BatchNorm': '<span class="label label-default">{0:s}</span>', 'Activation': '<span class="label label-default">{0:s}</span>', 'Dropout': '<span class="label label-default">{0:s}</span>', 'Flatten': '<span class="label label-success">{0:s}</span>', 'Reshape': '<span class="label label-success">{0:s}</span>', 'Permute': '<span class="label label-success">{0:s}</span>', 'Conv1D': '<span class="label label-warning">{0:s}</span>', 'Conv2D': '<span class="label label-warning">{0:s}</span>', 'MaxPooling1D': '<span class="label label-success">{0:s}</span>', 'MaxPooling2D': '<span class="label label-success">{0:s}</span>', 'MaxPooling3D': '<span class="label label-success">{0:s}</span>', 'AveragePooling1D': '<span class="label label-success">{0:s}</span>', 'AveragePooling2D': '<span class="label label-success">{0:s}</span>', 'AveragePooling3D': '<span class="label label-success">{0:s}</span>', 'GlobalMaxPooling1D': '<span class="label label-success">{0:s}</span>', 'GlobalMaxPooling2D': '<span class="label label-success">{0:s}</span>', 'GlobalMaxPooling3D': '<span class="label label-success">{0:s}</span>', 'GlobalAveragePooling1D': '<span class="label label-success">{0:s}</span>', 'GlobalAveragePooling2D': '<span class="label label-success">{0:s}</span>', 'GlobalAveragePooling3D': '<span class="label label-success">{0:s}</span>', 'RNN': '<span class="label label-danger">{0:s}</span>', 'SimpleRNN': '<span class="label label-danger">{0:s}</span>', 'GRU': '<span class="label label-danger">{0:s}</span>', 'CuDNNGRU': '<span class="label label-danger">{0:s}</span>', 'LSTM': '<span class="label label-danger">{0:s}</span>', 'CuDNNLSTM': '<span class="label label-danger">{0:s}</span>', 'Bidirectional': '<span class="label label-danger">{0:s}</span>' } from tensorflow import keras from distutils.version import LooseVersion import tensorflow.keras.backend as keras_backend table_data = { 'layer_type': [], 'output': [], 'parameter_count': [], 'name': [], 'connected_to': [], 'activation': [], 'initialization': [] } row_separators = [] prev_name = None for layer_id, layer in enumerate(keras_model.layers): connections = [] if LooseVersion(keras.__version__) >= LooseVersion('2.1.3'): for node_index, node in enumerate(layer._inbound_nodes): for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i].name inbound_node_index = node.node_indices[i] inbound_tensor_index = node.tensor_indices[i] connections.append( inbound_layer + '[' + str(inbound_node_index) + 
'][' + str(inbound_tensor_index) + ']' ) else: for node_index, node in enumerate(layer.inbound_nodes): for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i].name inbound_node_index = node.node_indices[i] inbound_tensor_index = node.tensor_indices[i] connections.append( inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']' ) config = DictContainer(layer.get_config()) layer_name = layer.__class__.__name__ if layer_name in layer_name_map: layer_name = layer_name_map[layer_name] if html_mode and layer_name in layer_type_html_tags: layer_name = layer_type_html_tags[layer_name].format(layer_name) if config.get_path('kernel_initializer.class_name') == 'VarianceScaling': init = str(config.get_path('kernel_initializer.config.distribution', '---')) elif config.get_path('kernel_initializer.class_name') == 'RandomUniform': init = 'uniform' else: init = '-' name_parts = layer.name.split('_') if prev_name != name_parts[0]: row_separators.append(layer_id) prev_name = name_parts[0] table_data['layer_type'].append(layer_name) table_data['output'].append(str(layer.output_shape)) table_data['parameter_count'].append(str(layer.count_params())) table_data['name'].append(layer.name) table_data['connected_to'].append(str(connections[0]) if len(connections) > 0 else '-') table_data['activation'].append(str(config.get('activation', '-'))) table_data['initialization'].append(init) trainable_count = int( numpy.sum([keras_backend.count_params(p) for p in set(keras_model.trainable_weights)]) ) non_trainable_count = int( numpy.sum([keras_backend.count_params(p) for p in set(keras_model.non_trainable_weights)]) ) # Show row separators only if they are useful if len(row_separators) == len(keras_model.layers): row_separators = None if mode == 'extended': output += ui.table( cell_data=[table_data['name'], table_data['layer_type'], table_data['output'], table_data['parameter_count']], column_headers=['Layer name', 'Layer type', 'Output shape', 'Parameters'], column_types=['str30', 'str20', 'str25', 'str20'], column_separators=[1, 2], row_separators=row_separators, indent=4 ) elif mode == 'extended_wide': output += ui.table( cell_data=[table_data['name'], table_data['layer_type'], table_data['output'], table_data['parameter_count'], table_data['activation'], table_data['initialization']], column_headers=['Layer name', 'Layer type', 'Output shape', 'Parameters', 'Act.', 'Init.'], column_types=['str30', 'str20', 'str25', 'str20', 'str15', 'str15'], column_separators=[1, 2, 3], row_separators=row_separators, indent=4 ) if show_parameters: output += ui.line('') + '\n' output += ui.line('Parameters', indent=4) + '\n' output += ui.data(indent=6, field='Total', value=trainable_count + non_trainable_count) + '\n' output += ui.data(indent=6, field='Trainable', value=trainable_count) + '\n' output += ui.data(indent=6, field='Non-Trainable', value=non_trainable_count) + '\n' else: output_buffer = [] keras_model.summary(print_fn=output_buffer.append) for line in output_buffer: if is_jupyter(): output += ui.line('<code>'+line+'</code>', indent=4) + '\n' else: output += ui.line(line, indent=4) + '\n' model_config = keras_model.get_config() if show_parameters: output += ui.line('') + '\n' output += ui.line('Input', indent=4) + '\n' output += ui.data(indent=6, field='Shape', value=keras_model.input_shape) + '\n' output += ui.line('Output', indent=4) + '\n' output += ui.data(indent=6, field='Shape', value=keras_model.output_shape) + '\n' if isinstance(model_config, dict) and 
'layers' in model_config: output += ui.data( indent=6, field='Activation', value=model_config['layers'][-1]['config'].get('activation') ) + '\n' elif isinstance(model_config, list): output += ui.data( indent=6, field='Activation', value=model_config[-1].get('config', {}).get('activation') ) + '\n' if display: if is_jupyter(): from IPython.core.display import display, HTML display(HTML(output)) else: print(output) else: return output
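# Usage sketch for the tf.keras variant of model_summary_string() above
# (not part of the library code); layer sizes are arbitrary illustration values.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

example_model = Sequential([
    Dense(32, input_shape=(40,), activation='relu'),
    Dense(10, activation='softmax')
])

# Standard Keras-style summary returned as a string
print(model_summary_string(example_model, mode='keras'))

# Extended table with layer names, types, output shapes and parameter counts,
# printed directly instead of returned
model_summary_string(example_model, mode='extended', display=True)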
def create_sequential_model(model_parameter_list, input_shape=None, output_shape=None, constants=None, return_functional=False): """Create sequential Keras model Example parameters:: model_parameter_list = [ { 'class_name': 'Dense', 'config': { 'units': 'CONSTANT_B', 'kernel_initializer': 'uniform', 'activation': 'relu' } }, { 'class_name': 'Dropout', 'config': { 'rate': 0.2 } }, { 'class_name': 'Dense', 'config': { 'units': 'CONSTANT_A' * 2, 'kernel_initializer': 'uniform', 'activation': 'relu' } }, { 'class_name': 'Dropout', 'config': { 'rate': 0.2 } }, { 'class_name': 'Dense', 'config': { 'units': 'CLASS_COUNT', 'kernel_initializer': 'uniform', 'activation': 'softmax' } } ] constants = { 'CONSTANT_A': 50, 'CONSTANT_B': 100 } Parameters ---------- model_parameter_list : dict or DictContainer Model parameters input_shape : int Size of the input layer Default value None output_shape : int Size of the output layer Default value None constants : dict or DictContainer Constants used in the model_parameter definitions. Default value None return_functional : bool Convert sequential model into function model. Default value False Returns ------- Keras model """ from tensorflow.keras.models import Sequential keras_model = Sequential() tuple_fields = [ 'input_shape', 'kernel_size', 'pool_size', 'dims', 'target_shape', 'strides' ] # Get constants for model if constants is None: constants = {} if 'INPUT_SHAPE' not in constants and input_shape is not None: constants['INPUT_SHAPE'] = input_shape if 'OUTPUT_SHAPE' not in constants and output_shape is not None: constants['OUTPUT_SHAPE'] = output_shape if 'CLASS_COUNT' not in constants: constants['CLASS_COUNT'] = output_shape if 'FEATURE_VECTOR_LENGTH' not in constants: constants['FEATURE_VECTOR_LENGTH'] = input_shape def logger(): logger_instance = logging.getLogger(__name__) if not logger_instance.handlers: setup_logging() return logger_instance def process_field(value, constants_dict): math_eval = SimpleMathStringEvaluator() if isinstance(value, str): sub_fields = value.split() if len(sub_fields) > 1: # Inject constants to math formula for subfield_id, subfield in enumerate(sub_fields): if subfield in constants_dict: sub_fields[subfield_id] = str(constants_dict[subfield]) value = ''.join(sub_fields) else: # Inject constants if value in constants_dict: value = str(constants_dict[value]) return math_eval.eval(value) elif isinstance(value, list): processed_value_list = [] for item_id, item in enumerate(value): processed_value_list.append( process_field( value=item, constants_dict=constants_dict ) ) return processed_value_list else: return value # Inject constant into constants with equations for field in list(constants.keys()): constants[field] = process_field( value=constants[field], constants_dict=constants ) # Setup layers for layer_id, layer_setup in enumerate(model_parameter_list): # Get layer parameters layer_setup = DictContainer(layer_setup) if 'config' not in layer_setup: layer_setup['config'] = {} # Get layer class try: layer_class = getattr( importlib.import_module('tensorflow.keras.layers'), layer_setup['class_name'] ) except AttributeError: message = 'Invalid Keras layer type [{type}].'.format( type=layer_setup['class_name'] ) logger().exception(message) raise AttributeError(message) # Inject constants for config_field in list(layer_setup['config'].keys()): layer_setup['config'][config_field] = process_field( value=layer_setup['config'][config_field], constants_dict=constants ) # Convert lists into tuples for field in tuple_fields: if field 
in layer_setup['config'] and isinstance(layer_setup['config'][field], list): layer_setup['config'][field] = tuple(layer_setup['config'][field]) # Inject input shape for Input layer if not given if layer_id == 0 and layer_setup.get_path('config.input_shape') is None and input_shape is not None: # Set input layer dimension for the first layer if not set layer_setup['config']['input_shape'] = (input_shape,) if 'wrapper' in layer_setup: # Get layer wrapper class try: wrapper_class = getattr( importlib.import_module("tensorflow.keras.layers"), layer_setup['wrapper'] ) except AttributeError: message = 'Invalid Keras layer wrapper type [{type}].'.format( type=layer_setup['wrapper'] ) logger().exception(message) raise AttributeError(message) wrapper_parameters = layer_setup.get('config_wrapper', {}) if layer_setup.get('config'): keras_model.add( wrapper_class(layer_class(**dict(layer_setup.get('config'))), **dict(wrapper_parameters))) else: keras_model.add(wrapper_class(layer_class(), **dict(wrapper_parameters))) else: if layer_setup.get('config'): keras_model.add(layer_class(**dict(layer_setup.get('config')))) else: keras_model.add(layer_class()) if return_functional: from tensorflow.keras.layers import Input from tensorflow.keras.models import Model input_layer = Input(batch_shape=keras_model.layers[0].input_shape) prev_layer = input_layer for layer in keras_model.layers: prev_layer = layer(prev_layer) keras_model = Model( inputs=[input_layer], outputs=[prev_layer] ) return keras_model
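# Usage sketch for create_sequential_model() above (not part of the library code);
# the layer list and constants follow the documented parameter format, with
# arbitrary illustration sizes.
model_parameter_list = [
    {
        'class_name': 'Dense',
        'config': {'units': 'CONSTANT_A', 'kernel_initializer': 'uniform', 'activation': 'relu'}
    },
    {
        'class_name': 'Dropout',
        'config': {'rate': 0.2}
    },
    {
        'class_name': 'Dense',
        'config': {'units': 'CLASS_COUNT', 'kernel_initializer': 'uniform', 'activation': 'softmax'}
    }
]

example_model = create_sequential_model(
    model_parameter_list=model_parameter_list,
    input_shape=40,               # injected as INPUT_SHAPE / FEATURE_VECTOR_LENGTH
    output_shape=10,              # injected as OUTPUT_SHAPE / CLASS_COUNT
    constants={'CONSTANT_A': 50}
)
example_model.summary()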
def test_log():
    with dcase_util.utils.DisableLogger():
        DictContainer(filename='test.yaml').log()
def get_audio_info(filename, logger=None): """Get information about audio file without opening it. Parameters ---------- filename : str filename logger : Logger class Logger class Default value None Returns ------- DictContainer Dict with audio file information """ from dcase_util.utils.files import FileFormat from dcase_util.files import File from dcase_util.containers import DictContainer if logger is None: logger = logging.getLogger(__name__) file = File(filename=filename, valid_formats=[ FileFormat.WAV, FileFormat.FLAC, FileFormat.OGG, FileFormat.MP3, FileFormat.M4A, FileFormat.MP4, FileFormat.WEBM ]) if not file.exists(): # File does not exists message = '{name}: File does not exists [{filename}] '.format( name=__name__, filename=filename) logger.exception(message) raise IOError(message) file.detect_file_format() if file.format is None: # Unknown format message = '{name}: File format cannot be detected for file [{filename}] '.format( name=__name__, filename=filename) logger.exception(message) raise IOError(message) info = DictContainer({ 'filename': file.filename, 'bytes': file.bytes, 'format': file.format }) if file.format == FileFormat.WAV: import soundfile wav_info = soundfile.info(file=file.filename) info['fs'] = wav_info.samplerate info['channels'] = wav_info.channels info['duration_sec'] = wav_info.duration info['duration_ms'] = (wav_info.frames / float(wav_info.samplerate)) * 1000 info['duration_samples'] = wav_info.frames info['subtype'] = { 'name': wav_info.subtype, 'info': wav_info.subtype_info } # Map sub type to bit depth if info['subtype'] == 'PCM_16': info['bit_depth'] = 16 elif info['subtype'] == 'PCM_24': info['bit_depth'] = 24 elif info['subtype'] == 'PCM_32': info['bit_depth'] = 32 elif file.format in [ FileFormat.FLAC, FileFormat.OGG, FileFormat.MP3, FileFormat.M4A, FileFormat.MP4, FileFormat.WEBM ]: # Use ffprobe to get file info from other formats import subprocess import json import shlex cmd = "ffprobe -v quiet -print_format json -show_streams" args = shlex.split(cmd) args.append(file.filename) # Run command line command, fetch and parse json output try: output = subprocess.check_output(args).decode('utf-8') except OSError: # Error while running the command message = '{name}: It seems that ffmpeg (ffprobe) is not installed.'.format( name=__name__, filename=filename) logger.exception(message) raise IOError(message) ffmpeg_meta = json.loads(output) for stream in ffmpeg_meta['streams']: if stream['codec_type'] == 'audio': # Fetch audio info from first audio stream info['fs'] = int(stream['sample_rate']) # Get duration if 'duration' not in stream: info['duration_sec'] = None elif is_float(stream['duration']): info['duration_sec'] = float(stream['duration']) else: info['duration_sec'] = None # Get bit rate if 'bit_rate' not in stream: info['bit_rate'] = None elif is_int(stream['bit_rate']): info['bit_rate'] = int(stream['bit_rate']) else: info['bit_rate'] = None # Get codec info info['codec'] = {} if 'codec_name' in stream: info['codec']['name'] = stream['codec_name'] if 'codec_long_name' in stream: info['codec']['name_long'] = stream['codec_long_name'] if 'codec_type' in stream: info['codec']['type'] = stream['codec_type'] break return info
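# Usage sketch for get_audio_info() above (not part of the library code);
# 'example.wav' is a placeholder filename. Non-WAV formats additionally
# require ffprobe (ffmpeg) to be available on the PATH.
info = get_audio_info(filename='example.wav')

print(info['format'])           # detected file format
print(info['fs'])               # sample rate in Hz
print(info['duration_sec'])     # duration in seconds
print(info.get('bit_depth'))    # set for PCM WAV subtypes, otherwise missing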
def test_wrong_path():
    with dcase_util.utils.DisableLogger():
        DictContainer(data).get_path(path=9)
def prepare(self):
    """Prepare dataset for usage.

    Returns
    -------
    self

    """

    if not self.meta_container.exists():
        scene_label = 'home'

        dcase_cross_val_data = ListDictContainer(
            filename=os.path.join(
                self.local_path,
                'chime_home',
                'development_chunks_refined_crossval_dcase2016.csv'
            )
        ).load(fields=['id', 'filename', 'set_id'])

        audio_files = {}
        for item in dcase_cross_val_data:
            audio_filename = os.path.join(
                'chime_home',
                'chunks',
                item['filename'] + self.sample_mode + '.wav'
            )
            annotation_filename = os.path.join(
                'chime_home',
                'chunks',
                item['filename'] + '.csv'
            )

            if audio_filename not in audio_files:
                audio_files[audio_filename] = {
                    'audio': audio_filename,
                    'meta': annotation_filename
                }

        meta_data = MetaDataContainer()
        for audio_filename, data in iteritems(audio_files):
            current_meta_data = DictContainer(
                filename=os.path.join(self.local_path, data['meta'])
            ).load()

            tags = []
            for i, tag in enumerate(current_meta_data['majorityvote']):
                if tag != 'S' and tag != 'U':
                    tags.append(self.tagcode_to_taglabel(tag))

            name = os.path.split(audio_filename)[1]
            segment_name = name[0:name.find('_chunk')]
            chunk_name = name[name.find('_chunk') + 1:].split('.')[0]

            item = MetaDataItem({
                'filename': audio_filename,
                'scene_label': scene_label,
                'tags': ';'.join(tags) + ';',
                'identifier': segment_name
            })
            self.process_meta_item(item=item, absolute_path=False)

            meta_data.append(item)

        # Save meta
        meta_data.save(filename=self.meta_file)

        # Load meta and cross validation
        self.load()

    all_folds_found = True
    for fold in range(1, self.crossvalidation_folds + 1):
        train_filename = self.evaluation_setup_filename(setup_part='train', fold=fold)
        test_filename = self.evaluation_setup_filename(setup_part='test', fold=fold)
        eval_filename = self.evaluation_setup_filename(setup_part='evaluate', fold=fold)

        if not os.path.isfile(train_filename):
            all_folds_found = False

        if not os.path.isfile(test_filename):
            all_folds_found = False

        if not os.path.isfile(eval_filename):
            all_folds_found = False

    if not all_folds_found:
        Path().makedirs(path=self.evaluation_setup_path)

        dcase_crossval = {
            1: [],
            2: [],
            3: [],
            4: [],
            5: [],
        }

        dcase_cross_val_data = ListDictContainer(
            filename=os.path.join(
                self.local_path,
                'chime_home',
                'development_chunks_refined_crossval_dcase2016.csv'
            )
        ).load(fields=['id', 'filename', 'set_id'])

        for item in dcase_cross_val_data:
            dcase_crossval[int(item['set_id']) + 1].append(
                self.relative_to_absolute_path(
                    os.path.join(
                        'chime_home',
                        'chunks',
                        item['filename'] + self.sample_mode + '.wav'
                    )
                )
            )

        for fold in range(1, self.crossvalidation_folds + 1):
            # Collect training and testing files
            train_files = []
            for f in range(1, self.crossvalidation_folds + 1):
                if f != fold:
                    train_files += dcase_crossval[f]

            test_files = dcase_crossval[fold]

            # Create meta containers and save them

            # Train
            train_filename = self.evaluation_setup_filename(
                setup_part='train',
                fold=fold
            )
            train_meta = MetaDataContainer(filename=train_filename)
            for filename in train_files:
                item = self.file_meta(filename)[0]
                self.process_meta_item(item=item, absolute_path=False)
                train_meta.append(item)
            train_meta.save()

            # Test
            test_filename = self.evaluation_setup_filename(
                setup_part='test',
                fold=fold
            )
            test_meta = MetaDataContainer(filename=test_filename)
            for filename in test_files:
                item = MetaDataItem({
                    'filename': self.absolute_to_relative_path(filename)
                })
                test_meta.append(item)
            test_meta.save()

            # Evaluate
            eval_filename = self.evaluation_setup_filename(
                setup_part='evaluate',
                fold=fold
            )
            eval_meta = MetaDataContainer(filename=eval_filename)
            for filename in test_files:
                item = self.file_meta(filename)[0]
                self.process_meta_item(item=item, absolute_path=False)
                eval_meta.append(item)
            eval_meta.save()

        # Load meta and cross validation
        self.load()

    return self
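# The prepare() method above is normally not called by user code directly; in
# dcase_util it is typically reached through the dataset initialization chain.
# A minimal usage sketch, assuming the class is the CHiME-Home development set
# wrapper in dcase_util.datasets -- the class name, data path and helper calls
# below are assumptions rather than taken from the code above:
import dcase_util

# Hypothetical usage; exact class name and storage location are assumptions.
db = dcase_util.datasets.CHiMEHome_DomesticAudioTag_DevelopmentSet(
    data_path='datasets'  # local storage location (assumed)
)

# initialize() is expected to download and extract the data and then call
# prepare(), which writes the meta file and the per-fold train/test/evaluate
# setup files produced above.
db.initialize()

# After preparation the cross-validation folds can be iterated.
for fold in db.folds():
    train_items = db.train(fold=fold)
    test_items = db.test(fold=fold)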
def test_load_not_found():
    with dcase_util.utils.DisableLogger():
        DictContainer().load(
            filename=os.path.join(tempfile.gettempdir(), 'wrong.cpickle')
        )
def prepare(self):
    """Prepare dataset for usage.

    Returns
    -------
    self

    """

    if not self.meta_container.exists():
        scene_label = 'home'

        evaluation_chunks = ListDictContainer(
            filename=os.path.join(
                self.local_path,
                'chime_home',
                'evaluation_chunks_refined.csv'
            )
        ).load(fields=['id', 'filename', 'set_id'])

        audio_files = {}
        for item in evaluation_chunks:
            audio_filename = os.path.join(
                'chime_home',
                'chunks',
                item['filename'] + self.sample_mode + '.wav'
            )
            annotation_filename = os.path.join(
                'chime_home',
                'chunks',
                item['filename'] + '.csv'
            )

            if audio_filename not in audio_files:
                audio_files[audio_filename] = {
                    'audio': audio_filename,
                    'meta': annotation_filename
                }

        meta_data = MetaDataContainer()
        for audio_filename, data in iteritems(audio_files):
            current_meta_data = DictContainer(
                filename=os.path.join(self.local_path, data['meta'])
            ).load()

            tags = []
            for i, tag in enumerate(current_meta_data['majorityvote']):
                if tag != 'S' and tag != 'U':
                    tags.append(self.tagcode_to_taglabel(tag))

            name = os.path.split(audio_filename)[1]
            segment_name = name[0:name.find('_chunk')]
            chunk_name = name[name.find('_chunk') + 1:].split('.')[0]

            item = MetaDataItem({
                'filename': audio_filename,
                'scene_label': scene_label,
                'tags': ';'.join(tags) + ';',
                'identifier': segment_name
            })
            self.process_meta_item(item=item, absolute_path=False)

            meta_data.append(item)

        # Save meta
        meta_data.save(filename=self.meta_file)

        # Load meta and cross validation
        self.load()

    all_folds_found = True

    train_filename = self.evaluation_setup_filename(setup_part='train')
    test_filename = self.evaluation_setup_filename(setup_part='test')
    eval_filename = self.evaluation_setup_filename(setup_part='evaluate')

    if not os.path.isfile(train_filename):
        all_folds_found = False

    if not os.path.isfile(test_filename):
        all_folds_found = False

    if not os.path.isfile(eval_filename):
        all_folds_found = False

    if not all_folds_found:
        Path().makedirs(path=self.evaluation_setup_path)

        # Train
        train_filename = self.evaluation_setup_filename(setup_part='train')
        train_meta = MetaDataContainer(filename=train_filename)
        for filename in self.train_files():
            train_meta.append(self.file_meta(filename)[0])
        train_meta.save()

        # Test
        test_filename = self.evaluation_setup_filename(setup_part='test')
        test_meta = MetaDataContainer(filename=test_filename)
        for filename in self.test_files():
            test_meta.append(
                MetaDataItem({
                    'filename': self.absolute_to_relative_path(filename)
                })
            )
        test_meta.save()

        # Evaluate
        eval_filename = self.evaluation_setup_filename(setup_part='evaluate')
        eval_meta = MetaDataContainer(filename=eval_filename)
        for filename in self.test_files():
            eval_meta.append(self.file_meta(filename)[0])
        eval_meta.save()

        # Load meta and cross validation
        self.load()

    return self
def test_container():
    data_container = DictContainer(data)

    # Path access with dotted paths, wildcards and list paths
    nose.tools.eq_(data_container.get_path(path='level1.field1'), 1)
    nose.tools.eq_(data_container.get_path(path='level1.level2a.field2'), 2)
    nose.tools.eq_(data_container.get_path(path='level1.level2b.field3'), 3)
    nose.tools.eq_(data_container.get_path(path='level1.level2a.level3a.field1'), 1)
    nose.tools.eq_(
        data_container.get_path(path='level1.level2a.level3a'),
        {'field1': 1, 'field2': 2, 'field3': 3}
    )
    nose.tools.eq_(data_container.get_path(path='level1.level2c.*.field1'), [1, 1])

    nose.tools.eq_(data_container.get_path(path=['level1', 'field1']), 1)
    nose.tools.eq_(data_container.get_path(path=['level1', 'level2a', 'field2']), 2)

    # Hashing
    nose.tools.eq_(data_container.get_hash(), '23ffcb8de3af794547779197397ab987')
    nose.tools.eq_(
        data_container.get_hash_for_path(dotted_path='level1.level2c'),
        'a084001c6e49eef233a95f8996d1183c'
    )

    # Merging with an override dictionary
    data_container.merge(override={
        'level1': {
            'field1': 10,
            'field2': 20,
            'field3': 30,
            'level2a': {
                'field1': 10,
                'field2': 20,
                'field3': 30,
                'level3a': {
                    'field1': 10,
                    'field2': 20,
                    'field3': 30,
                },
                'level3b': {
                    'field1': 10,
                    'field2': 20,
                    'field3': 30,
                },
            }
        }
    })
    nose.tools.eq_(data_container.get_path(path='level1.field1'), 10)
    nose.tools.eq_(data_container.get_path(path='level1.level2a.field2'), 20)
    nose.tools.eq_(data_container.get_path(path='level1.level2b.field3'), 3)

    # Setting values through paths
    data_container.set_path(path='level1.field1', new_value=100)
    nose.tools.eq_(data_container.get_path(path='level1.field1'), 100)

    data_container.set_path(path='level1.level2c.*.field1', new_value=100)
    nose.tools.eq_(data_container.get_path(path='level1.level2c.*.field1'), [100, 100])

    nose.tools.eq_(data_container.get_hash(), '0adb9bf0f7f579e8b297b7186b0570da')

    # Adding a '_hash' field does not change the hash
    data_container['_hash'] = 'test'
    nose.tools.eq_(data_container.get_hash(), '0adb9bf0f7f579e8b297b7186b0570da')

    data_container.set_path(path=['level1', 'field2'], new_value=100)
    nose.tools.eq_(data_container.get_path(path='level1.field2'), 100)

    # Leaf path listing
    data_container = DictContainer(data)
    nose.tools.eq_(
        data_container.get_leaf_path_list(),
        ['level1.field1', 'level1.field2', 'level1.field3',
         'level1.level2a.field1', 'level1.level2a.field2', 'level1.level2a.field3',
         'level1.level2a.level3a.field1', 'level1.level2a.level3a.field2', 'level1.level2a.level3a.field3',
         'level1.level2a.level3b.field1', 'level1.level2a.level3b.field2', 'level1.level2a.level3b.field3',
         'level1.level2b.field1', 'level1.level2b.field2', 'level1.level2b.field3',
         'level1.level2b.level3.field1', 'level1.level2b.level3.field2', 'level1.level2b.level3.field3',
         'level1.level2c.level3a.field1', 'level1.level2c.level3a.field2', 'level1.level2c.level3a.field3',
         'level1.level2c.level3b.field1', 'level1.level2c.level3b.field2', 'level1.level2c.level3b.field3']
    )
    nose.tools.eq_(
        data_container.get_leaf_path_list(target_field='field1'),
        ['level1.field1', 'level1.level2a.field1',
         'level1.level2a.level3a.field1', 'level1.level2a.level3b.field1',
         'level1.level2b.field1', 'level1.level2b.level3.field1',
         'level1.level2c.level3a.field1', 'level1.level2c.level3b.field1']
    )
    nose.tools.eq_(
        data_container.get_leaf_path_list(target_field_startswith='field'),
        ['level1.field1', 'level1.field2', 'level1.field3',
         'level1.level2a.field1', 'level1.level2a.field2', 'level1.level2a.field3',
         'level1.level2a.level3a.field1', 'level1.level2a.level3a.field2', 'level1.level2a.level3a.field3',
         'level1.level2a.level3b.field1', 'level1.level2a.level3b.field2', 'level1.level2a.level3b.field3',
         'level1.level2b.field1', 'level1.level2b.field2', 'level1.level2b.field3',
         'level1.level2b.level3.field1', 'level1.level2b.level3.field2', 'level1.level2b.level3.field3',
         'level1.level2c.level3a.field1', 'level1.level2c.level3a.field2', 'level1.level2c.level3a.field3',
         'level1.level2c.level3b.field1', 'level1.level2c.level3b.field2', 'level1.level2c.level3b.field3']
    )
    nose.tools.eq_(
        data_container.get_leaf_path_list(target_field_endswith='d1'),
        ['level1.field1', 'level1.level2a.field1',
         'level1.level2a.level3a.field1', 'level1.level2a.level3b.field1',
         'level1.level2b.field1', 'level1.level2b.level3.field1',
         'level1.level2c.level3a.field1', 'level1.level2c.level3b.field1']
    )
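# The test above exercises the DictContainer path API against the module-level
# 'data' fixture. As a condensed, self-contained sketch of the same calls --
# the example dictionary below is made up for illustration, not the fixture:
from dcase_util.containers import DictContainer

d = DictContainer({
    'level1': {
        'field1': 1,
        'level2': {'field2': 2},
    }
})

d.get_path(path='level1.field1')                      # -> 1, dotted path
d.get_path(path=['level1', 'level2'])                 # -> {'field2': 2}, list path
d.set_path(path='level1.level2.field2', new_value=20) # update through the path
d.get_leaf_path_list()                                # -> ['level1.field1', 'level1.level2.field2']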
def __init__(self, *args, **kwargs):
    # Run DictContainer init through the MRO; an additional explicit
    # DictContainer.__init__ call would only repeat the same initialization.
    super(OneToOneMappingContainer, self).__init__(*args, **kwargs)
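# Since the constructor above only forwards to DictContainer, a short usage
# sketch may help. Dictionary-style access is inherited from DictContainer;
# the map() helper and the flipped (value-to-key) view used below are assumed
# members of OneToOneMappingContainer, not taken from the code shown here,
# and the tag labels are made up for illustration.
from dcase_util.containers import OneToOneMappingContainer

mapping = OneToOneMappingContainer({
    'c': 'child_speech',
    'm': 'adult_male_speech',
    'f': 'adult_female_speech',
})

# Plain dict access works because the class inherits from DictContainer.
label = mapping['c']

# Assumed helpers: forward lookup and the flipped value -> key mapping.
label = mapping.map('m')
code = mapping.flipped.map('adult_male_speech')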