def __init__(self, params, ref_files_folder=None, use_polar_format=True):
        """Load and cache segment-level reference labels for every metadata
        file found under the reference directory.

        :param params: parameter dict; provides 'dataset_dir' and
            'lad_doa_thresh'.
        :param ref_files_folder: optional override for the reference-metadata
            directory; defaults to <dataset_dir>/metadata_dev.
        :param use_polar_format: when False, reference DOAs are converted from
            polar to cartesian before segmenting.
        """
        self._use_polar_format = use_polar_format
        if ref_files_folder is None:
            self._desc_dir = os.path.join(params['dataset_dir'], 'metadata_dev')
        else:
            self._desc_dir = ref_files_folder
        self._doa_thresh = params['lad_doa_thresh']

        # Feature class supplies file parsing, format conversion and framing.
        self._feat_cls = cls_feature_class.FeatureClass(params)

        # Collect reference labels, keyed by file name, one entry per
        # reference file across all split sub-directories.
        self._ref_labels = {}
        for split_name in os.listdir(self._desc_dir):
            split_dir = os.path.join(self._desc_dir, split_name)
            for fname in os.listdir(split_dir):
                gt_dict = self._feat_cls.load_output_format_file(
                    os.path.join(split_dir, fname))
                if not self._use_polar_format:
                    gt_dict = self._feat_cls.convert_output_format_polar_to_cartesian(
                        gt_dict)
                self._ref_labels[fname] = self._feat_cls.segment_labels(
                    gt_dict, self._feat_cls.get_nb_frames())

        self._nb_ref_files = len(self._ref_labels)
        print('SELD metrics class: loaded : {} reference files'.format(
            len(self._ref_labels)))
    def __init__(self,
                 dataset='foa',
                 feat_label_dir='',
                 is_eval=False,
                 split=1,
                 batch_size=16,
                 seq_len=64,
                 shuffle=True,
                 per_file=False,
                 channels_separate=False):
        self._per_file = per_file
        self._is_eval = is_eval
        self._splits = np.array(split)
        self._batch_size = batch_size
        self._seq_len = seq_len
        self._shuffle = shuffle
        self._feat_cls = cls_feature_class.FeatureClass(
            feat_label_dir=feat_label_dir, dataset=dataset, is_eval=is_eval)
        self._label_dir = self._feat_cls.get_label_dir()
        self._feat_dir = self._feat_cls.get_normalized_feat_dir()

        self._filenames_list = list()
        self._nb_frames_file = 0  # Using a fixed number of frames in feat files. Updated in _get_label_filenames_sizes()
        self._feat_len = None
        self._2_nb_ch = 2 * self._feat_cls.get_nb_channels()
        self._label_len = None  # total length of label - DOA + SED
        self._doa_len = None  # DOA label length
        self._class_dict = self._feat_cls.get_classes()
        self._nb_classes = len(self._class_dict.keys())
        self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr(
        )
        self._get_filenames_list_and_feat_label_sizes()

        self._batch_seq_len = self._batch_size * self._seq_len
        self._circ_buf_feat = None
        self._circ_buf_label = None
        self._channels_separate = channels_separate

        if self._per_file:
            self._nb_total_batches = len(self._filenames_list)
        else:
            self._nb_total_batches = int(
                np.floor((len(self._filenames_list) * self._nb_frames_file /
                          float(self._seq_len * self._batch_size))))

        # self._dummy_feat_vec = np.ones(self._feat_len.shape) *

        print('\tDatagen_mode: {}, nb_files: {}, nb_classes:{}\n'
              '\tnb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len:{}\n'.
              format('eval' if self._is_eval else 'dev',
                     len(self._filenames_list), self._nb_classes,
                     self._nb_frames_file, self._feat_len, self._2_nb_ch,
                     self._label_len))

        print('\tDataset: {}, split: {}\n'
              '\tbatch_size: {}, seq_len: {}, shuffle: {}\n'
              '\tlabel_dir: {}\n '
              '\tfeat_dir: {}\n'.format(dataset, split, self._batch_size,
                                        self._seq_len, self._shuffle,
                                        self._label_dir, self._feat_dir))
    def __init__(
            self, params, split=1, shuffle=True, per_file=False, is_eval=False
    ):
        self._per_file = per_file
        self._is_eval = is_eval
        self._splits = np.array(split)
        self._batch_size = params['batch_size']
        self._feature_seq_len = params['feature_sequence_length']
        self._label_seq_len = params['label_sequence_length']
        self._is_accdoa = params['is_accdoa']
        self._doa_objective = params['doa_objective']
        self._shuffle = shuffle
        self._feat_cls = cls_feature_class.FeatureClass(params=params, is_eval=self._is_eval)
        self._label_dir = self._feat_cls.get_label_dir()
        self._feat_dir = self._feat_cls.get_normalized_feat_dir()

        self._filenames_list = list()
        self._nb_frames_file = 0     # Using a fixed number of frames in feat files. Updated in _get_label_filenames_sizes()
        self._nb_mel_bins = self._feat_cls.get_nb_mel_bins()
        self._nb_ch = None
        self._label_len = None  # total length of label - DOA + SED
        self._doa_len = None    # DOA label length
        self._class_dict = self._feat_cls.get_classes()
        self._nb_classes = self._feat_cls.get_nb_classes()
        self._get_filenames_list_and_feat_label_sizes()

        self._feature_batch_seq_len = self._batch_size*self._feature_seq_len
        self._label_batch_seq_len = self._batch_size*self._label_seq_len
        self._circ_buf_feat = None
        self._circ_buf_label = None

        if self._per_file:
            self._nb_total_batches = len(self._filenames_list)
        else:
            self._nb_total_batches = int(np.floor((len(self._filenames_list) * self._nb_frames_file /
                                               float(self._feature_batch_seq_len))))

        # self._dummy_feat_vec = np.ones(self._feat_len.shape) *

        print(
            '\tDatagen_mode: {}, nb_files: {}, nb_classes:{}\n'
            '\tnb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len:{}\n'.format(
                'eval' if self._is_eval else 'dev', len(self._filenames_list),  self._nb_classes,
                self._nb_frames_file, self._nb_mel_bins, self._nb_ch, self._label_len
                )
        )

        print(
            '\tDataset: {}, split: {}\n'
            '\tbatch_size: {}, feat_seq_len: {}, label_seq_len: {}, shuffle: {}\n'
            '\tTotal batches in dataset: {}\n'
            '\tlabel_dir: {}\n '
            '\tfeat_dir: {}\n'.format(
                params['dataset'], split,
                self._batch_size, self._feature_seq_len, self._label_seq_len, self._shuffle,
                self._nb_total_batches,
                self._label_dir, self._feat_dir
            )
        )
# ---- Beispiel #4 (scraped example separator) ----
def calculate_metrics(metadata_dir, prediction_paths):
    '''Calculate metrics using official tool. This part of code is modified from:
    https://github.com/sharathadavanne/seld-dcase2019/blob/master/calculate_SELD_metrics.py

    Args:
      metadata_dir: string, directory of reference files.
      prediction_paths: list of string

    Returns:
      metrics: dict with keys 'sed_error_rate', 'sed_f1_score', 'doa_error',
        'doa_frame_recall' and 'seld_score'
    '''

    # Load feature class
    feat_cls = cls_feature_class.FeatureClass()

    # Load evaluation metric class.
    # FIX: local was named `eval`, shadowing the builtin eval(); renamed.
    seld_eval = evaluation_metrics.SELDMetrics(nb_frames_1s=feat_cls.nb_frames_1s(),
                                               data_gen=feat_cls)

    seld_eval.reset()  # Reset the evaluation metric parameters
    for prediction_path in prediction_paths:
        # Reference CSV shares the prediction file's base name.
        reference_path = os.path.join(
            metadata_dir, '{}.csv'.format(get_filename(prediction_path)))

        prediction_dict = evaluation_metrics.load_output_format_file(
            prediction_path)
        reference_dict = feat_cls.read_desc_file(reference_path)

        # Generate classification labels for SELD
        reference_tensor = feat_cls.get_clas_labels_for_file(reference_dict)
        prediction_tensor = evaluation_metrics.output_format_dict_to_classification_labels(
            prediction_dict, feat_cls)

        # Accumulate SED and DOA scores (max over axis 2 collapses to
        # frame-wise class presence for SED).
        seld_eval.update_sed_scores(prediction_tensor.max(2),
                                    reference_tensor.max(2))
        seld_eval.update_doa_scores(prediction_tensor, reference_tensor)

    # Overall SED and DOA scores
    sed_error_rate, sed_f1_score = seld_eval.compute_sed_scores()
    doa_error, doa_frame_recall = seld_eval.compute_doa_scores()
    seld_score = evaluation_metrics.compute_seld_metric(
        [sed_error_rate, sed_f1_score], [doa_error, doa_frame_recall])

    metrics = {
        'sed_error_rate': sed_error_rate,
        'sed_f1_score': sed_f1_score,
        'doa_error': doa_error,
        'doa_frame_recall': doa_frame_recall,
        'seld_score': seld_score
    }

    return metrics
# Visualize the DCASE 2019 SELD task dataset distribution

import os
import numpy as np
import sys
sys.path.append(os.path.join(sys.path[0], '..'))
import cls_feature_class
import matplotlib.pyplot as plot
plot.switch_backend('Qt4Agg')  # NOTE(review): requires Qt4 bindings; switch to 'TkAgg' below if unavailable
# plot.switch_backend('TkAgg')
from IPython import embed
# Path to the metadata folder
dev_dataset = '/home/adavanne/taitoSharedData/DCASE2019/dataset/metadata_dev'

# Dataset geometry and class info come from the feature class defaults.
feat_cls = cls_feature_class.FeatureClass()
hop_len_s = feat_cls.get_hop_len_sec()
max_frames = feat_cls.get_nb_frames()
unique_classes_dict = feat_cls.get_classes()
nb_classes = len(unique_classes_dict)
azi_list, ele_list = feat_cls.get_azi_ele_list()
# Grid indices below assume azimuth/elevation are quantized in 10-degree steps.
min_azi_ind = min(azi_list) // 10
min_ele_ind = min(ele_list) // 10
nb_ir = 5
nb_files_per_split = [0] * 5
split_info_dic = {}
for dataset_path in [dev_dataset]:
    for file in os.listdir(dataset_path):
        desc_dict = feat_cls.read_desc_file(os.path.join(dataset_path, file))
        # NOTE(review): split/ir/ov are parsed from fixed character positions,
        # i.e. filenames like 'splitS_irI_ovO...' — confirm against the actual
        # metadata file naming. The loop body appears truncated in this snippet.
        split = int(file[5])
        ir = int(file[9])
        ov = int(file[13])
# Extracts the features, labels, and normalizes the training and test split features. Make sure you update the location
# of the downloaded datasets before in the cls_feature_class.py

import cls_feature_class

dataset_name = 'tau'  # Datasets: ansim, resim, cansim, cresim and real

# Extracts feature and labels for all overlap and splits
for ovo in [1, 2]:  # SE overlap
    for splito in [0]:    # all splits. Use [1, 8, 9] for 'real' dataset
        for nffto in [512]:
            # Fresh FeatureClass per (overlap, split, nfft) combination.
            feat_cls = cls_feature_class.FeatureClass(ov=ovo, split=splito, nfft=nffto, dataset=dataset_name)

            # Extract features and normalize them
            feat_cls.extract_all_feature()
            feat_cls.preprocess_features()

            # # Extract labels in regression mode
            feat_cls.extract_all_labels('regr', 0)
# ---- Beispiel #7 (scraped example separator) ----
    def __init__(self, datagen_mode='train', dataset='resim', ov=1, ov_num=1,
                 split=1, db=30, batch_size=32, seq_len=64, shuffle=True,
                 nfft=512, classifier_mode='regr', weakness=0, cnn3d=False,
                 xyz_def_zero=False, extra_name='', azi_only=False):
        """Batch generator for the simulated/real SELD dataset layouts.

        Reads normalized features and labels produced by
        ``cls_feature_class.FeatureClass`` and serves them in
        ``batch_size * seq_len`` frame windows.
        """
        self._datagen_mode = datagen_mode
        self._classifier_mode = classifier_mode
        self._batch_size, self._seq_len = batch_size, seq_len
        self._shuffle = shuffle
        self._split, self._ov_num = split, ov_num

        # Feature class resolves the feature/label directories for this setup.
        self._feat_cls = cls_feature_class.FeatureClass(
            dataset=dataset, ov=ov, split=split, db=db, nfft=nfft)
        self._label_dir = self._feat_cls.get_label_dir(
            classifier_mode, weakness, extra_name)
        self._feat_dir = self._feat_cls.get_normalized_feat_dir(extra_name)
        self._thickness = weakness
        self._xyz_def_zero = xyz_def_zero
        self._azi_only = azi_only

        self._filenames_list = list()
        self._nb_frames_file = None  # assumed identical across feat files
        self._feat_len = None
        self._2_nb_ch = 8
        self._label_len = None  # total label width: SED + DOA
        self._doa_len = None  # DOA-only label width
        self._class_dict = self._feat_cls.get_classes()
        self._nb_classes = len(self._class_dict.keys())
        self._default_azi, self._default_ele = \
            self._feat_cls.get_default_azi_ele_regr()
        self._is_cnn3d_model = cnn3d
        self._get_label_filenames_sizes()

        self._batch_seq_len = self._batch_size * self._seq_len
        self._circ_buf_feat = None
        self._circ_buf_label = None

        # As many full batch_size*seq_len windows as the data provides.
        total_frames = len(self._filenames_list) * self._nb_frames_file
        self._nb_total_batches = int(
            np.floor(total_frames / float(self._seq_len * self._batch_size)))

        print('Datagen_mode: {}, nb_files: {}, nb_classes:{}\n'
              'nb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len:{}\n'.format(
                  self._datagen_mode, len(self._filenames_list),
                  self._nb_classes, self._nb_frames_file, self._feat_len,
                  self._2_nb_ch, self._label_len))

        print('Dataset: {}, ov: {}, split: {}\n'
              'batch_size: {}, seq_len: {}, shuffle: {}\n'
              'label_dir: {}\n '
              'feat_dir: {}\n'.format(dataset, ov, split, self._batch_size,
                                      self._seq_len, self._shuffle,
                                      self._label_dir, self._feat_dir))
# ---- Beispiel #8 (scraped example separator) ----
# Extracts the features, labels, and normalizes the development and evaluation split features.

import cls_feature_class
import parameter

process_str = 'dev'#, eval'   # 'dev' or 'eval' will extract features for the respective set accordingly
                            #  'dev, eval' will extract features of both sets together

params = parameter.get_params()


# NOTE: substring checks below mean process_str = 'dev, eval' runs both branches.
if 'dev' in process_str:
    # -------------- Extract features and labels for development set -----------------------------
    dev_feat_cls = cls_feature_class.FeatureClass(params, is_eval=False)

    # Extract features and normalize them
    dev_feat_cls.extract_all_feature()
    dev_feat_cls.preprocess_features()

    # # Extract labels in regression mode
    dev_feat_cls.extract_all_labels()


if 'eval' in process_str:
    # -----------------------------Extract ONLY features for evaluation set-----------------------------
    # No label extraction here: evaluation-set labels are not available.
    eval_feat_cls = cls_feature_class.FeatureClass(params, is_eval=True)

    # Extract features and normalize them
    eval_feat_cls.extract_all_feature()
    eval_feat_cls.preprocess_features()
# ---- Beispiel #9 (scraped example separator) ----
# Extracts the features, labels, and normalizes the development and evaluation split features.
# NOTE: Change the dataset_dir and feat_label_dir path accordingly

import cls_feature_class

process_str = 'dev'  # 'dev' or 'eval' will extract features for the respective set accordingly
#  'dev, eval' will extract features of both sets together

dataset_name = 'foa'  # 'foa' -ambisonic or 'mic' - microphone signals
dataset_dir = '/Volumes/Dinge/DCASE2019_subset/'   # Base folder containing the foa/mic and metadata folders
feat_label_dir = '/Volumes/Dinge/DCASE2019_subset/feat_label_tmp/'  # Directory to dump extracted features and labels


# NOTE: substring checks below mean process_str = 'dev, eval' runs both branches.
if 'dev' in process_str:
    # -------------- Extract features and labels for development set -----------------------------
    dev_feat_cls = cls_feature_class.FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir,
                                                  feat_label_dir=feat_label_dir)

    # Extract features and normalize them
    dev_feat_cls.extract_all_feature()
    dev_feat_cls.preprocess_features()

    # # Extract labels in regression mode
    dev_feat_cls.extract_all_labels()


if 'eval' in process_str:
    # -----------------------------Extract ONLY features for evaluation set-----------------------------
    eval_feat_cls = cls_feature_class.FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir,
                                                   feat_label_dir=feat_label_dir, is_eval=True)

    # Extract features and normalize them
    # NOTE(review): snippet ends here — the eval-set extract/preprocess calls
    # appear truncated in this excerpt.
# ---- Beispiel #10 (scraped example separator) ----
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expected two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    # Fixed train/val/test split assignment per mode.
    if params['mode'] == 'dev':
        test_splits = [6]
        val_splits = [5]
        train_splits = [[1, 2, 3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[6]]
        train_splits = [[1, 2, 3, 4, 5]]

    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params,
            split=val_splits[split_cnt],
            shuffle=False,
            per_file=True,
            is_eval=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size{}, t_pool_size{}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size'],
                    params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'],
                                      is_accdoa=params['is_accdoa'])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_val_folder = os.path.join(
            params['dcase_output_dir'],
            '{}_{}_{}_val'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_val_folder)
        print('Dumping recording-wise val results in: {}'.format(
            dcase_output_val_folder))

        # Initialize evaluation metric class
        score_obj = ComputeSELDResults(params)

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss = np.zeros(nb_epoch)
        seld_metric = np.zeros((nb_epoch, 5))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            if params['is_accdoa']:
                sed_pred, doa_pred = get_accdoa_labels(pred, nb_classes)
                sed_pred = reshape_3Dto2D(sed_pred)
                doa_pred = reshape_3Dto2D(doa_pred)
            else:
                sed_pred = reshape_3Dto2D(pred[0]) > 0.5
                # FIX: was `is 'mse'` — identity comparison on a string literal
                # relies on interning and is not a reliable equality test.
                doa_pred = reshape_3Dto2D(
                    pred[1] if params['doa_objective'] == 'mse'
                    else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2021 metrics - Location-aware detection and Class-aware localization scores
            dump_DCASE2021_results(data_gen_val, feat_cls,
                                   dcase_output_val_folder, sed_pred, doa_pred)
            seld_metric[epoch_cnt, :] = score_obj.get_SELD_Results(
                dcase_output_val_folder)

            # Early stopping on the combined SELD score (last column).
            patience_cnt += 1
            if seld_metric[epoch_cnt, -1] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt, -1]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2021 SCORES: ER: {:0.2f}, F: {:0.1f}, LE: {:0.1f}, LR:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt,
                    time.time() - start, tr_loss[epoch_cnt],
                    seld_metric[epoch_cnt, 0], seld_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt, 2], seld_metric[epoch_cnt, 3] * 100,
                    seld_metric[epoch_cnt, -1], best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(
            best_seld_metric))

        print('\n\tDCASE2021 scores')
        print(
            '\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'
            .format(seld_metric[best_epoch, 2],
                    seld_metric[best_epoch, 3] * 100))
        print(
            '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
            .format(seld_metric[best_epoch, 0],
                    seld_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        print('\tLoading testing dataset:')
        # FIX: was `params['mode'] is 'eval'` — use equality, not identity.
        data_gen_test = cls_data_generator.DataGenerator(
            params=params,
            split=split,
            shuffle=False,
            per_file=True,
            is_eval=params['mode'] == 'eval')

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name),
                                            params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)
        if params['is_accdoa']:
            test_sed_pred, test_doa_pred = get_accdoa_labels(
                pred_test, nb_classes)
            test_sed_pred = reshape_3Dto2D(test_sed_pred)
            test_doa_pred = reshape_3Dto2D(test_doa_pred)
        else:
            test_sed_pred = reshape_3Dto2D(pred_test[0]) > 0.5
            # FIX: was `is 'mse'` — identity comparison on a string literal.
            test_doa_pred = reshape_3Dto2D(
                pred_test[1] if params['doa_objective'] == 'mse' else
                pred_test[1][:, :, nb_classes:])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_test_folder = os.path.join(
            params['dcase_output_dir'],
            '{}_{}_{}_test'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_test_folder)
        print('Dumping recording-wise test results in: {}'.format(
            dcase_output_test_folder))
        dump_DCASE2021_results(data_gen_test, feat_cls,
                               dcase_output_test_folder, test_sed_pred,
                               test_doa_pred)

        # FIX: was `params['mode'] is 'dev'` — use equality, not identity.
        if params['mode'] == 'dev':
            # Calculate DCASE2021 scores
            test_seld_metric = score_obj.get_SELD_Results(
                dcase_output_test_folder)

            print('Results on test split:')
            print('\tDCASE2021 Scores')
            print(
                '\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'
                .format(test_seld_metric[2], test_seld_metric[3] * 100))
            print(
                '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_seld_metric[0], test_seld_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(
                test_seld_metric[-1]))
# ---- Beispiel #11 (scraped example separator) ----
# Extracts the features, labels, and normalizes the development and evaluation split features.
# NOTE: despite the header, only the development set is processed in this snippet.

import cls_feature_class
import doanet_parameters

params = doanet_parameters.get_params()

# -------------- Extract features and labels for development set -----------------------------
dev_feat_cls = cls_feature_class.FeatureClass(params)

# # Extract features and normalize them
dev_feat_cls.extract_all_feature()
dev_feat_cls.preprocess_features()

# # Extract labels in regression mode
dev_feat_cls.extract_all_labels()

# ---- Beispiel #12 (scraped example separator) ----
def main(argv):
    """
    Main wrapper for training a sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expected two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    # NOTE: string comparisons below use '==' (value equality); the original
    # 'is' comparisons only worked by accident of CPython string interning.
    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]

    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[1]]
        train_splits = [[2, 3, 4, 5, 6]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        gt = collect_test_labels(data_gen_val, data_out, nb_classes,
                                 params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size{}, t_pool_size{}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size'],
                    params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        seld_metric = np.zeros(nb_epoch)
        new_seld_metric = np.zeros(nb_epoch)
        tr_loss = np.zeros(nb_epoch)
        doa_metric = np.zeros((nb_epoch, 6))
        sed_metric = np.zeros((nb_epoch, 2))
        new_metric = np.zeros((nb_epoch, 4))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            # With the masked-mse objective the DOA output carries the SED
            # mask in its first nb_classes channels; strip it before scoring.
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(
                pred[1] if params['doa_objective'] == 'mse' else
                pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2019 metrics - Detection-only and Localization-only scores
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[
                epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Calculate the DCASE 2020 metrics - Location-aware detection and Class-aware localization scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_val.get_nb_classes(),
                doa_threshold=params['lad_doa_thresh'])
            pred_dict = feat_cls.regression_label_format_to_output_format(
                sed_pred, doa_pred)
            gt_dict = feat_cls.regression_label_format_to_output_format(
                sed_gt, doa_gt)

            pred_blocks_dict = feat_cls.segment_labels(pred_dict,
                                                       sed_pred.shape[0])
            gt_blocks_dict = feat_cls.segment_labels(gt_dict, sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(pred_blocks_dict,
                                                  gt_blocks_dict)
            new_metric[epoch_cnt, :] = cls_new_metric.compute_seld_scores()
            new_seld_metric[
                epoch_cnt] = evaluation_metrics.early_stopping_metric(
                    new_metric[epoch_cnt, :2], new_metric[epoch_cnt, 2:])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, sed_metric, doa_metric,
                           seld_metric, new_metric, new_seld_metric)

            # Early stopping on the DCASE2020 SELD metric: save the model
            # whenever it improves, otherwise count towards patience.
            patience_cnt += 1
            if new_seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = new_seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2019 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, FR:{:0.1f}, seld_score: {:0.2f}, '
                '\n\t\t DCASE2020 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, DE_F:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt,
                    time.time() - start, tr_loss[epoch_cnt],
                    sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1] * 100,
                    doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt], new_metric[epoch_cnt, 0],
                    new_metric[epoch_cnt, 1] * 100, new_metric[epoch_cnt, 2],
                    new_metric[epoch_cnt, 3] * 100, new_seld_metric[epoch_cnt],
                    best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            new_metric[best_epoch, 0], new_metric[best_epoch, 1],
            new_metric[best_epoch, 2], new_metric[best_epoch,
                                                  3], best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(
            best_seld_metric))

        print('\n\tDCASE2020 scores')
        print(
            '\tClass-aware localization scores: DOA_error: {:0.1f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 2],
                    new_metric[best_epoch, 3] * 100))
        print(
            '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 0],
                    new_metric[best_epoch, 1] * 100))

        print('\n\tDCASE2019 scores')
        print(
            '\tLocalization-only scores: DOA_error: {:0.1f}, Frame recall: {:0.1f}'
            .format(doa_metric[best_epoch, 0],
                    doa_metric[best_epoch, 1] * 100))
        print(
            '\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}\n'.
            format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params,
            split=split,
            shuffle=False,
            per_file=params['dcase_output'],
            is_eval=(params['mode'] == 'eval'))

        # Reload the best checkpoint saved during the early-stopping loop.
        model = keras_model.load_seld_model(model_name,
                                            params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(
            pred_test[1] if params['doa_objective'] == 'mse' else
            pred_test[1][:, :, nb_classes:])

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = feat_cls.regression_label_format_to_output_format(
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :])
                data_gen_test.write_output_format_file(output_file,
                                                       output_dict)

        # Ground truth is only available for the development split.
        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          nb_classes, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

            # Calculate DCASE2019 scores
            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr_xyz(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.early_stopping_metric(
                test_sed_loss, test_doa_loss)

            # Calculate DCASE2020 scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_test.get_nb_classes(), doa_threshold=20)
            test_pred_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_pred, test_doa_pred)
            test_gt_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_gt, test_doa_gt)

            test_pred_blocks_dict = feat_cls.segment_labels(
                test_pred_dict, test_sed_pred.shape[0])
            test_gt_blocks_dict = feat_cls.segment_labels(
                test_gt_dict, test_sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(test_pred_blocks_dict,
                                                  test_gt_blocks_dict)
            test_new_metric = cls_new_metric.compute_seld_scores()
            test_new_seld_metric = evaluation_metrics.early_stopping_metric(
                test_new_metric[:2], test_new_metric[2:])

            avg_scores_test.append([
                test_new_metric[0], test_new_metric[1], test_new_metric[2],
                test_new_metric[3], test_new_seld_metric
            ])
            print('Results on test split:')

            print('\tDCASE2020 Scores')
            print(
                '\tClass-aware localization scores: DOA Error: {:0.1f}, F-score: {:0.1f}'
                .format(test_new_metric[2], test_new_metric[3] * 100))
            print(
                '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_new_metric[0], test_new_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(
                test_new_seld_metric))

            print('\n\tDCASE2019 Scores')
            print(
                '\tLocalization-only scores: DOA Error: {:0.1f}, Frame recall: {:0.1f}'
                .format(test_doa_loss[0], test_doa_loss[1] * 100))
            print(
                '\tDetection-only scores:Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_sed_loss[0], test_sed_loss[1] * 100))
Beispiel #13
0
    def __init__(self,
                 dataset='foa',
                 nb_ch=4,
                 feat_label_dir='',
                 is_eval=False,
                 split=1,
                 batch_size=32,
                 seq_len=128,
                 shuffle=True,
                 per_file=False,
                 feat_type='mel',
                 doa=None,
                 seed=1,
                 onlyphase=False,
                 trial=None):
        """
        Batch generator over pre-extracted, normalized features and labels.

        :param dataset: audio format tag (e.g. 'foa') forwarded to FeatureClass.
        :param nb_ch: number of feature channels per frame.
        :param feat_label_dir: root folder containing the feature/label dirs.
        :param is_eval: True when generating batches for the evaluation split.
        :param split: dataset split id (or list of ids) to load.
        :param batch_size: number of sequences per batch.
        :param seq_len: number of frames per sequence.
        :param shuffle: whether to shuffle the file order between epochs.
        :param per_file: if True, each batch corresponds to exactly one file.
        :param feat_type: feature type tag (kept for interface compatibility).
        :param doa: DOA label mode (regression vs classification), passed on
            to FeatureClass.
        :param seed: random seed so that parallel generators yield identical
            orderings.
        :param onlyphase: if True, read features from the '<feat_dir>_phase'
            sibling folder.
        :param trial: optional trial index; selects '<feat_dir>_trial<N>'.
        """
        self.doa = doa  # specify whether regression or classification is used for doa
        self.seed = seed  # fix random seed so that each generator will give same results
        self.onlyphase = onlyphase

        self._per_file = per_file
        self._is_eval = is_eval
        self._splits = np.array(split)
        self._batch_size = batch_size
        self._seq_len = seq_len
        self._shuffle = shuffle
        self._feat_cls = cls_feature_class.FeatureClass(
            feat_label_dir=feat_label_dir,
            dataset=dataset,
            is_eval=self._is_eval,
            doa=doa)
        self._label_dir = self._feat_cls.get_label_dir()
        self._feat_dir = self._feat_cls.get_normalized_feat_dir()

        # Phase-only features live in a sibling folder with a '_phase' suffix.
        if onlyphase:
            self._feat_dir = self._feat_dir + '_phase'

        # NOTE(review): a falsy trial (0) is ignored here — assumes trial ids
        # start at 1; confirm against callers.
        if trial:
            self._feat_dir = self._feat_dir + '_trial' + str(trial)

        self._filenames_list = list()

        self._nb_frames_file = 0  # Using a fixed number of frames in feat files. Updated in _get_label_filenames_sizes()
        self._feat_len = None
        self._2_nb_ch = nb_ch  # channel count taken from the argument, not FeatureClass
        self._label_len = None  # total length of label - DOA + SED
        self._doa_len = None  # DOA label length
        self._class_dict = self._feat_cls.get_classes()
        self._nb_classes = len(self._class_dict.keys())
        self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr(
        )
        self._get_filenames_list_and_feat_label_sizes()

        self._nb_azi = 36  # range(-180, 180, 10)
        self._nb_ele = 9  # range(-40, 50, 10)

        self._batch_seq_len = self._batch_size * self._seq_len
        self._circ_buf_feat = None
        self._circ_buf_label = None

        if self._per_file:
            # One batch per recording (used when dumping per-file results).
            self._nb_total_batches = len(self._filenames_list)
        else:
            # Total full batches available across all files.
            self._nb_total_batches = int(
                np.floor((len(self._filenames_list) * self._nb_frames_file /
                          float(self._seq_len * self._batch_size))))

        print('\tDatagen_mode: {}, nb_files: {}, nb_classes: {}\n'
              '\tnb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len: {}\n'.
              format('eval' if self._is_eval else 'dev',
                     len(self._filenames_list), self._nb_classes,
                     self._nb_frames_file, self._feat_len, self._2_nb_ch,
                     self._label_len))

        print('\tDataset: {}, split: {}\n'
              '\tbatch_size: {}, seq_len: {}, shuffle: {}\n'
              '\tlabel_dir: {}\n '
              '\tfeat_dir: {}\n'.format(dataset, split, self._batch_size,
                                        self._seq_len, self._shuffle,
                                        self._label_dir, self._feat_dir))
Beispiel #14
0
    def __init__(self,
                 datagen_mode='train',
                 dataset='ansim',
                 ov=1,
                 split=1,
                 db=30,
                 batch_size=32,
                 seq_len=64,
                 shuffle=True,
                 nfft=512,
                 classifier_mode='regr',
                 weakness=0,
                 cnn3d=False,
                 xyz_def_zero=False,
                 extra_name='',
                 azi_only=False,
                 debug_load_single_batch=False,
                 data_format='channels_first',
                 params=None,
                 load_files_before_after_splitting_point=None):
        """
        Batch generator over pre-extracted features/labels of one dataset split.

        :param datagen_mode: 'train' or similar mode tag, used only for logging
            and filename selection downstream.
        :param dataset: dataset identifier forwarded to FeatureClass.
        :param ov: overlap configuration forwarded to FeatureClass.
        :param split: dataset split id forwarded to FeatureClass.
        :param db: SNR/dB configuration forwarded to FeatureClass.
        :param batch_size: number of sequences per batch.
        :param seq_len: number of frames per sequence.
        :param shuffle: whether to shuffle the file order between epochs.
        :param nfft: FFT size forwarded to FeatureClass.
        :param classifier_mode: label format mode (e.g. 'regr'), used to pick
            the label directory.
        :param weakness: label weakness/thickness; also selects label dir.
        :param cnn3d: True when feeding a 3D-CNN model (stored flag only here).
        :param xyz_def_zero: if True, default DOA is zero xyz (stored flag).
        :param extra_name: suffix used to select feature/label directories.
        :param azi_only: if True, use azimuth-only DOA labels (stored flag).
        :param debug_load_single_batch: if True, truncate the file list to just
            enough files for a single batch (fast debugging).
        :param data_format: 'channels_first' or 'channels_last' (stored flag).
        :param params: optional parameter dict; defaults to empty.
        :param load_files_before_after_splitting_point: passed through to
            create_filenames_list() to select a subset of files.
        """
        if params is None:
            params = {}
        self.params = params
        self._datagen_mode = datagen_mode
        self._classifier_mode = classifier_mode
        self._batch_size = batch_size
        self._seq_len = seq_len
        self._shuffle = shuffle
        # Feature extractor gives access to directory layout and label metadata.
        self._feat_cls = cls_feature_class.FeatureClass(dataset=dataset,
                                                        ov=ov,
                                                        split=split,
                                                        db=db,
                                                        nfft=nfft)
        self._label_dir = self._feat_cls.get_label_dir(classifier_mode,
                                                       weakness, extra_name)
        self._feat_dir = self._feat_cls.get_normalized_feat_dir(extra_name)
        self._thickness = weakness
        self._xyz_def_zero = xyz_def_zero
        self._azi_only = azi_only
        self._debug_load_single_batch = debug_load_single_batch
        self._data_format = data_format

        self._nb_frames_file = 0  # Assuming number of frames in feat files are the same
        self._feat_len = None
        # NOTE(review): presumably real+imag parts, hence 2x channels — confirm.
        self._2_nb_ch = 2 * self._feat_cls.get_nb_channels()
        self._label_len = None  # total length of label - DOA + SED
        self._doa_len = None  # DOA label length
        self._class_dict = self._feat_cls.get_classes()
        self._nb_classes = len(self._class_dict.keys())
        self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr(
        )
        self._is_cnn3d_model = cnn3d

        self._filenames_list = []
        self.create_filenames_list(load_files_before_after_splitting_point)

        # Populates self._nb_frames_file, self._feat_len, self._label_len etc.
        self.get_feature_label_shapes()

        self._batch_seq_len = self._batch_size * self._seq_len
        self._circ_buf_feat = None
        self._circ_buf_label = None

        if self._debug_load_single_batch:
            # Keep just enough files to fill one batch for quick debug runs.
            num_files_for_one_batch = int(
                np.ceil(float(self._batch_seq_len) / self._nb_frames_file))
            num_files_for_one_batch = max(num_files_for_one_batch, 1)
            self._filenames_list = self._filenames_list[:
                                                        num_files_for_one_batch]

        # Number of complete batches obtainable from all files in this split.
        self._nb_total_batches = int(
            np.floor((len(self._filenames_list) * self._nb_frames_file /
                      float(self._batch_seq_len))))
        logger.info(
            f"Data generator {datagen_mode}: {self._nb_total_batches} batches per epoch."
        )
        assert (self._nb_total_batches >= 1)

        logger.info(
            'Datagen_mode: {}, nb_files: {}, nb_classes:{}\n'
            'nb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len:{}\n'.
            format(self._datagen_mode, len(self._filenames_list),
                   self._nb_classes, self._nb_frames_file, self._feat_len,
                   self._2_nb_ch, self._label_len))

        logger.info('Dataset: {}, ov: {}, split: {}\n'
                    'batch_size: {}, seq_len: {}, shuffle: {}\n'
                    'label_dir: {}\n '
                    'feat_dir: {}\n'.format(dataset, ov, split,
                                            self._batch_size, self._seq_len,
                                            self._shuffle, self._label_dir,
                                            self._feat_dir))

        logger.debug("Complete file list:")
        for file_name in self._filenames_list:
            logger.debug(file_name)