Example #1
    def __init__(self, threshold_doa=20, threshold_sed=0.5):
        """
        Compute metrics for DOA, SED, or SELD (2019 or 2020 definitions).

        :type threshold_sed: float in (0, 1), classification threshold for SED activity
        :type threshold_doa: float in (0, 180), maximum angular error in degrees for a correct DOA
        """
        self._params = get_params("4")
        self.feat_cls = FeatureClass(self._params)
        self._threshold_sed = threshold_sed
        self._threshold_doa = threshold_doa
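A minimal usage sketch for this constructor, assuming it belongs to a SELD metrics helper class (the class name SELDMetricsHelper below is hypothetical):

metrics = SELDMetricsHelper(threshold_doa=20, threshold_sed=0.5)
# threshold_sed gates the sigmoid SED outputs; threshold_doa is the maximum
# angular error (in degrees) for a DOA estimate to count as correct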
Example #2
def collect_test_labels(_data_gen_test, _data_out, classification_mode, quick_test):
    # Collecting ground truth for test data
    params = parameter.get_params('1')
    nb_batch = params['quick_test_nb_batch'] if quick_test else _data_gen_test.get_total_batches_in_data()

    batch_size = _data_out[0][0]
    gt_sed = np.zeros((nb_batch * batch_size, _data_out[0][1], _data_out[0][2]))
    gt_doa = np.zeros((nb_batch * batch_size, _data_out[0][1], _data_out[1][2]))

    print("nb_batch in test: {}".format(nb_batch))
    cnt = 0
    for tmp_feat, tmp_label in _data_gen_test.generate():
        gt_sed[cnt * batch_size:(cnt + 1) * batch_size, :, :] = tmp_label[0]
        gt_doa[cnt * batch_size:(cnt + 1) * batch_size, :, :] = tmp_label[1]
        cnt = cnt + 1
        print(cnt)

        if cnt == nb_batch:
            break
    return gt_sed.astype(int), gt_doa
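A hedged usage sketch: downstream examples reshape the collected references with evaluation_metrics.reshape_3Dto2D before scoring (see Example #8). The generator below is assumed to be built elsewhere, and 'regr' is a placeholder since classification_mode is not used inside the function:

test_gt_sed, test_gt_doa = collect_test_labels(data_gen_test, data_out, 'regr', quick_test=False)
sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt_sed)
doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt_doa)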
Example #3
    def _get_label_filenames_sizes(self):
        #for filename in os.listdir(self._label_dir):
        #    if self._datagen_mode in filename:
        #        self._filenames_list.append(filename)

        #1 stands for default configuration
        _params = parameter.get_params('1')
        cnt_train = 0
        cnt_test = 0

        for filename in os.listdir(self._label_dir):
            if self._datagen_mode == "train":
                for split_n in _params["train_split"]:
                    if "split" + str(split_n) in filename:
                        self._filenames_list.append(filename)
                        print("TRAIN " + str(cnt_train) + ": " + filename)
                        cnt_train = cnt_train + 1
            elif self._datagen_mode == "validation":
                if "split" + str(self._split) in filename:
                    self._filenames_list.append(filename)
                    print("VALID " + str(cnt_test) + ": " + filename)
                    cnt_test = cnt_test + 1
            else:
                if ("split" + str(self._split)
                        in filename) and ("ov" + str(self._ov_num)
                                          in filename):
                    self._filenames_list.append(filename)
                    print("TEST " + str(cnt_test) + ": " + filename)
                    cnt_test = cnt_test + 1

        temp_feat = np.load(
            os.path.join(self._feat_dir, self._filenames_list[0]))
        self._nb_frames_file = temp_feat.shape[0]
        self._feat_len = int(temp_feat.shape[1] / self._2_nb_ch)

        temp_label = np.load(
            os.path.join(self._label_dir, self._filenames_list[0]))
        self._label_len = temp_label.shape[-1]
        # integer number of DOA values per class (floor division keeps this an int in Python 3)
        self._doa_len = (self._label_len - self._nb_classes) // self._nb_classes
        return
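The split filtering above boils down to a substring test on each filename; a self-contained sketch of the same rule (the function name is illustrative):

import os

def files_for_splits(label_dir, splits):
    # keep any filename containing 'split<n>' for one of the requested splits,
    # mirroring the train branch above
    return [f for f in os.listdir(label_dir)
            if any('split{}'.format(n) in f for n in splits)]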
Example #4
    def __init__(self,
                 seq_len,
                 splits=[1],
                 random_shuffle=False,
                 len_restrict=0,
                 with_conj=False,
                 rotate=None,
                 output_trim=0,
                 nb_freq_bins_use=None,
                 direction_bias=None,
                 direction_bias_additional=None,
                 single_source_case_only=False,
                 test_mode=False):
        self.params = parameter.get_params()
        self._seq_len = seq_len
        self._splits = np.array(splits)
        self._data_dir = self.params["dataset_dir"]
        self._label_dir = os.path.join(self._data_dir, 'label')
        self._feat_dir = os.path.join(self._data_dir, 'foa_norm')
        self._nb_classes = 11
        self._nb_ch = 4
        self._2_nb_ch = 2 * self._nb_ch
        self._nondeteministic_shuffle = random_shuffle
        self._filenames_list = list()
        self.gen_data_file_name_list(len_restrict)
        self._available_cases = list()
        self.single_source_case_only = single_source_case_only
        self.gen_available_cases()
        self.with_conj = with_conj
        self.rotate = rotate
        self.output_trim = output_trim
        self._nb_freq_bins_use = nb_freq_bins_use
        self._direction_bias = direction_bias
        self._direction_bias_additional = direction_bias_additional
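An instantiation sketch, assuming this __init__ belongs to a data-generator class (called DataGenerator here for illustration; the argument values are arbitrary):

gen = DataGenerator(seq_len=128, splits=[1, 2], random_shuffle=True,
                    nb_freq_bins_use=256, single_source_case_only=True)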
Example #5
def main(args):
    '''
    Main wrapper for training the sound event localization and detection network.

    :param args: parsed command-line arguments. Expects four attributes:
        args.dataset, args.mode, args.name (used as the task_id to choose the
        system configuration in parameters.py), and args.job_id (a unique
        identifier used in all output filenames).
    '''
    # use parameter set defined by user
    dataset, mode, task_id, job_id = args.dataset, args.mode, args.name, args.job_id
    task = 'sed'
    feat_type = 'mel'
    nb_ch = 4
    doa_type = None
    params, model_params = parameter.get_params(dataset=dataset,
                                                mode=mode,
                                                task_id=task_id,
                                                feat_type=feat_type,
                                                doa=doa_type)

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\nThis is split {}'.format(split_cnt))

        # Unique name for the run
        model_dir_prefix = os.path.join(
            params['model_dir'], task) if task == 'sed' else os.path.join(
                params['model_dir'], 'doa_reg')
        cls_feature_class.create_folder(model_dir_prefix)
        #model_id = int(job_id) + split_cnt
        unique_name = '{}{}_{}_{}_sed_dev_split{}'.format(
            task_id, str(job_id), params['dataset'], params['feat_type'],
            split_cnt + 1)
        unique_name = os.path.join(model_dir_prefix, unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print('\tmodel unique name: {}\n'.format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=train_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['seq_length'],
            feat_label_dir=params['feat_label_dir'],
            feat_type=feat_type,
            doa=doa_type)

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=val_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=3000,
            per_file=True,
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            feat_type=feat_type,
            doa=doa_type)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        gt = collect_test_labels_3000(data_gen_val)
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt)  # [3000*100, 11]
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        if task_id == 'crnn':
            model = CUDA(CRNN_SED(data_in, data_out[0]))
        elif task_id == 'mcrnn':
            model = CUDA(MCRNN_SED(data_in, data_out[0]))
        else:
            raise ValueError('Unknown task_id/model name: {}'.format(task_id))
        model.apply(kaiming_init)

        total_num = sum(param.numel() for param in model.parameters())
        print('==========================================')
        print('Total parameter number for {}: {}'.format(
            model_params['method'], total_num))
        print('==========================================')

        # Pytorch optimizer
        optimizer = optim.Adam(params=model.parameters(), lr=0.001)
        feat_torch = CUDA(
            Variable(
                torch.FloatTensor(params['batch_size'], nb_ch,
                                  params['seq_length'], params['feat_dim'])))
        label_sed = CUDA(
            Variable(
                torch.FloatTensor(params['batch_size'], params['seq_length'],
                                  11)))
        best_seld_metric = 99999
        best_sed_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        sed_val_loss = np.zeros(params['nb_epochs'])
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = params['nb_epochs']

        # start training
        pbar_epoch = tqdm(total=nb_epoch, desc='[Epoch]')
        for epoch_cnt in range(nb_epoch):
            # train stage
            model.train()
            iter_cnt = 0
            for feat, label in data_gen_train.generate():
                feat_torch.resize_(params['batch_size'], nb_ch,
                                   params['seq_length'], params['feat_dim'])
                feat_torch.data.copy_(torch.from_numpy(feat))

                label_sed.resize_(params['batch_size'], params['seq_length'],
                                  11)
                label_sed.data.copy_(torch.from_numpy(label[0]))
                sed = model(feat_torch)

                sed_loss = bce_loss(sed, label_sed)
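                # SED-only training in this script: the DOA branch is unused,
                # so its loss term is fixed at zero below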
                doa_loss = 0.0

                total_loss = sed_loss + doa_loss

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                if iter_cnt % params['print_iter'] == 0:
                    pbar_epoch.write(
                        'Iteration: {:3d}, sed_loss: {:.4f}, doa_loss: {:.4f}, total_loss: {:.4f}'
                        .format(iter_cnt, sed_loss, doa_loss, total_loss))

                #pbar_iteration.update(1)
                iter_cnt += 1
                if iter_cnt >= data_gen_train.get_total_batches_in_data():
                    break
            iter_cnt = 0
            sed_validation_loss = 0
            entire_pred_sed = np.zeros(
                (data_gen_val._batch_size *
                 data_gen_val.get_total_batches_in_data(), 3000, 11))
            model.eval()
            with torch.no_grad():
                for feat, label in data_gen_val.generate():
                    batch_size = feat.shape[0]

                    feat_torch.resize_(batch_size, nb_ch, 3000,
                                       params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))
                    label_sed.resize_(batch_size, 3000, 11)
                    label_sed.copy_(torch.from_numpy(label[0]))

                    sed = model(feat_torch)
                    sed_loss = bce_loss(sed, label_sed)
                    sed_validation_loss += sed_loss

                    # concat all predictions
                    entire_pred_sed[
                        iter_cnt * batch_size:(iter_cnt + 1) *
                        batch_size, :] = sed.detach().cpu().numpy()
                    iter_cnt += 1
                    if iter_cnt >= data_gen_val.get_total_batches_in_data():
                        break
            sed_validation_loss = sed_validation_loss / data_gen_val.get_total_batches_in_data()

            tr_loss[epoch_cnt] = total_loss.item()
            sed_val_loss[epoch_cnt] = sed_validation_loss.item()

            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(
                entire_pred_sed) > params[
                    'threshold']  # compared with threshold
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())

            patience_cnt += 1
            if sed_metric[epoch_cnt, 0] < best_sed_metric:
                best_sed_metric = sed_metric[epoch_cnt, 0]
                best_epoch = epoch_cnt
                save_model(model, model_name)
                patience_cnt = 0

            pbar_epoch.update(1)

            pbar_epoch.write(
                'epoch_cnt: %d, sed_tr_loss: %.4f, sed_val_loss: %.4f, ER_overall: %.2f, F1_overall: %.2f, best_sed_ER: %.4f, best_epoch : %d\n'
                % (epoch_cnt, tr_loss[epoch_cnt], sed_val_loss[epoch_cnt],
                   sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1],
                   best_sed_metric, best_epoch))

            if patience_cnt >= params['patience']:
                break

        pbar_epoch.close()

        avg_scores_val.append(
            [sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]]
        )  #, doa_metric[best_epoch, 0], doa_metric[best_epoch, 1], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=split,
            batch_size=params['batch_size'],
            seq_len=3000,
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            per_file=True,
            is_eval=(params['mode'] == 'eval'),
            feat_type=feat_type,
            doa=doa_type)
        test_batch_size = data_gen_test._batch_size

        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        model = load_model(model, '{}_model.h5'.format(unique_name))
        model.eval()

        # test stage
        total_test_batches = data_gen_test.get_total_batches_in_data()
        pbar_test = tqdm(total=total_test_batches, desc='[Testing]')
        iter_cnt = 0
        entire_test_sed = np.zeros((100, 3000, 11))  # 100 test recordings, 3000 frames each (see comment near Example #5's sed_gt)
        with torch.no_grad():
            if params['mode'] == 'dev':
                for feat, label in data_gen_test.generate():
                    batch_size = feat.shape[0]

                    feat_torch.data.resize_(batch_size, nb_ch, 3000,
                                            params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))

                    sed = model(feat_torch)
                    # concat all predictions
                    entire_test_sed[
                        iter_cnt * test_batch_size:(iter_cnt + 1) *
                        test_batch_size, :] = sed.detach().cpu().numpy()
                    pbar_test.update(1)
                    iter_cnt += 1
                    if iter_cnt >= data_gen_test.get_total_batches_in_data():
                        break
        print('the test batch_size is {}'.format(batch_size))
        pbar_test.close()

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(
            entire_test_sed) > params['threshold']
        if params['mode'] == 'dev':
            _, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels_3000(data_gen_test)
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt)
            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            avg_scores_test.append([test_sed_loss[0], test_sed_loss[1]])
            print('Results on test split:')
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
                test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print('\t Split {} - SED ER: {} F1: {}'.format(val_splits[cnt],
                                                       avg_scores_val[cnt][0],
                                                       avg_scores_val[cnt][1]))

    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print('\t Split {} - SED ER: {} F1: {}'.format(
                test_splits[cnt], avg_scores_test[cnt][0],
                avg_scores_test[cnt][1]))
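A wiring sketch for invoking this main, since it expects an args object with dataset, mode, name, and job_id attributes; the defaults below are assumptions (args.name doubles as the task_id and selects the model, 'crnn' or 'mcrnn'):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='foa')  # assumed default
    parser.add_argument('--mode', default='dev')
    parser.add_argument('--name', default='crnn')    # used as task_id
    parser.add_argument('--job_id', default='1')
    main(parser.parse_args())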
Example #6
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [6]
        val_splits = [5]
        train_splits = [[1, 2, 3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[6]]
        train_splits = [[1, 2, 3, 4, 5]]

    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params,
            split=val_splits[split_cnt],
            shuffle=False,
            per_file=True,
            is_eval=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size'],
                    params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'],
                                      is_accdoa=params['is_accdoa'])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_val_folder = os.path.join(
            params['dcase_output_dir'],
            '{}_{}_{}_val'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_val_folder)
        print('Dumping recording-wise val results in: {}'.format(
            dcase_output_val_folder))

        # Initialize evaluation metric class
        score_obj = ComputeSELDResults(params)

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss = np.zeros(nb_epoch)
        seld_metric = np.zeros((nb_epoch, 5))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            if params['is_accdoa']:
                sed_pred, doa_pred = get_accdoa_labels(pred, nb_classes)
                sed_pred = reshape_3Dto2D(sed_pred)
                doa_pred = reshape_3Dto2D(doa_pred)
            else:
                sed_pred = reshape_3Dto2D(pred[0]) > 0.5
                doa_pred = reshape_3Dto2D(
                    pred[1] if params['doa_objective'] == 'mse'
                    else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2021 metrics - Location-aware detection and Class-aware localization scores
            dump_DCASE2021_results(data_gen_val, feat_cls,
                                   dcase_output_val_folder, sed_pred, doa_pred)
            seld_metric[epoch_cnt, :] = score_obj.get_SELD_Results(
                dcase_output_val_folder)

            patience_cnt += 1
            if seld_metric[epoch_cnt, -1] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt, -1]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2021 SCORES: ER: {:0.2f}, F: {:0.1f}, LE: {:0.1f}, LR:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt,
                    time.time() - start, tr_loss[epoch_cnt],
                    seld_metric[epoch_cnt, 0], seld_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt, 2], seld_metric[epoch_cnt, 3] * 100,
                    seld_metric[epoch_cnt, -1], best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(
            best_seld_metric))

        print('\n\tDCASE2021 scores')
        print(
            '\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'
            .format(seld_metric[best_epoch, 2],
                    seld_metric[best_epoch, 3] * 100))
        print(
            '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
            .format(seld_metric[best_epoch, 0],
                    seld_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params,
            split=split,
            shuffle=False,
            per_file=True,
            is_eval=(params['mode'] == 'eval'))

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name),
                                            params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)
        if params['is_accdoa']:
            test_sed_pred, test_doa_pred = get_accdoa_labels(
                pred_test, nb_classes)
            test_sed_pred = reshape_3Dto2D(test_sed_pred)
            test_doa_pred = reshape_3Dto2D(test_doa_pred)
        else:
            test_sed_pred = reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = reshape_3Dto2D(
            pred_test[1] if params['doa_objective'] == 'mse'
            else pred_test[1][:, :, nb_classes:])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_test_folder = os.path.join(
            params['dcase_output_dir'],
            '{}_{}_{}_test'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_test_folder)
        print('Dumping recording-wise test results in: {}'.format(
            dcase_output_test_folder))
        dump_DCASE2021_results(data_gen_test, feat_cls,
                               dcase_output_test_folder, test_sed_pred,
                               test_doa_pred)

        if params['mode'] == 'dev':
            # Calculate DCASE2021 scores
            test_seld_metric = score_obj.get_SELD_Results(
                dcase_output_test_folder)

            print('Results on test split:')
            print('\tDCASE2021 Scores')
            print(
                '\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'
                .format(test_seld_metric[2], test_seld_metric[3] * 100))
            print(
                '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_seld_metric[0], test_seld_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(
                test_seld_metric[-1]))
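Example #6 calls get_accdoa_labels without showing it; below is a sketch consistent with the usual ACCDOA convention (an assumption, not necessarily this repository's exact code):

import numpy as np

def get_accdoa_labels(accdoa_in, nb_classes):
    # the per-class (x, y, z) vector norm acts as the SED activity, thresholded
    # at 0.5, while the raw Cartesian outputs double as the DOA estimate
    x = accdoa_in[:, :, :nb_classes]
    y = accdoa_in[:, :, nb_classes:2 * nb_classes]
    z = accdoa_in[:, :, 2 * nb_classes:]
    sed = np.sqrt(x ** 2 + y ** 2 + z ** 2) > 0.5
    return sed, accdoa_in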
Example #7
                    # Calculated scores
                    eval.update_seld_scores(pred_labels, self._ref_labels[pred_file])

                # Overall SED and DOA scores
                ER, F, LE, LR = eval.compute_seld_scores()
                seld_scr = SELD_evaluation_metrics.early_stopping_metric([ER, F], [LE, LR])

                print('\nAverage score for {} {} data using {} coordinates'.format(score_type, 'fold' if score_type=='all' else split_key, 'Polar' if self._use_polar_format else 'Cartesian' ))
                print('SELD score (early stopping metric): {:0.2f}'.format(seld_scr))
                print('SED metrics: Error rate: {:0.2f}, F-score:{:0.1f}'.format(ER, 100*F))
                print('DOA metrics: Localization error: {:0.1f}, Localization Recall: {:0.1f}'.format(LE, 100*LR))

def reshape_3Dto2D(A):
    return A.reshape(A.shape[0] * A.shape[1], A.shape[2])
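The early-stopping aggregate used above can be sketched as follows, assuming the standard DCASE definition (the mean of ER, 1 - F, LE/180, and 1 - LR):

import numpy as np

def early_stopping_metric(sed_error, doa_error):
    # sed_error = [ER, F]; doa_error = [LE, LR]
    return np.mean([sed_error[0], 1 - sed_error[1],
                    doa_error[0] / 180.0, 1 - doa_error[1]])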


if __name__ == "__main__":
    pred_output_format_files = 'results/4_foa_dev_test' # Path of the DCASE output format files

    # Compute just the DCASE 2021 final results 
    score_obj = ComputeSELDResults(parameter.get_params())
    ER, F, LE, LR, seld_scr = score_obj.get_SELD_Results(pred_output_format_files)
    print('SELD score (early stopping metric): {:0.2f}'.format(seld_scr))
    print('SED metrics: Error rate: {:0.2f}, F-score:{:0.1f}'.format(ER, 100*F))
    print('DOA metrics: Localization error: {:0.1f}, Localization Recall: {:0.1f}'.format(LE, 100*LR))

    # Compute DCASE 2021 results along with room-wise performance
    score_obj.get_consolidated_SELD_results(pred_output_format_files)

Example #8
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]

    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[1]]
        train_splits = [[2, 3, 4, 5, 6]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        gt = collect_test_labels(data_gen_val, data_out, nb_classes,
                                 params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size'],
                    params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        seld_metric = np.zeros(nb_epoch)
        new_seld_metric = np.zeros(nb_epoch)
        tr_loss = np.zeros(nb_epoch)
        doa_metric = np.zeros((nb_epoch, 6))
        sed_metric = np.zeros((nb_epoch, 2))
        new_metric = np.zeros((nb_epoch, 4))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(
                pred[1] if params['doa_objective'] == 'mse'
                else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2019 metrics - Detection-only and Localization-only scores
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[
                epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Calculate the DCASE 2020 metrics - Location-aware detection and Class-aware localization scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_val.get_nb_classes(),
                doa_threshold=params['lad_doa_thresh'])
            pred_dict = feat_cls.regression_label_format_to_output_format(
                sed_pred, doa_pred)
            gt_dict = feat_cls.regression_label_format_to_output_format(
                sed_gt, doa_gt)

            pred_blocks_dict = feat_cls.segment_labels(pred_dict,
                                                       sed_pred.shape[0])
            gt_blocks_dict = feat_cls.segment_labels(gt_dict, sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(pred_blocks_dict,
                                                  gt_blocks_dict)
            new_metric[epoch_cnt, :] = cls_new_metric.compute_seld_scores()
            new_seld_metric[
                epoch_cnt] = evaluation_metrics.early_stopping_metric(
                    new_metric[epoch_cnt, :2], new_metric[epoch_cnt, 2:])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, sed_metric, doa_metric,
                           seld_metric, new_metric, new_seld_metric)

            patience_cnt += 1
            if new_seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = new_seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2019 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, FR:{:0.1f}, seld_score: {:0.2f}, '
                '\n\t\t DCASE2020 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, DE_F:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt,
                    time.time() - start, tr_loss[epoch_cnt],
                    sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1] * 100,
                    doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt], new_metric[epoch_cnt, 0],
                    new_metric[epoch_cnt, 1] * 100, new_metric[epoch_cnt, 2],
                    new_metric[epoch_cnt, 3] * 100, new_seld_metric[epoch_cnt],
                    best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            new_metric[best_epoch, 0], new_metric[best_epoch, 1],
            new_metric[best_epoch, 2], new_metric[best_epoch, 3],
            best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(
            best_seld_metric))

        print('\n\tDCASE2020 scores')
        print(
            '\tClass-aware localization scores: DOA_error: {:0.1f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 2],
                    new_metric[best_epoch, 3] * 100))
        print(
            '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 0],
                    new_metric[best_epoch, 1] * 100))

        print('\n\tDCASE2019 scores')
        print(
            '\tLocalization-only scores: DOA_error: {:0.1f}, Frame recall: {:0.1f}'
            .format(doa_metric[best_epoch, 0],
                    doa_metric[best_epoch, 1] * 100))
        print(
            '\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}\n'.
            format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params,
            split=split,
            shuffle=False,
            per_file=params['dcase_output'],
            is_eval=(params['mode'] == 'eval'))

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name),
                                            params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(
            pred_test[1] if params['doa_objective'] == 'mse'
            else pred_test[1][:, :, nb_classes:])

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = feat_cls.regression_label_format_to_output_format(
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :])
                data_gen_test.write_output_format_file(output_file,
                                                       output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          nb_classes, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

            # Calculate DCASE2019 scores
            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr_xyz(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.early_stopping_metric(
                test_sed_loss, test_doa_loss)

            # Calculate DCASE2020 scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_test.get_nb_classes(), doa_threshold=20)
            test_pred_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_pred, test_doa_pred)
            test_gt_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_gt, test_doa_gt)

            test_pred_blocks_dict = feat_cls.segment_labels(
                test_pred_dict, test_sed_pred.shape[0])
            test_gt_blocks_dict = feat_cls.segment_labels(
                test_gt_dict, test_sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(test_pred_blocks_dict,
                                                  test_gt_blocks_dict)
            test_new_metric = cls_new_metric.compute_seld_scores()
            test_new_seld_metric = evaluation_metrics.early_stopping_metric(
                test_new_metric[:2], test_new_metric[2:])

            avg_scores_test.append([
                test_new_metric[0], test_new_metric[1], test_new_metric[2],
                test_new_metric[3], test_new_seld_metric
            ])
            print('Results on test split:')

            print('\tDCASE2020 Scores')
            print(
                '\tClass-aware localization scores: DOA Error: {:0.1f}, F-score: {:0.1f}'
                .format(test_new_metric[2], test_new_metric[3] * 100))
            print(
                '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_new_metric[0], test_new_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(
                test_new_seld_metric))

            print('\n\tDCASE2019 Scores')
            print(
                '\tLocalization-only scores: DOA Error: {:0.1f}, Frame recall: {:0.1f}'
                .format(test_doa_loss[0], test_doa_loss[1] * 100))
            print(
                '\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_sed_loss[0], test_sed_loss[1] * 100))
Example #9
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) uses default parameters
    
    Usage (three optional inputs, plus an optional log-dir name):
        >> python seld.py <job-id> <task-id> <train-test> [<log-dir-name>]
        <job-id> is a unique identifier which is used for output filenames
        (models, training plots). You can use any number or string for this.
        <task-id> is used to choose the user-defined parameter set from parameter.py
        <train-test> is 'train' for training; anything else runs evaluation
    """
    job_id = 1 if len(argv) < 2 else argv[1]

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[2]
    params = parameter.get_params(task_id)

    isTraining = len(argv) < 4 or argv[3] == 'train'
    logger.info(f"isTraining {isTraining}")

    log_dir_name = None if len(argv) < 5 else argv[4]
    if not log_dir_name and not isTraining:
        raise ValueError("Specify log_dir if evaluation mode")

    model_dir = os.path.join(os.pardir, 'models')
    if isTraining:
        utils.create_folder(model_dir)

    unique_name = '{}_ov{}_train{}_val{}_{}'.format(
        params['dataset'], list_to_string(params['overlap']), list_to_string(params['train_split']),
        list_to_string(params['val_split']),
        job_id)
    
    if not isTraining:
        unique_name = job_id
        
    logger.info(f"unique_name: {unique_name}")

    dnn_type = 'QTCN' if params['use_quaternions'] else params['recurrent_type']
    if not log_dir_name:
        log_dir_name = "-".join([dnn_type, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")])
    logger.info(f"log_dir_name: {log_dir_name}")

    log_dir = os.path.join(model_dir, unique_name, log_dir_name)
    logger.info(f"log_dir: {log_dir}")

    if isTraining:
        utils.create_folder(log_dir)

    utils.setup_logger(log_dir, console_logger_level=logging.INFO)
    
    logger.info(f"log_dir {log_dir}")
    logger.info("unique_name: {}\n".format(unique_name))
    
    data_gen_train = None
    data_gen_val = None
    data_gen_test = None
    if isTraining:
        load_files_train_splitting_point = None if params['train_val_split'] == 1.0 else 'before'
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], split=params['train_split'], db=params['db'],
            nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'],
            xyz_def_zero=params['xyz_def_zero'],
            azi_only=params['azi_only'], debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params,
            load_files_before_after_splitting_point=load_files_train_splitting_point
        )

        if not params['quick_test']:
            load_files_val_splitting_point = None if params['train_val_split'] == 1.0 else 'after'
            data_gen_val = cls_data_generator.DataGenerator(
                dataset=params['dataset'], ov=params['overlap'], split=params['val_split'], db=params['db'],
                nfft=params['nfft'],
                batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
                weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'],
                xyz_def_zero=params['xyz_def_zero'],
                azi_only=params['azi_only'], shuffle=False, debug_load_single_batch=params['debug_load_single_batch'],
                data_format=params['data_format'], params=params,
                load_files_before_after_splitting_point=load_files_val_splitting_point
            )
        else:
            import copy
            data_gen_val = copy.deepcopy(data_gen_train)
            logger.warning(f"Quick test, validation set is a deep copy of training set.")

    else:
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], split=params['test_split'], db=params['db'],
            nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='test', cnn3d=params['cnn_3d'],
            xyz_def_zero=params['xyz_def_zero'],
            azi_only=params['azi_only'], shuffle=False, debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params
        )

    data_gen_for_shapes = data_gen_train if isTraining else data_gen_test
    data_in, data_out = data_gen_for_shapes.get_data_sizes()
    logger.info(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(
            data_in, data_out
        )
    )

    logger.info(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size: {}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'], params['pool_size'],
            params['rnn_size'], params['fnn_size']
        )
    )

    network.set_global_num_classes(params)
    keras.backend.set_image_data_format(params['data_format'])
    logger.info(f"Data format set to {params['data_format']}")
    
    model = None
    if isTraining:
        if params['use_quaternions']:
            assert (params['data_format'] == 'channels_last')

        if params['use_giusenso']:
            assert (params['data_format'] == 'channels_first')
            model = keras_model_giusenso.get_model_giusenso(data_in, data_out, params['dropout_rate'],
                                                            params['nb_cnn2d_filt'],
                                                            params['pool_size'], params['fnn_size'], params['loss_weights'])
        else:
            model = network.get_model(input_shape=data_in, output_shape=data_out, dropout_rate=params['dropout_rate'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'], data_format=params['data_format'],
                                      params=params)
    
    model_path = os.path.join(log_dir, 'model')
    logger.info(f"model_path {model_path}")
    if os.path.exists(model_path):
        logger.info(f"Loading pretrained model from {model_path}")
        model = network.load_seld_model(model_path, params['doa_objective'])
    else:
        if not isTraining:
            raise FileNotFoundError(f"test mode but model was not found at {os.path.abspath(model_path)}")

    try:
        dot_img_file = os.path.join(log_dir, 'model_plot.png')
        keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)
    except ImportError:
        logger.warning(f"Failed to import pydot, skip plotting")

    if isTraining:
        utils.copy_source_code(log_dir)
        train(model, data_gen_train, data_gen_val, params, log_dir=log_dir, unique_name=unique_name)
    else:
        evaluate(model, data_gen_test, params, log_dir=log_dir, unique_name=unique_name)
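An invocation sketch with concrete values matching the argv handling above (the job id and task id are arbitrary; the log-dir name follows the dnn_type-timestamp format built in this script):

# training:   python seld.py 42 1 train
# evaluation: python seld.py 42 1 test QTCN-20230101-120000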
Example #10
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1

    """
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=train_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=val_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        gt = collect_test_labels(data_gen_val, data_out, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        # rescaling the reference elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        doa_gt[:, nb_classes:] = doa_gt[:, nb_classes:] / (180. / def_elevation)
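        # For instance, if def_elevation is 50, an elevation entry of 180 becomes
        # 180 / (180 / 50) = 50: the elevation columns (after the first nb_classes
        # azimuth columns) are mapped onto the physical [-50, 50] range.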

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, pool_size: {}\n\trnn_size: {}, fnn_size: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['pool_size'], params['rnn_size'],
                    params['fnn_size']))

        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_metric = np.zeros((params['nb_epochs'], 6))
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                validation_data=data_gen_val.generate(),
                validation_steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            doa_pred[:, nb_classes:] = doa_pred[:, nb_classes:] / (180. / def_elevation)

            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr(
                doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.compute_seld_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])
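            # compute_seld_metric folds the SED scores (error rate, F1) and the DOA
            # scores (localization error, frame recall) into a single scalar;
            # lower is better, which is why it drives the model-saving and
            # early-stopping logic below.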

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, val_loss, sed_metric,
                           doa_metric, seld_metric)

            patience_cnt += 1
            if seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
                'ER_overall: %.2f, F1_overall: %.2f, '
                'doa_error_pred: %.2f, good_pks_ratio: %.2f, '
                'seld_score: %.2f, best_seld_score: %.2f, best_epoch: %d\n' %
                (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                 val_loss[epoch_cnt], sed_metric[epoch_cnt, 0],
                 sed_metric[epoch_cnt, 1], doa_metric[epoch_cnt, 0],
                 doa_metric[epoch_cnt, 1], seld_metric[epoch_cnt],
                 best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1],
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1],
            best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score: {}'.format(best_seld_metric))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=split,
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            per_file=params['dcase_output'],
            is_eval=(params['mode'] == 'eval'))

        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        model = load_model('{}_model.h5'.format(unique_name))
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

        # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (
            180. / def_elevation)

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # One batch (batch_size * sequence_length frames) covers a whole file: the
            # 3000 content frames above plus zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = evaluation_metrics.regression_label_format_to_output_format(
                    data_gen_test,
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :] * 180 /
                    np.pi)
                evaluation_metrics.write_output_format_file(
                    output_file, output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
            # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (
                180. / def_elevation)

            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.compute_seld_metric(
                test_sed_loss, test_doa_loss)

            avg_scores_test.append([
                test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
                test_doa_loss[1], test_metric_loss
            ])
            print('Results on test split:')
            print('\tSELD_score: {},  '.format(test_metric_loss))
            print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
                test_doa_loss[0], test_doa_loss[1]))
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
                test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print(
            '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
            .format(cnt, avg_scores_val[cnt][0], avg_scores_val[cnt][1],
                    avg_scores_val[cnt][2], avg_scores_val[cnt][3],
                    avg_scores_val[cnt][4]))

    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print(
                '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
                .format(cnt, avg_scores_test[cnt][0], avg_scores_test[cnt][1],
                        avg_scores_test[cnt][2], avg_scores_test[cnt][3],
                        avg_scores_test[cnt][4]))
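A note on the reporting above: avg_scores_val and avg_scores_test collect one [ER, F1, DOA_error, frame_recall, SELD_score] row per fold, but the script only prints them fold by fold and never averages them. A minimal sketch of reporting the cross-validation mean (assuming numpy is imported as np, as elsewhere in these examples):

    fold_scores = np.array(avg_scores_val)  # shape: (nb_folds, 5)
    mean_er, mean_f1, mean_doa, mean_fr, mean_seld = fold_scores.mean(axis=0)
    print('CV average - SED ER: {:.3f} F1: {:.3f}; DOA error: {:.2f} '
          'frame recall: {:.3f}; SELD score: {:.3f}'.format(
              mean_er, mean_f1, mean_doa, mean_fr, mean_seld))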
Example #11
def main(argv):
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        # test_splits = [1, 2, 3, 4]
        # val_splits = [2, 3, 4, 1]
        # train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]
        # TODO for debug only
        test_splits = [1]
        val_splits = [1]
        train_splits = [[1, 1]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    # ------------------  Calculate metric scores for unseen test split ---------------------------------
    print('Loading testing dataset:')
    # NOTE: there is no fold loop in this snippet, so use the single configured test split
    split = test_splits[0]
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        split=split,
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        feat_label_dir=params['feat_label_dir'],
        shuffle=False,
        per_file=params['dcase_output'],
        is_eval=(params['mode'] == 'eval'))

    print('\nLoading the best model and predicting results on the testing split')
    # NOTE: the following block was commented out in the original snippet, which left
    # pred_test undefined; it is restored here. unique_name is a hypothetical model
    # path (it was never defined in this snippet) and must point to a trained model.
    unique_name = os.path.join(params['model_dir'], 'seld_model')
    model = load_model('{}_model.h5'.format(unique_name))
    pred_test = model.predict_generator(
        generator=data_gen_test.generate(),
        steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
        verbose=2)

    test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
    test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

    # nb_classes and def_elevation were also undefined here; read them from the
    # generator, as in the dev-mode training script above
    nb_classes = data_gen_test.get_nb_classes()
    def_elevation = data_gen_test.get_default_elevation()

    # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
    test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (180. / def_elevation)

    if params['dcase_output']:
        # Dump results in DCASE output format for calculating final scores
        dcase_dump_folder = os.path.join(
            params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'],
                                                   params['mode']))
        cls_feature_class.create_folder(dcase_dump_folder)
        print(
            'Dumping recording-wise results in: {}'.format(dcase_dump_folder))

        test_filelist = data_gen_test.get_filelist()
        # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
        max_frames_with_content = data_gen_test.get_nb_frames()

        # One batch (batch_size * sequence_length frames) covers a whole file: the
        # 3000 content frames above plus zero padding in the remaining frames
        frames_per_file = data_gen_test.get_frame_per_file()

        for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
            output_file = os.path.join(
                dcase_dump_folder,
                test_filelist[file_cnt].replace('.npy', '.csv'))
            dc = file_cnt * frames_per_file
            output_dict = evaluation_metrics.regression_label_format_to_output_format(
                data_gen_test,
                test_sed_pred[dc:dc + max_frames_with_content, :],
                test_doa_pred[dc:dc + max_frames_with_content, :] * 180 /
                np.pi)
            evaluation_metrics.write_output_format_file(
                output_file, output_dict)

    if params['mode'] == 'dev':
        avg_scores_test = []  # NOTE: was never initialized in the original snippet
        _, _, test_data_out = data_gen_test.get_data_sizes()
        test_gt = collect_test_labels(data_gen_test, test_data_out,
                                      params['quick_test'])
        test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
        test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
        # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (
            180. / def_elevation)

        test_sed_loss = evaluation_metrics.compute_sed_scores(
            test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
        test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
            test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
        test_metric_loss = evaluation_metrics.compute_seld_metric(
            test_sed_loss, test_doa_loss)

        avg_scores_test.append([
            test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
            test_doa_loss[1], test_metric_loss
        ])
        print('Results on test split:')
        print('\tSELD_score: {},  '.format(test_metric_loss))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            test_doa_loss[0], test_doa_loss[1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            test_sed_loss[0], test_sed_loss[1]))
Example #12
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) chooses the system configuration in parameter.py.
                                (default) uses default parameters
    """
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')
    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'],
        params['weakness'], int(params['cnn_3d']), job_id)
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        ov=params['overlap'],
        split=params['split'],
        db=params['db'],
        nfft=params['nfft'],
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        classifier_mode=params['mode'],
        weakness=params['weakness'],
        datagen_mode='train',
        cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'])

    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        ov=params['overlap'],
        split=params['split'],
        db=params['db'],
        nfft=params['nfft'],
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        classifier_mode=params['mode'],
        weakness=params['weakness'],
        datagen_mode='test',
        cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'],
        shuffle=False)

    data_in, data_out = data_gen_train.get_data_sizes()
    print('FEATURES:\n'
          '\tdata_in: {}\n'
          '\tdata_out: {}\n'.format(data_in, data_out))

    gt = collect_test_labels(data_gen_test, data_out, params['mode'],
                             params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print('MODEL:\n'
          '\tdropout_rate: {}\n'
          '\tCNN: nb_cnn_filt: {}, pool_size: {}\n'
          '\trnn_size: {}, fnn_size: {}\n'.format(
              params['dropout_rate'], params['nb_cnn3d_filt']
              if params['cnn_3d'] else params['nb_cnn2d_filt'],
              params['pool_size'], params['rnn_size'], params['fnn_size']))

    # TPU CODE FOR GOOGLE COLABORATORY
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    tf.config.experimental_connect_to_cluster(resolver)
    # This is the TPU initialization code that has to be at the beginning.
    tf.tpu.experimental.initialize_tpu_system(resolver)
    print("All devices: ", tf.config.list_logical_devices('TPU'))

    strategy = tf.distribute.experimental.TPUStrategy(resolver)
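    # NOTE: tf.distribute.experimental.TPUStrategy matches older TensorFlow 2.x
    # releases; from TF 2.4 onwards it is exposed as tf.distribute.TPUStrategy.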

    with strategy.scope():
        # Load or create model
        model = utils.load_model(unique_name)
        if model is None:
            model = keras_model.get_model(
                data_in=data_in,
                data_out=data_out,
                dropout_rate=params['dropout_rate'],
                nb_cnn2d_filt=params['nb_cnn2d_filt'],
                pool_size=params['pool_size'],
                rnn_size=params['rnn_size'],
                fnn_size=params['fnn_size'],
                classification_mode=params['mode'],
                weights=params['loss_weights'])
        model.summary()

    best_metric = 99999
    conf_mat = None
    best_conf_mat = None
    best_epoch = -1
    patience_cnt = 0
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))
    for epoch_cnt in range(params['nb_epochs']):
        start = time.time()
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=5 if params['quick_test'] else
            data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=5 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            epochs=1,
            verbose=1)
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=5 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            verbose=2)
        print("pred:", pred[1].shape)
        if params['mode'] == 'regr':
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_test.nb_frames_1s())
            if params['azi_only']:
                doa_loss[
                    epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                        doa_pred, doa_gt, sed_pred, sed_gt)
            else:
                doa_loss[
                    epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                        doa_pred, doa_gt, sed_pred, sed_gt)


            # epoch_metric_loss[epoch_cnt] = np.mean([
            #     sed_loss[epoch_cnt, 0],
            #     1 - sed_loss[epoch_cnt, 1],
            #     2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
            #     1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))])
            sed_score[epoch_cnt] = np.mean(
                [sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            doa_score[epoch_cnt] = np.mean([
                2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))
            ])
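            # sed_score averages the error rate and (1 - F1); doa_score averages the
            # normalized DOA error term (2 * arcsin(d / 2) / pi) and the fraction of
            # frames without a good localization estimate. Lower is better for both.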

        #plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, epoch_metric_loss)
        plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss,
                       sed_score, doa_score)

        patience_cnt += 1
        # if epoch_metric_loss[epoch_cnt] < best_metric:
        #     best_metric = epoch_metric_loss[epoch_cnt]
        #     best_conf_mat = conf_mat
        #     best_epoch = epoch_cnt
        #     model.save('{}_model.h5'.format(unique_name))
        #     patience_cnt = 0
        if sed_score[epoch_cnt] < best_metric:
            best_metric = sed_score[epoch_cnt]
            best_conf_mat = conf_mat
            best_epoch = epoch_cnt
            model.save('{}_model.h5'.format(unique_name))
            patience_cnt = 0

        print(
            'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
            'F1_overall: %.2f, ER_overall: %.2f, '
            'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
            'sed_score: %.2f, doa_score: %.2f, best_error_metric: %.2f, best_epoch : %d'
            % (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
               val_loss[epoch_cnt], sed_loss[epoch_cnt, 1],
               sed_loss[epoch_cnt, 0], doa_loss[epoch_cnt, 1],
               doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] /
               float(sed_gt.shape[0]), sed_score[epoch_cnt],
               doa_score[epoch_cnt], best_metric, best_epoch))

    #plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, sed_score, doa_score, epoch_cnt)
    print('best_conf_mat : {}'.format(best_conf_mat))
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {},  '.format(
        best_epoch, best_metric))
    print(
        'DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.
        format(doa_loss[best_epoch, 1], doa_loss[best_epoch, 2],
               doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(
        sed_loss[best_epoch, 0], sed_loss[best_epoch, 1]))
    print('unique_name: {} '.format(unique_name))
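Example #13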
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) chooses the system configuration in parameter.py.
                                (default) uses default parameters
    """
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')
    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_train{}_validation{}_seq{}'.format(params['dataset'], params['train_split'], params['val_split'], params['sequence_length'])
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    # Cycling over overlaps
    for ov in range(1, params['overlap']+1):

        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], ov_num=ov, split=params['test_split'], db=params['db'], nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='test', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
            azi_only=params['azi_only'], shuffle=False
        )

        data_in, data_out = data_gen_test.get_data_sizes()
        n_classes = data_out[0][2]

        print(
            'FEATURES:\n'
            '\tdata_in: {}\n'
            '\tdata_out: {}\n'.format(
                data_in, data_out
            )
        )

        gt = collect_test_labels(data_gen_test, data_out, params['mode'], params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        print("#### Saving DOA and SED GT Values ####")
        f = open("models/doa_gt.txt", "w+")
        for elem in doa_gt:
          f.write(str(list(elem)) + "\n")
        f.close()

        f = open("models/sed_gt.txt", "w+")
        for elem in sed_gt:
          f.write(str(elem)+"\n")
        f.close()
        print("######################################")

        print(
            'MODEL:\n'
            '\tdropout_rate: {}\n'
            '\tCNN: nb_cnn_filt: {}, pool_size: {}\n'
            '\trnn_size: {}, fnn_size: {}\n'.format(
                params['dropout_rate'],
                params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'], params['pool_size'],
                params['rnn_size'], params['fnn_size']
            )
        )

        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                        nb_cnn2d_filt=params['nb_cnn2d_filt'], pool_size=params['pool_size'],
                        rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                        classification_mode=params['mode'], weights=params['loss_weights'], summary=False)

        if os.path.exists('{}_model.ckpt'.format(unique_name)):
            print("Model found!")
            model.load_weights('{}_model.ckpt'.format(unique_name))
            for i in range(10):
                print("###")

        sed_score = np.zeros(params['nb_epochs'])
        doa_score = np.zeros(params['nb_epochs'])
        seld_score = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_loss = np.zeros((params['nb_epochs'], 6))
        sed_loss = np.zeros((params['nb_epochs'], 2))

        epoch_cnt = 0
        start = time.time()

        print("#### Prediction on validation split ####")
        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=params['quick_test_steps'] if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            workers=1,
            verbose=1,
        )
        print("##########################")
        #print("pred:", pred[1].shape)

        if params['mode'] == 'regr':
            sed_pred = np.array(evaluation_metrics.reshape_3Dto2D(pred[0])) > .5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            print("#### Saving DOA and SED Pred Values ####")
            f = open("models/doa_pred.txt", "w+")
            for elem in doa_pred:
              f.write(str(list(elem)) + "\n")
            f.close()

            f = open("models/sed_pred.txt", "w+")
            for elem in sed_pred:
              f.write(str(elem)+"\n")
            f.close()
            print("########################################")

            # Old version of confidence intervals
            '''
            # Computing confidence intervals
            sed_err = sed_gt - sed_pred
            [sed_conf_low, sed_conf_up, sed_median] = compute_confidence(sed_err)
            print("Confidence Interval for SED error is [ %f, %f ]" % (sed_conf_low, sed_conf_up))
            print("\tMedian is %f" % (sed_median))
            print("\tDisplacement: +/- %f" % (sed_conf_up - sed_median))

            doa_err = doa_gt - doa_pred
            [doa_conf_low, doa_conf_up, doa_median] = compute_confidence(doa_err)
            print("Confidence Interval for DOA is [ %f, %f ]" % (doa_conf_low, doa_conf_up))
            print("\tMedian is %f" % (doa_median))
            print("\tDisplacement: +/- %f" % (doa_conf_up - doa_median))
            # ------------------------------
            '''

            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_test.nb_frames_1s())
            if params['azi_only']:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(doa_pred, doa_gt,
                                                                                                 sed_pred, sed_gt)
            else:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(doa_pred, doa_gt,
                                                                                                  sed_pred, sed_gt)

            sed_score[epoch_cnt] = np.mean([sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            doa_score[epoch_cnt] = np.mean([2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                                            1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))])
            seld_score[epoch_cnt] = (sed_score[epoch_cnt] + doa_score[epoch_cnt]) / 2

            if os.path.isdir('./models'):
                plot.imshow(conf_mat, cmap='binary', interpolation='None')
                plot.savefig('models/confusion_matrix.jpg')

            # New confidence computation, differing doa and sed errors
            sed_err = sed_loss[epoch_cnt, 0]
            [sed_conf_low, sed_conf_up] = compute_confidence(sed_err, sed_pred.shape[0])
            print("Confidence Interval for SED error is [ %f, %f ]" % (sed_conf_low, sed_conf_up))

            doa_err = doa_gt - doa_pred
            [x_err, y_err, z_err] = compute_doa_confidence(doa_err, n_classes)


            print('epoch_cnt: %d, time: %.2fs, tr_loss: %.4f, val_loss: %.4f, '
                'F1_overall: %.2f, ER_overall: %.2f, '
                'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
                'sed_score: %.4f, doa_score: %.4f, seld_score: %.4f' %
                (
                    epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                    sed_loss[epoch_cnt, 1], sed_loss[epoch_cnt, 0],
                    doa_loss[epoch_cnt, 1], doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
                    sed_score[epoch_cnt], doa_score[epoch_cnt], seld_score[epoch_cnt]
                )
            )

        simple_plotter.plot_3d("models/doa_gt.txt", "models/doa_pred.txt", 0, 11, 200)
        simple_plotter.plot_confidence(x_err, y_err, z_err, "ov"+str(ov))
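
The compute_confidence helper used above is not part of this compilation. A minimal sketch consistent with how it is called (an error rate plus the number of frames it was estimated from, returning [lower, upper]), assuming a 95% normal-approximation binomial interval; the real helper may differ:

    import numpy as np

    def compute_confidence(err_rate, n, z=1.96):
        # Normal-approximation 95% interval for a rate estimated from n frames
        half_width = z * np.sqrt(err_rate * (1.0 - err_rate) / n)
        return [err_rate - half_width, err_rate + half_width]

Example #14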
# Extracts the features and labels, and normalizes the training and test split features. Make sure you update the
# location of the downloaded datasets in cls_feature_class.py first.

import cls_feature_class
import cls_feature_extr
import parameter

params = parameter.get_params('1')
dataset_name = params['dataset']
dataset_dir = params['dataset_dir']
feat_label_dir = params['feat_label_dir']


if(dataset_name == "foa"):
    # -------------- Extract features and labels for development set -----------------------------
    dev_feat_cls = cls_feature_extr.FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir, feat_label_dir=feat_label_dir)

    # Extract features and normalize them
    dev_feat_cls.extract_all_feature()
    dev_feat_cls.preprocess_features()

    # Extract labels in regression mode
    dev_feat_cls.extract_all_labels()

else:
    # Extracts feature and labels for all overlap and splits
    for ovo in [2]:  # SE overlap
        for splito in [1]:    # all splits. Use [1, 8, 9] for 'real' dataset
            for nffto in [512]:
                feat_cls = cls_feature_class.FeatureClass(ov=ovo, split=splito, nfft=nffto, dataset=dataset_name)
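
Unlike the foa branch, the loop above only instantiates FeatureClass and never runs the extraction. A minimal sketch of completing the loop body, assuming cls_feature_class.FeatureClass exposes the same extraction methods as cls_feature_extr.FeatureClass (an assumption; check the actual class):

                feat_cls.extract_all_feature()
                feat_cls.preprocess_features()
                feat_cls.extract_all_labels()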
Example #15
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) chooses the system configuration in parameter.py.
                                (default) uses default parameters
    """
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')
    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_train{}_validation{}_seq{}'.format(params['dataset'], params['train_split'], params['val_split'],
                                                         params['sequence_length'])

    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['train_split'], db=params['db'],
        nfft=params['nfft'],
        batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only']
    )

    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['val_split'], db=params['db'],
        nfft=params['nfft'],
        batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='validation', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'], shuffle=False
    )

    data_in, data_out = data_gen_train.get_data_sizes()
    #n_classes = data_out[0][2]

    print(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(
            data_in, data_out
        )
    )

    gt = collect_test_labels(data_gen_test, data_out, params['mode'], params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size: {}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'], params['pool_size'],
            params['rnn_size'], params['fnn_size']
        )
    )

    model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                  nb_cnn2d_filt=params['nb_cnn2d_filt'], pool_size=params['pool_size'],
                                  rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                  classification_mode=params['mode'], weights=params['loss_weights'], summary=True)

    if os.path.exists('{}_model.ckpt'.format(unique_name)):
        print("Model found!")
        model.load_weights('{}_model.ckpt'.format(unique_name))
        for i in range(10):
            print("###")

    best_metric = 99999
    conf_mat = None
    best_conf_mat = None
    best_epoch = -1
    patience_cnt = 0
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    seld_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))

    for epoch_cnt in range(params['nb_epochs']):
        start = time.time()

        print("##### Training the model #####")
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=params['quick_test_steps'] if params[
                'quick_test'] else data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=params['quick_test_steps'] if params[
                'quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            workers=1,
            epochs=1,
            verbose=1
        )
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]
        print("##########################")

        # Work around a predict_generator issue: save the weights, rebuild the model, then re-load the weights
        print("##### Saving weights #####")
        model.save_weights('{}_model.ckpt'.format(unique_name))

        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'], pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                      classification_mode=params['mode'], weights=params['loss_weights'], summary=False)
        model.load_weights('{}_model.ckpt'.format(unique_name))
        print("##########################")

        print("#### Prediction on validation split ####")
        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=params['quick_test_steps'] if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            workers=1,
            verbose=1
        )
        print("########################################")
        # print("pred:",pred[1].shape)

        if params['mode'] == 'regr':
            sed_pred = np.array(evaluation_metrics.reshape_3Dto2D(pred[0])) > .5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            # Old confidence intervals
            '''
            sed_err = sed_gt - sed_pred
            [sed_conf_low, sed_conf_up, sed_median] = compute_confidence(sed_err)
            print("Confidence Interval for SED error is [ %.5f, %.5f ]" % (sed_conf_low, sed_conf_up))
            print("\tMedian is %.5f" % (sed_median))
            print("\tDisplacement: +/- %.5f" % (sed_conf_up - sed_median))

            doa_err = doa_gt - doa_pred
            [doa_conf_low, doa_conf_up, doa_median] = compute_confidence(doa_err)
            print("Confidence Interval for DOA is [ %.5f, %.5f ]" % (doa_conf_low, doa_conf_up))
            print("\tMedian is %.5f" % (doa_median))
            print("\tDisplacement: +/- %.5f" % (doa_conf_up - doa_median))
            '''

            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt,
                                                                           data_gen_test.nb_frames_1s())
            if params['azi_only']:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(doa_pred, doa_gt,
                                                                                                 sed_pred, sed_gt)
            else:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(doa_pred, doa_gt,
                                                                                                  sed_pred, sed_gt)

            sed_score[epoch_cnt] = np.mean([sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            doa_score[epoch_cnt] = np.mean([2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                                            1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))])
            seld_score[epoch_cnt] = (sed_score[epoch_cnt] + doa_score[epoch_cnt]) / 2

            if os.path.isdir('./models'):
                plot.imshow(conf_mat, cmap='binary', interpolation='None')
                plot.savefig('models/confusion_matrix.jpg')

        # New confidence computation, differing doa and sed errors
        sed_err = sed_loss[epoch_cnt, 0]
        [sed_conf_low, sed_conf_up] = compute_confidence(sed_err, sed_pred.shape[0])
        print("Confidence Interval for SED error is [ %f, %f ]" % (sed_conf_low, sed_conf_up))

        #doa_err = doa_gt - doa_pred
        #[x_err, y_err, z_err] = compute_doa_confidence(doa_err, n_classes)

        plot_array = [tr_loss[epoch_cnt],  # 0
                      val_loss[epoch_cnt],  # 1
                      sed_loss[epoch_cnt][0],  # 2    er
                      sed_loss[epoch_cnt][1],  # 3    f1
                      doa_loss[epoch_cnt][0],  # 4    avg_accuracy
                      doa_loss[epoch_cnt][1],  # 5    doa_loss_gt
                      doa_loss[epoch_cnt][2],  # 6    doa_loss_pred
                      doa_loss[epoch_cnt][3],  # 7    doa_loss_gt_cnt
                      doa_loss[epoch_cnt][4],  # 8    doa_loss_pred_cnt
                      doa_loss[epoch_cnt][5],  # 9    good_frame_cnt
                      sed_score[epoch_cnt],  # 10
                      doa_score[epoch_cnt],
                      seld_score[epoch_cnt],
                      #doa_conf_low, doa_median,
                      #doa_conf_up, sed_conf_low,
                      #sed_median, sed_conf_up]
                      sed_conf_low, sed_conf_up]

        patience_cnt += 1

        # model.save_weights('{}_model.ckpt'.format(unique_name))
        simple_plotter.save_array_to_csv("{}_plot.csv".format(unique_name), plot_array)
        #simple_plotter.plot_confidence(x_err, y_err, z_err, "ov")
        print("##### Model and metrics saved! #####")

        if seld_score[epoch_cnt] < best_metric:
            best_metric = seld_score[epoch_cnt]
            best_conf_mat = conf_mat
            best_epoch = epoch_cnt
            # Save the weights of the best model so far
            model.save_weights('{}_BEST_model.ckpt'.format(unique_name))
            patience_cnt = 0

        print('epoch_cnt: %d, time: %.2fs, tr_loss: %.4f, val_loss: %.4f, '
              'F1_overall: %.2f, ER_overall: %.2f, '
              'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
              'sed_score: %.4f, doa_score: %.4f, seld_score: %.4f, best_error_metric: %.2f, best_epoch : %d' %
              (
                  epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                  sed_loss[epoch_cnt, 1], sed_loss[epoch_cnt, 0],
                  doa_loss[epoch_cnt, 1], doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
                  sed_score[epoch_cnt], doa_score[epoch_cnt], seld_score[epoch_cnt], best_metric, best_epoch
              )
              )

    # plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, sed_score, doa_score, epoch_cnt)
    print('best_conf_mat : {}'.format(best_conf_mat))
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {},  '.format(best_epoch, best_metric))
    print('DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.format(
        doa_loss[best_epoch, 1], doa_loss[best_epoch, 2], doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(sed_loss[best_epoch, 0], sed_loss[best_epoch, 1]))
    print('unique_name: {} '.format(unique_name))
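Example #16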
    def __init__(self,
                 dataset='ansim',
                 ov=3,
                 split=1,
                 nfft=1024,
                 db=30,
                 wav_extra_name='',
                 desc_extra_name=''):

        # TODO: Change the path according to your machine.
        # TODO: It should point to a folder which consists of sub-folders for audio and metadata

        params = parameter.get_params('1')
        dataset_dir = params['dataset_dir']

        if dataset == 'ansim':
            self._base_folder = 'ansim'
        elif dataset == 'resim':
            #self._base_folder = os.path.join('/proj/asignal/TUT_SELD/', 'doa_data_echoic/')
            self._base_folder = 'resim'
        elif dataset == 'cansim':
            self._base_folder = os.path.join('/proj/asignal/TUT_SELD/',
                                             'doa_circdata/')
        elif dataset == 'cresim':
            self._base_folder = os.path.join('/proj/asignal/TUT_SELD/',
                                             'doa_circdata_echoic/')
        elif dataset == 'real':
            self._base_folder = 'real'
            #self._base_folder = os.path.join('/proj/asignal/TUT_SELD/', 'tut_seld_data/')
        elif dataset == 'foa':
            self._base_folder = dataset_dir

        # Input directories
        if dataset == 'foa':
            self._aud_dir = os.path.join(
                self._base_folder,
                'foa_dev/wav_ov{}_split{}'.format(ov, split))
            self._desc_dir = os.path.join(
                self._base_folder, 'metadata_dev/desc_ov{}_split{}{}'.format(
                    ov, split, desc_extra_name))
        else:
            self._aud_dir = os.path.join(
                self._base_folder,
                'wav_ov{}_split{}_{}db{}'.format(ov, split, db,
                                                 wav_extra_name))
            self._desc_dir = os.path.join(
                self._base_folder,
                'desc_ov{}_split{}{}'.format(ov, split, desc_extra_name))

        # Output directories
        self._label_dir = None
        self._feat_dir = None
        self._feat_dir_norm = None

        # Local parameters
        self._mode = None
        self._ov = ov
        self._split = split
        self._db = db
        self._nfft = nfft
        self._win_len = self._nfft
        self._hop_len = self._nfft // 2  # integer hop length (Python 3 division)
        self._dataset = dataset
        self._eps = np.spacing(float(1e-16))  # np.float was removed in NumPy 1.24

        # If circular-array 8 channels else 4 for Ambisonic
        if 'c' in self._dataset:
            self._nb_channels = 8
        else:
            self._nb_channels = 4

        # Sound event classes dictionary
        self._unique_classes = dict()
        if 'real' in self._dataset:
            # Urbansound8k sound events
            self._unique_classes = \
                {
                    '1': 0,
                    '3': 1,
                    '4': 2,
                    '5': 3,
                    '6': 4,
                    '7': 5,
                    '8': 6,
                    '9': 7
                }
        else:
            # DCASE 2016 Task 2 sound events
            self._unique_classes = \
                {
                    'clearthroat': 2,
                    'cough': 8,
                    'doorslam': 9,
                    'drawer': 1,
                    'keyboard': 6,
                    'keysDrop': 4,
                    'knock': 0,
                    'laughter': 10,
                    'pageturn': 7,
                    'phone': 3,
                    'speech': 5
                }

        self._fs = 48000

        self._hop_len_s = self._nfft / 2.0 / self._fs
        self._nb_frames_1s = int(1 / self._hop_len_s)
        self._frame_res = self._fs / float(self._hop_len)

        self._resolution = 10
        self._azi_list = range(-180, 180, self._resolution)
        self._length = len(self._azi_list)
        #CNG
        self._ele_list = range(-40, 50, self._resolution)
        self._height = len(self._ele_list)
        self._weakness = None

        # For regression task only
        self._default_azi = 180
        #CNG
        self._default_ele = 50

        if self._default_azi in self._azi_list:
            print(
                'ERROR: chosen default_azi value {} should not exist in azi_list'
                .format(self._default_azi))
            exit()
        if self._default_ele in self._ele_list:
            print(
                'ERROR: chosen default_ele value {} should not exist in ele_list'
                .format(self._default_ele))
            exit()

        self._audio_max_len_samples = 60 * self._fs  # TODO: Fix the audio synthesis code to always generate
        # fixed-length (here 60 s) audio. Currently it generates audio till the last active sound event, so it is not
        # always that long. This is a quick fix to overcome that. We need it because, for processing and training, the
        # length of the features has to be fixed.

        self._max_frames = int(
            np.ceil((self._audio_max_len_samples - self._win_len) /
                    float(self._hop_len)))
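        # With the values above (fs = 48000, nfft = 1024, hop = 512): hop_len_s is
        # 512 / 48000 ~ 10.7 ms, nb_frames_1s = int(1 / 0.010667) = 93, and 60 s of
        # audio gives max_frames = ceil((2880000 - 1024) / 512) = 5623 frames.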
Example #17
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) chooses the system configuration in parameter.py.
                                (default) uses default parameters
    """
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')
    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'], params['weakness'],
        int(params['cnn_3d']), job_id
    )
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['split'], db=params['db'], nfft=params['nfft'],
        batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only']
    )

    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['split'], db=params['db'], nfft=params['nfft'],
        batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='test', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'], shuffle=False
    )

    data_in, data_out = data_gen_train.get_data_sizes()
    print(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(
            data_in, data_out
        )
    )

    gt = collect_test_labels(data_gen_test, data_out, params['mode'], params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size: {}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'], params['pool_size'],
            params['rnn_size'], params['fnn_size']
        )
    )

    # Set loader=True to enable transfer learning; loader2=True for transfer learning with different labels
    model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                  nb_cnn2d_filt=params['nb_cnn2d_filt'], pool_size=params['pool_size'],
                                  rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                  classification_mode=params['mode'], weights=params['loss_weights'],
                                  loader=False, loader2=False)
    best_metric = 99999
    conf_mat = None
    best_conf_mat = None
    best_epoch = -1
    patience_cnt = 0
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))
    nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
    for epoch_cnt in range(nb_epoch):
        start = time.time()
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            epochs=1,
            verbose=0
        )
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2
        )
        if params['mode'] == 'regr':
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_test.nb_frames_1s())
            if params['azi_only']:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(doa_pred, doa_gt,
                                                                                                 sed_pred, sed_gt)
            else:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(doa_pred, doa_gt,
                                                                                                  sed_pred, sed_gt)

            epoch_metric_loss[epoch_cnt] = np.mean([
                sed_loss[epoch_cnt, 0],
                1-sed_loss[epoch_cnt, 1],
                2*np.arcsin(doa_loss[epoch_cnt, 1]/2.0)/np.pi,
                1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))]
            )
        plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, epoch_metric_loss)

        patience_cnt += 1
        if epoch_metric_loss[epoch_cnt] < best_metric:
            best_metric = epoch_metric_loss[epoch_cnt]
            best_conf_mat = conf_mat
            best_epoch = epoch_cnt
            model.save('{}_model.h5'.format(unique_name))
            patience_cnt = 0

        print(
            'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
            'F1_overall: %.2f, ER_overall: %.2f, '
            'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
            'error_metric: %.2f, best_error_metric: %.2f, best_epoch : %d' %
            (
                epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                sed_loss[epoch_cnt, 1], sed_loss[epoch_cnt, 0],
                doa_loss[epoch_cnt, 1], doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
                epoch_metric_loss[epoch_cnt], best_metric, best_epoch
            )
        )
        if patience_cnt > params['patience']:
            break

    print('best_conf_mat : {}'.format(best_conf_mat))
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {},  '.format(best_epoch, best_metric))
    print('DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.format(
        doa_loss[best_epoch, 1], doa_loss[best_epoch, 2], doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(sed_loss[best_epoch, 1], sed_loss[best_epoch, 0]))
    print('unique_name: {} '.format(unique_name))
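For reference, the scalar that drives early stopping in Example #17 can be written as a standalone helper. This is a sketch with illustrative parameter names, under the assumption (consistent with the compute_doa_scores_regr_* outputs used above) that doa_error is the mean distance between predicted and reference unit DOA vectors, so it lies in [0, 2].

import numpy as np

def seld_early_stop_metric(er, f1, doa_error, good_frames, total_frames):
    # All four terms lie in [0, 1]; lower is better.
    return np.mean([
        er,                                       # SED error rate
        1.0 - f1,                                 # SED F1 deficit
        2 * np.arcsin(doa_error / 2.0) / np.pi,   # chord distance mapped to an angle in [0, 1]
        1.0 - good_frames / float(total_frames),  # frame-recall deficit
    ])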
Example #18
                                                               class_active]:
                    error_string += '{}: {} - {} | '.format(
                        class_active, label[batch, frame, class_active],
                        output[batch, frame, class_active])
                    is_err = True
                if label[batch, frame, class_active]:
                    count += 1
            if is_err:
                count_frame_err += 1
                if count >= 2:
                    count_overlap_err += 1
    return error_string, count_overlap_err, count_frame_err


# parameter
params = get_params('4')
with open('config/evaluate_config.yaml', 'r') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)
for key, value in config.items():
    print(f"{key:20} {value}")

# Get validation data for the fold set chosen in the config
data_eval = get_data(kind_data='valid',
                     params=params,
                     fold_set=config['fold_set'])

# Get model
model = get_model(model_name=config['model_name'],
                  input_shape=config['input_shape'],
                  params=params)
Example #19
if not os.path.isdir(os.path.abspath(out_path)):
    os.makedirs(os.path.abspath(out_path))
if not os.path.isdir(os.path.abspath(checkpoint_path)):
    os.makedirs(os.path.abspath(checkpoint_path))

evaluate_every = FLAGS.evaluate_every
seq_len = FLAGS.seq_len

# learning-rate schedule
scheduler = dict(learning_rate=FLAGS.learning_rate,
                 decay_rate=FLAGS.decay_rate,
                 warmup_epoch=10,
                 schedule=[200, 600, 1000, 9000, 9500],
                 training_epoch=FLAGS.training_epoch)

params = parameter.get_params(str(FLAGS.task_id))
feat_cls = cls_feature_class.FeatureClass(params)

train_splits, train_check_splits, val_splits, test_splits = None, None, None, None

if params['mode'] == 'dev':
    test_splits = [1]
    val_splits = [2]
    train_splits = [3, 4, 5, 6]

elif params['mode'] == 'eval':
    test_splits = [7, 8]
    val_splits = []
    train_splits = [1, 2, 3, 4, 5, 6]

iseval = (params['mode'] == 'eval')
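The scheduler dict above is only declared in this listing; its consumer is not shown. One plausible reading (a sketch, assuming linear warmup over warmup_epoch epochs and a decay_rate multiplier applied at each milestone in schedule) would be:

def lr_at_epoch(epoch, scheduler):
    lr = scheduler['learning_rate']
    if epoch < scheduler['warmup_epoch']:
        # linear warmup from lr / warmup_epoch up to lr
        return lr * (epoch + 1) / scheduler['warmup_epoch']
    for milestone in scheduler['schedule']:
        if epoch >= milestone:
            lr *= scheduler['decay_rate']
    return lr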
Example #20
def main(args):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in parameters.py. 
                                (default) uses default parameters
    """

    # use parameter set defined by user
    task_id = args.params
    params = parameter.get_params(task_id)

    job_id = args.model_name

    model_dir = 'models/' + args.author + '/' if args.author != "" else 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'],
        params['weakness'], int(params['cnn_3d']), job_id)

    model_name = unique_name

    epoch_manager = JSON_Manager(args.author, unique_name)
    logdir = "logs/" + args.author + "/" + unique_name

    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    session = tf.InteractiveSession()

    file_writer = tf.summary.FileWriter(logdir, session.graph)

    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        ov=params['overlap'],
        split=params['split'],
        db=params['db'],
        nfft=params['nfft'],
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        classifier_mode=params['mode'],
        weakness=params['weakness'],
        datagen_mode='train',
        cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'])

    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        ov=params['overlap'],
        split=params['split'],
        db=params['db'],
        nfft=params['nfft'],
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        classifier_mode=params['mode'],
        weakness=params['weakness'],
        datagen_mode='test',
        cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'],
        shuffle=False)

    data_in, data_out = data_gen_train.get_data_sizes()
    print('FEATURES:\n'
          '\tdata_in: {}\n'
          '\tdata_out: {}\n'.format(data_in, data_out))

    gt = collect_test_labels(data_gen_test, data_out, params['mode'],
                             params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print('MODEL:\n'
          '\tdropout_rate: {}\n'
          '\tCNN: nb_cnn_filt: {}, pool_size: {}\n'
          '\trnn_size: {}, fnn_size: {}\n'.format(
              params['dropout_rate'], params['nb_cnn3d_filt']
              if params['cnn_3d'] else params['nb_cnn2d_filt'],
              params['pool_size'], params['rnn_size'], params['fnn_size']))

    model = keras_model.get_model(data_in=data_in,
                                  data_out=data_out,
                                  dropout_rate=params['dropout_rate'],
                                  nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                  pool_size=params['pool_size'],
                                  rnn_size=params['rnn_size'],
                                  fnn_size=params['fnn_size'],
                                  classification_mode=params['mode'],
                                  weights=params['loss_weights'])

    initial = epoch_manager.get_epoch()
    if initial != 0:
        print(f"Resume training from epoch {initial}")
        print("Loading already trained model...")
        # In order to load custom layers we need to link the references to the custom objects
        model = load_model(os.path.join(model_dir, model_name + "_model.h5"),
                           custom_objects={
                               'QuaternionConv2D': QuaternionConv2D,
                               'QuaternionGRU': QuaternionGRU,
                               'QuaternionDense': QuaternionDense
                           })

    best_metric = epoch_manager.get_best_metric()
    best_std = epoch_manager.get_best_std()
    conf_mat = None
    best_conf_mat = epoch_manager.get_best_conf_mat()
    best_epoch = epoch_manager.get_best_epoch()
    patience_cnt = epoch_manager.get_patience_cnt()
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    std_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    seld_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))

    # Declare one scalar TF summary per tracked quantity, each fed from a placeholder
    def scalar_summary(tag, ph_name):
        hold = tf.placeholder(tf.float32, shape=None, name=ph_name)
        return hold, tf.summary.scalar(tag, hold)

    time_hold, time_summ = scalar_summary('time', 'time_summary')
    tr_loss_hold, tr_loss_summ = scalar_summary('tr_loss', 'tr_loss_summary')
    val_loss_hold, val_loss_summ = scalar_summary('val_loss', 'val_loss_summary')
    f1_hold, f1_summ = scalar_summary('F1_overall', 'f1_summary')
    er_hold, er_summ = scalar_summary('ER_overall', 'er_summary')
    doa_error_gt_hold, doa_error_gt_summ = scalar_summary('doa_error_gt', 'doa_error_gt_summary')
    doa_error_pred_hold, doa_error_pred_summ = scalar_summary('doa_error_pred', 'doa_error_pred_summary')
    good_pks_hold, good_pks_summ = scalar_summary('good_pks_ratio', 'good_pks_summary')
    sed_score_hold, sed_score_summ = scalar_summary('sed_score', 'sed_score_summary')
    doa_score_hold, doa_score_summ = scalar_summary('doa_score', 'doa_score_summary')
    seld_score_hold, seld_score_summ = scalar_summary('seld_score', 'seld_score_summary')
    std_score_hold, std_score_summ = scalar_summary('std_score', 'std_score_summary')
    best_error_metric_hold, best_error_metric_summ = scalar_summary('best_error_metric', 'best_error_metric_summary')
    best_epoch_hold, best_epoch_summ = scalar_summary('best_epoch', 'best_epoch_summary')
    best_std_hold, best_std_summ = scalar_summary('best_std', 'best_std_summary')

    merged = tf.summary.merge_all()

    for epoch_cnt in range(initial, params['nb_epochs']):
        start = time.time()
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=5 if params['quick_test'] else
            data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=5 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            epochs=1,
            verbose=1)
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=5 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            verbose=2)
        print("pred:", pred[1].shape)
        if params['mode'] == 'regr':
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_test.nb_frames_1s())
            if params['azi_only']:
                doa_loss[
                    epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                        doa_pred, doa_gt, sed_pred, sed_gt)
            else:
                doa_loss[
                    epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                        doa_pred, doa_gt, sed_pred, sed_gt)

            sed_score[epoch_cnt] = np.mean(
                [sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            doa_score[epoch_cnt] = np.mean([
                2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))
            ])
            seld_score[epoch_cnt] = np.mean(
                [sed_score[epoch_cnt], doa_score[epoch_cnt]])

            # standard deviation
            std_score[epoch_cnt] = np.std(
                [sed_score[epoch_cnt], doa_score[epoch_cnt]])

        plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss,
                       sed_score, doa_score)

        patience_cnt += 1
        epoch_manager.increase_patience_cnt()

        model.save('{}_model.h5'.format(unique_name))

        if seld_score[epoch_cnt] < best_metric:
            best_metric = seld_score[epoch_cnt]
            epoch_manager.set_best_metric(best_metric)

            best_std = std_score[epoch_cnt]
            epoch_manager.set_best_std(best_std)

            best_conf_mat = conf_mat
            epoch_manager.set_best_conf_mat(conf_mat)

            best_epoch = epoch_cnt
            epoch_manager.set_best_epoch(best_epoch)

            model.save('{}_best_model.h5'.format(unique_name))
            patience_cnt = 0
            epoch_manager.reset_patience_cnt()

        if patience_cnt > params['patience']:
            print(
                f"\n----  PATIENCE TRIGGERED AFTER {epoch_cnt} EPOCHS  ----\n")
            break

        summary = session.run(merged, feed_dict={
            time_hold: time.time() - start,
            tr_loss_hold: tr_loss[epoch_cnt],
            val_loss_hold: val_loss[epoch_cnt],
            f1_hold: sed_loss[epoch_cnt, 1],
            er_hold: sed_loss[epoch_cnt, 0],
            doa_error_gt_hold: doa_loss[epoch_cnt, 1],
            doa_error_pred_hold: doa_loss[epoch_cnt, 2],
            good_pks_hold: doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
            sed_score_hold: sed_score[epoch_cnt],
            doa_score_hold: doa_score[epoch_cnt],
            seld_score_hold: seld_score[epoch_cnt],
            std_score_hold: std_score[epoch_cnt],
            best_error_metric_hold: best_metric,
            best_epoch_hold: best_epoch,
            best_std_hold: best_std,
        })
        file_writer.add_summary(summary, epoch_cnt)

        print(
            'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
            'F1_overall: %.2f, ER_overall: %.2f, '
            'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
            'sed_score: %.2f, doa_score: %.2f, best_error_metric: %.2f, best_epoch : %d, best_std: %.2f'
            % (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
               val_loss[epoch_cnt], sed_loss[epoch_cnt, 1],
               sed_loss[epoch_cnt, 0], doa_loss[epoch_cnt, 1],
               doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] /
               float(sed_gt.shape[0]), sed_score[epoch_cnt],
               doa_score[epoch_cnt], best_metric, best_epoch, best_std))
        epoch_manager.increase_epoch()
    lower_confidence, upper_confidence = evaluation_metrics.compute_confidence_interval(
        best_metric, best_std, params['nb_epochs'],
        confid_coeff=1.96)  # 1.96 for a 95% CI

    print("\n----  FINISHED TRAINING  ----\n")

    print('best_conf_mat : {}'.format(best_conf_mat))
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {},  '.format(
        best_epoch, best_metric))
    print(
        'DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.
        format(doa_loss[best_epoch, 1], doa_loss[best_epoch, 2],
               doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(
        sed_loss[best_epoch, 0], sed_loss[best_epoch, 1]))
    print('Confidence Interval: lower_interval: {}, upper_interval: {}'.format(
        lower_confidence, upper_confidence))
    print('unique_name: {} '.format(unique_name))
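compute_confidence_interval itself is not shown in this listing. A minimal sketch consistent with the call above, assuming a normal-approximation interval mean ± z·std/√n with z = 1.96 for 95% coverage (this definition is an assumption, not the actual helper):

import numpy as np

def compute_confidence_interval(mean, std, n, confid_coeff=1.96):
    # Normal-approximation CI around the mean; assumed definition
    half_width = confid_coeff * std / np.sqrt(n)
    return mean - half_width, mean + half_width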
Example #21
                                      '.log')
    filehandler.setLevel(logging.DEBUG)
    filehandler.setFormatter(file_formatter)
    logger.addHandler(filehandler)

    if args.resume:
        checkpoint = torch.load(args.resume)
        params = checkpoint['params']
        model = get_model(params['model'])
        net = model(cgnet_params=params['cgnet_params']).cuda()
        net.load_state_dict(checkpoint['model_state_dict'])
        criterion_sed = nn.BCEWithLogitsLoss(
            pos_weight=torch.FloatTensor([params['bce_weight']])).cuda()
        criterion_sed.load_state_dict(checkpoint['criterion_sed_state_dict'])
    else:
        params = parameter.get_params()
        model = get_model(params['model'])
        net = model(cgnet_params=params['cgnet_params']).cuda()
        criterion_sed = nn.BCEWithLogitsLoss(
            pos_weight=torch.FloatTensor([params['bce_weight']])).cuda()

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=params['learning_rate'],
                                 weight_decay=params['weight_decay'])
    if params['learning_rate_scheduling']:
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=30,
                                                    gamma=0.1)
    logger.info('Parameters: ' + str(params))
    shutil.copy2('./parameter.py',
                 os.path.join('result', experiment_id + '_param.py'))
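The resume branch in Example #21 reads params, model_state_dict, and criterion_sed_state_dict back from the checkpoint, but the matching save side is not shown. A sketch of what it might look like, reusing net, params, criterion_sed, and experiment_id from the example; the output path and filename suffix are assumptions:

import os
import torch

torch.save(
    {
        'params': params,
        'model_state_dict': net.state_dict(),
        'criterion_sed_state_dict': criterion_sed.state_dict(),
    },
    os.path.join('result', experiment_id + '_checkpoint.pt'))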
Example #22
def main(args):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in parameters.py. 
                                (default) uses default parameters
    """

    # use parameter set defined by user
    task_id = args.params
    params = parameter.get_params(task_id)

    job_id = args.model_name

    model_dir = 'models/' + args.author + '/' if args.author != "" else 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'],
        params['weakness'], int(params['cnn_3d']), job_id)

    model_name = unique_name

    epoch_manager = JSON_Manager(args.author, unique_name)
    logdir = "logs/" + args.author + "/" + unique_name

    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    data_gen_test = cls_data_generator_seld.DataGenerator(
        dataset=params['dataset'],
        ov=params['overlap'],
        split=params['split'],
        db=params['db'],
        nfft=params['nfft'],
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        classifier_mode=params['mode'],
        weakness=params['weakness'],
        datagen_mode='test',
        cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'],
        shuffle=False)

    data_in, data_out = data_gen_test.get_data_sizes()
    print('FEATURES:\n'
          '\tdata_in: {}\n'
          '\tdata_out: {}\n'.format(data_in, data_out))

    gt = collect_test_labels(data_gen_test, data_out, params['mode'],
                             params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print('MODEL:\n'
          '\tdropout_rate: {}\n'
          '\tCNN: nb_cnn_filt: {}, pool_size: {}\n'
          '\trnn_size: {}, fnn_size: {}\n'.format(
              params['dropout_rate'], params['nb_cnn3d_filt']
              if params['cnn_3d'] else params['nb_cnn2d_filt'],
              params['pool_size'], params['rnn_size'], params['fnn_size']))

    model = load_model(os.path.join(model_dir, model_name + "_best_model.h5"),
                       custom_objects={
                           'QuaternionConv2D': QuaternionConv2D,
                           'QuaternionGRU': QuaternionGRU,
                           'QuaternionDense': QuaternionDense
                       })
    model.summary()
    plot_model(model, to_file=os.path.join(model_dir, 'model.png'))

    best_metric = epoch_manager.get_best_metric()
    conf_mat = None
    best_conf_mat = epoch_manager.get_best_conf_mat()
    best_epoch = epoch_manager.get_best_epoch()
    patience_cnt = epoch_manager.get_patience_cnt()
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    std_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    seld_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))

    epoch_cnt = 0

    pred = model.predict_generator(
        generator=data_gen_test.generate(),
        steps=data_gen_test.get_total_batches_in_data(),
        use_multiprocessing=False,
        verbose=2)
    print("pred[1]:", pred[1].shape)
    if params['mode'] == 'regr':
        sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
        print(f"sed_pred: {sed_pred.shape}")
        doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])
        print(f"doa_pred: {doa_pred.shape}")
        sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
            sed_pred, sed_gt, data_gen_test.nb_frames_1s())

        if params['azi_only']:
            doa_loss[
                epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                    doa_pred, doa_gt, sed_pred, sed_gt)
        else:
            doa_loss[
                epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)

        sed_score[epoch_cnt] = np.mean(
            [sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
        print(f"ER: {sed_loss[epoch_cnt, 0]}")
        er = sed_loss[epoch_cnt, 0]

        interval = 1.96 * np.sqrt(((er) * (1 - er)) / sed_pred.shape[0])
        print(f"interval: {interval}")

        doa_score[epoch_cnt] = np.mean([
            2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
            1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))
        ])
        seld_score[epoch_cnt] = np.mean(
            [sed_score[epoch_cnt], doa_score[epoch_cnt]])

        # Save per-axis absolute DOA errors (x, then y) for later analysis
        for axis_idx, axis_name in enumerate(('x', 'y')):
            doa_error = np.reshape(doa_pred - doa_gt,
                                   newshape=(doa_pred.shape[0], 11, 2))
            doa_error = np.absolute(doa_error[:, :, axis_idx])
            print(f"doa_error: {doa_error.shape}")
            doa_error = np.reshape(doa_error,
                                   newshape=(doa_error.shape[0] *
                                             doa_error.shape[1]))
            print(f"doa_error: {doa_error.shape}")
            np.save(model_name + "_" + axis_name, doa_error)

        # standard deviation
        std_score[epoch_cnt] = np.std(
            [sed_score[epoch_cnt], doa_score[epoch_cnt]])

        print(f"{er-interval}   /   {er+interval}")
    #lower_confidence, upper_confidence = evaluation_metrics.compute_confidence_interval(best_metric,best_std, params['nb_epochs'], confid_coeff=1.96) # 1.96 for a 95% CI

    print("\n----  FINISHED  ----\n")