Example 1
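The extract below omits its import header. A plausible set, inferred from the
calls in the code (the project-local module names are assumptions based on how
they are referenced), would be:

import os
import sys
import time

import numpy as np
from keras.models import load_model

import cls_data_generator
import cls_feature_class
import evaluation_metrics
import keras_model
import parameter

# collect_test_labels(...) and plot_functions(...) are helper functions
# defined elsewhere in the same file and are not reproduced in this extract.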
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: task_id - (optional) To choose the system configuration in parameter.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1

    """
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print('Using default inputs for now')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]
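    # Entries at the same index across the three lists define one
    # cross-validation fold: fold k trains on train_splits[k], validates on
    # val_splits[k], and tests on test_splits[k] (e.g. in 'dev' mode, fold 0
    # trains on splits [3, 4], validates on split 2, and tests on split 1).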

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=train_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=val_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False)
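        # NOTE: shuffling is disabled so that the validation generator yields
        # frames in a fixed order; the reference labels collected below then
        # line up one-to-one with the generator's predictions.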

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        gt = collect_test_labels(data_gen_val, data_out, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        # rescaling the reference elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        doa_gt[:, nb_classes:] = doa_gt[:, nb_classes:] / (180. / def_elevation)
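        # e.g. with a hypothetical def_elevation of 60, a reference value of
        # 180 rescales to 60 and -90 rescales to -30 (plain linear scaling).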

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, pool_size: {}\n\trnn_size: {}, fnn_size: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['pool_size'], params['rnn_size'],
                    params['fnn_size']))

        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_metric = np.zeros((params['nb_epochs'], 6))
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                validation_data=data_gen_val.generate(),
                validation_steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            doa_pred[:, nb_classes:] = doa_pred[:, nb_classes:] / (180. / def_elevation)

            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr(
                doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.compute_seld_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, val_loss, sed_metric,
                           doa_metric, seld_metric)

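            # Early stopping bookkeeping: patience_cnt resets whenever the
            # SELD score improves (and the model is checkpointed); training
            # stops once params['patience'] epochs pass without improvement.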
            patience_cnt += 1
            if seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
                'ER_overall: %.2f, F1_overall: %.2f, '
                'doa_error_pred: %.2f, good_pks_ratio: %.2f, '
                'seld_score: %.2f, best_seld_score: %.2f, best_epoch: %d\n' %
                (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                 val_loss[epoch_cnt], sed_metric[epoch_cnt, 0],
                 sed_metric[epoch_cnt, 1], doa_metric[epoch_cnt, 0],
                 doa_metric[epoch_cnt, 1], seld_metric[epoch_cnt],
                 best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1],
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1],
            best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score: {}'.format(best_seld_metric))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=split,
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            per_file=params['dcase_output'],
            is_eval=(params['mode'] == 'eval'))
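        # per_file batching (an inference from the dump loop below, not stated
        # in the original): it keeps each recording's frames contiguous in the
        # stacked prediction matrix so results can be written one CSV per file.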

        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        model = load_model(model_name)
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

        # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (
            180. / def_elevation)

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()
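            # NOTE: the DOA regressor outputs angles in radians; the
            # 180 / np.pi factor below converts them to the degrees expected
            # by the DCASE output format.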

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = evaluation_metrics.regression_label_format_to_output_format(
                    data_gen_test,
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :] * 180 /
                    np.pi)
                evaluation_metrics.write_output_format_file(
                    output_file, output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
            # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (
                180. / def_elevation)

            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.compute_seld_metric(
                test_sed_loss, test_doa_loss)

            avg_scores_test.append([
                test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
                test_doa_loss[1], test_metric_loss
            ])
            print('Results on test split:')
        print('\tSELD_score: {}'.format(test_metric_loss))
            print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
                test_doa_loss[0], test_doa_loss[1]))
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
                test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print(
            '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
            .format(cnt, avg_scores_val[cnt][0], avg_scores_val[cnt][1],
                    avg_scores_val[cnt][2], avg_scores_val[cnt][3],
                    avg_scores_val[cnt][4]))

    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print(
                '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
                .format(cnt, avg_scores_test[cnt][0], avg_scores_test[cnt][1],
                        avg_scores_test[cnt][2], avg_scores_test[cnt][3],
                        avg_scores_test[cnt][4]))
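
For context, a typical entry point for this script would be a sketch like the
following (the try/except wrapper is an assumption, not taken from the
original extract):

if __name__ == '__main__':
    try:
        sys.exit(main(sys.argv))
    except (ValueError, IOError) as e:
        sys.exit(e)

It can then be invoked as, e.g., python seld.py 1 run1, where 1 picks the
parameter set from parameter.py and run1 tags the output files.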
Example 2
def calculate_SELD_metrics(gt_meta_dir, pred_meta_dir, score_type):
    '''Calculate metrics using the official tool. This part of the code is modified from:
    https://github.com/sharathadavanne/seld-dcase2019/blob/master/calculate_SELD_metrics.py
    
    Args:
      gt_meta_dir: ground truth meta directory. 
      pred_meta_dir: prediction meta directory.
      score_type: 'all', 'split', 'ov', 'ir'
      
    Returns:
      metrics: dict
    '''

    # Load feature class
    feat_cls = cls_feature_class.FeatureClass()

    # collect gt files info
    # gt_meta_files = [fn for fn in os.listdir(gt_meta_dir) if fn.endswith('.csv') and not fn.startswith('.')]

    # collect pred files info
    pred_meta_files = [
        fn for fn in os.listdir(pred_meta_dir)
        if fn.endswith('.csv') and not fn.startswith('.')
    ]

    # Load evaluation metric class
    eval = evaluation_metrics.SELDMetrics(nb_frames_1s=feat_cls.nb_frames_1s(),
                                          data_gen=feat_cls)

    # Calculate scores for different splits, overlapping sound events, and impulse responses (reverberant scenes)
    # score_type = 'all', 'split', 'ov', 'ir'
    split_cnt_dict = get_nb_files(pred_meta_files, _group=score_type)

    sed_error_rate = []
    sed_f1_score = []
    doa_error = []
    doa_frame_recall = []
    seld_metric = []

    # Calculate scores across files for a given score_type
    for split_key in np.sort(list(split_cnt_dict)):
        eval.reset()  # Reset the evaluation metric parameters
        for _, pred_file in enumerate(split_cnt_dict[split_key]):
            # Load predicted output format file
            pred_dict = evaluation_metrics.load_output_format_file(
                os.path.join(pred_meta_dir, pred_file))

            # Load reference description file
            gt_desc_file_dict = feat_cls.read_desc_file(
                os.path.join(gt_meta_dir, pred_file.replace('.npy', '.csv')))

            # Generate classification labels for SELD
            gt_labels = feat_cls.get_clas_labels_for_file(gt_desc_file_dict)
            pred_labels = evaluation_metrics.output_format_dict_to_classification_labels(
                pred_dict, feat_cls)

            # Calculate SED and DOA scores
            eval.update_sed_scores(pred_labels.max(2), gt_labels.max(2))
            eval.update_doa_scores(pred_labels, gt_labels)

        # Overall SED and DOA scores
        sed_er, sed_f1 = eval.compute_sed_scores()
        doa_err, doa_fr = eval.compute_doa_scores()
        seld_metr = evaluation_metrics.compute_seld_metric([sed_er, sed_f1],
                                                           [doa_err, doa_fr])

        sed_error_rate.append(sed_er)
        sed_f1_score.append(sed_f1)
        doa_error.append(doa_err)
        doa_frame_recall.append(doa_fr)
        seld_metric.append(seld_metr)

    sed_scores = [sed_error_rate, sed_f1_score]
    doa_er_metric = [doa_error, doa_frame_recall]

    sed_scores = np.array(sed_scores).squeeze()
    doa_er_metric = np.array(doa_er_metric).squeeze()
    seld_metric = np.array(seld_metric).squeeze()

    return sed_scores, doa_er_metric, seld_metric
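
A minimal usage sketch, with hypothetical directory names (gt_meta_dir should
hold the reference CSV metadata and pred_meta_dir the DCASE-format CSVs dumped
during testing):

sed_scores, doa_er_metric, seld_metric = calculate_SELD_metrics(
    gt_meta_dir='metadata_dev',
    pred_meta_dir='results/1_1_foa_dev',
    score_type='all')
print('SELD score: {}'.format(seld_metric))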
Example 3
def main(argv):
    task_id = '1' if len(argv) < 2 else argv[1]
    # job_id is reconstructed from Example 1; it is needed below to rebuild
    # the saved model's unique name.
    job_id = 1 if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)

    avg_scores_test = []  # initialized here; this trimmed example has no fold loop
    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        # test_splits = [1, 2, 3, 4]
        # val_splits = [2, 3, 4, 1]
        # train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]
        # TODO for debug only
        test_splits = [1]
        val_splits = [1]
        train_splits = [[1, 1]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    # ------------------  Calculate metric scores for unseen test split ---------------------------------
    print('Loading testing dataset:')
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        split=test_splits[0],  # this debug setup evaluates only the first test split
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        feat_label_dir=params['feat_label_dir'],
        shuffle=False,
        per_file=params['dcase_output'],
        is_eval=(params['mode'] == 'eval'))

    print('\nLoading the best model and predicting results on the testing split')
    # NOTE: reconstructed from Example 1 so that pred_test is defined below;
    # unique_name must match the name under which the model was saved.
    unique_name = os.path.join(
        params['model_dir'],
        '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'],
                                     params['mode'], test_splits[0]))
    model = load_model('{}_model.h5'.format(unique_name))
    pred_test = model.predict_generator(
        generator=data_gen_test.generate(),
        steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
        verbose=2)

    test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
    test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

    # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
    nb_classes = data_gen_test.get_nb_classes()
    def_elevation = data_gen_test.get_default_elevation()
    test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (180. / def_elevation)

    if params['dcase_output']:
        # Dump results in DCASE output format for calculating final scores
        dcase_dump_folder = os.path.join(
            params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'],
                                                   params['mode']))
        cls_feature_class.create_folder(dcase_dump_folder)
        print(
            'Dumping recording-wise results in: {}'.format(dcase_dump_folder))

        test_filelist = data_gen_test.get_filelist()
        # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
        max_frames_with_content = data_gen_test.get_nb_frames()

        # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
        # zero padding in the remaining frames
        frames_per_file = data_gen_test.get_frame_per_file()

        for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
            output_file = os.path.join(
                dcase_dump_folder,
                test_filelist[file_cnt].replace('.npy', '.csv'))
            dc = file_cnt * frames_per_file
            output_dict = evaluation_metrics.regression_label_format_to_output_format(
                data_gen_test,
                test_sed_pred[dc:dc + max_frames_with_content, :],
                test_doa_pred[dc:dc + max_frames_with_content, :] * 180 /
                np.pi)
            evaluation_metrics.write_output_format_file(
                output_file, output_dict)

    if params['mode'] == 'dev':
        _, test_data_out = data_gen_test.get_data_sizes()  # two-value return, as in Example 1
        test_gt = collect_test_labels(data_gen_test, test_data_out,
                                      params['quick_test'])
        test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
        test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
        # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (
            180. / def_elevation)

        test_sed_loss = evaluation_metrics.compute_sed_scores(
            test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
        test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
            test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
        test_metric_loss = evaluation_metrics.compute_seld_metric(
            test_sed_loss, test_doa_loss)

        avg_scores_test.append([
            test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
            test_doa_loss[1], test_metric_loss
        ])
        print('Results on test split:')
        print('\tSELD_score: {}'.format(test_metric_loss))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            test_doa_loss[0], test_doa_loss[1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            test_sed_loss[0], test_sed_loss[1]))
    # ---- Score the dumped DCASE output with the official metric tool ----
    # The setup below is reconstructed (cf. Example 2) so that this fragment's
    # inputs are defined: predictions are read back from the dump folder
    # created above (assumes params['dcase_output'] is enabled), while
    # ref_desc_files is a placeholder that must point at the dataset's
    # reference description directory. get_nb_files() is a grouping helper
    # defined alongside these functions (not shown here).
    feat_cls = cls_feature_class.FeatureClass()
    eval = evaluation_metrics.SELDMetrics(nb_frames_1s=feat_cls.nb_frames_1s(),
                                          data_gen=feat_cls)
    score_type = 'all'  # 'all', 'split', 'ov', or 'ir'
    pred_output_format_files = dcase_dump_folder
    ref_desc_files = '<path-to-reference-description-files>'  # placeholder
    pred_files = [
        fn for fn in os.listdir(pred_output_format_files)
        if fn.endswith('.csv') and not fn.startswith('.')
    ]
    split_cnt_dict = get_nb_files(pred_files, _group=score_type)  # collect files corresponding to score_type

    # Calculate scores across files for a given score_type
    for split_key in np.sort(list(split_cnt_dict)):
        eval.reset()    # Reset the evaluation metric parameters
        for pred_cnt, pred_file in enumerate(split_cnt_dict[split_key]):
            # Load predicted output format file
            pred_dict = evaluation_metrics.load_output_format_file(os.path.join(pred_output_format_files, pred_file))

            # Load reference description file
            gt_desc_file_dict = feat_cls.read_desc_file(os.path.join(ref_desc_files, pred_file.replace('.npy', '.csv')))

            # Generate classification labels for SELD
            gt_labels = feat_cls.get_clas_labels_for_file(gt_desc_file_dict)
            pred_labels = evaluation_metrics.output_format_dict_to_classification_labels(pred_dict, feat_cls)

            # Calculate SED and DOA scores
            eval.update_sed_scores(pred_labels.max(2), gt_labels.max(2))
            eval.update_doa_scores(pred_labels, gt_labels)

        # Overall SED and DOA scores
        er, f = eval.compute_sed_scores()
        doa_err, frame_recall = eval.compute_doa_scores()
        seld_scr = evaluation_metrics.compute_seld_metric([er, f], [doa_err, frame_recall])

        print('\nAverage score for {} {} data'.format(
            score_type, 'fold' if score_type == 'all' else split_key))
        print('SELD score: {}'.format(seld_scr))
        print('SED metrics: er: {}, f: {}'.format(er, f))
        print('DOA metrics: doa error: {}, frame recall: {}'.format(doa_err, frame_recall))