def visualize(self, data_type, max_iteration=None):
        '''Visualize the log mel spectrogram. 
        
        Args:
          data_type: 'train' | 'validate'
          max_iteration: None | int, use maximum iteration of partial data for
              fast evaluation
        '''

        mel_bins = config.mel_bins
        audio_duration = config.audio_duration
        frames_num = config.frames_num
        coarse_classes_num = config.coarse_classes_num
        coarse_idx_to_lb = config.coarse_idx_to_lb

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_input=True,
                              return_target=True)

        rows_num = 3
        cols_num = 3

        fig, axs = plt.subplots(rows_num, cols_num, figsize=(10, 5))

        for k in range(coarse_classes_num):
            for n, audio_name in enumerate(output_dict['audio_name']):
                if output_dict['coarse_target'][n, k] > 0.5:
                    row = k // cols_num
                    col = k % cols_num
                    title = '{}\n{}'.format(coarse_idx_to_lb[k], audio_name)
                    axs[row, col].set_title(title, color='r')
                    logmel = inverse_scale(output_dict['feature'][n],
                                           self.data_generator.scalar['mean'],
                                           self.data_generator.scalar['std'])
                    axs[row, col].matshow(logmel.T,
                                          origin='lower',
                                          aspect='auto',
                                          cmap='jet')
                    axs[row, col].set_xticks([0, frames_num])
                    axs[row, col].set_xticklabels(
                        ['0', '{:.1f} s'.format(audio_duration)])
                    axs[row, col].xaxis.set_ticks_position('bottom')
                    axs[row, col].set_ylabel('Mel bins')
                    axs[row, col].set_yticks([])
                    break

        for k in range(coarse_classes_num, rows_num * cols_num):
            row = k // cols_num
            col = k % cols_num
            axs[row, col].set_visible(False)

        fig.tight_layout(pad=0, w_pad=0, h_pad=0)
        plt.show()
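Note: inverse_scale is used throughout these examples to undo the per-bin standardization of the log mel features before plotting, but it is not defined here. A minimal sketch, assuming the data generator's scalar dict stores per-mel-bin 'mean' and 'std' arrays computed on the training features:

import numpy as np

def inverse_scale(x, mean, std):
    """Undo per-bin standardization: x was scaled as (logmel - mean) / std.

    Args:
      x: (frames_num, mel_bins), scaled log mel spectrogram
      mean: (mel_bins,), per-bin mean of the training features
      std: (mel_bins,), per-bin standard deviation of the training features

    Returns:
      (frames_num, mel_bins), log mel spectrogram in the original scale
    """
    return x * std + mean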
Example #2
    def evaluate(self, data_loader):

        # Forward
        output_dict = forward(model=self.model,
                              generator=data_loader,
                              return_target=True)

        clipwise_output = output_dict[
            'clipwise_output']  # (audios_num, classes_num)
        target = output_dict['target']  # (audios_num, classes_num)

        cm = metrics.confusion_matrix(np.argmax(target, axis=-1),
                                      np.argmax(clipwise_output, axis=-1),
                                      labels=None)
        precision = calculate_precision(target, clipwise_output)
        recall = calculate_recall(target, clipwise_output)
        f_score = calculate_f_score(target, clipwise_output)
        # print('Val recall: {}'.format(recall))
        # print('Val accuracy: {}'.format(accuracy))
        # print(cm)

        statistics = {
            'precision': precision,
            'recall': recall,
            'f_score': f_score,
            'cm': cm
        }

        return statistics, output_dict
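calculate_precision, calculate_recall and calculate_f_score are not shown in this example. A plausible sketch, assuming single-label clips and macro averaging over argmax predictions (an assumption, not necessarily the original definition):

import numpy as np
from sklearn import metrics

def calculate_precision(target, output, average='macro'):
    # target, output: (audios_num, classes_num); compare argmax class indices
    return metrics.precision_score(
        np.argmax(target, axis=-1), np.argmax(output, axis=-1), average=average)

def calculate_recall(target, output, average='macro'):
    return metrics.recall_score(
        np.argmax(target, axis=-1), np.argmax(output, axis=-1), average=average)

def calculate_f_score(target, output, average='macro'):
    return metrics.f1_score(
        np.argmax(target, axis=-1), np.argmax(output, axis=-1), average=average)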
Example #3
    def evaluate(self, data_loader):
        """Forward evaluation data and calculate statistics.

        Args:
          data_loader: object

        Returns:
          statistics: dict, 
              {'average_precision': (classes_num,), 'auc': (classes_num,)}
        """

        # Forward
        output_dict = forward(model=self.model,
                              generator=data_loader,
                              return_target=True)

        clipwise_output = output_dict[
            'clipwise_output']  # (audios_num, classes_num)
        target = output_dict['target']  # (audios_num, classes_num)

        average_precision = metrics.average_precision_score(target,
                                                            clipwise_output,
                                                            average=None)

        auc = metrics.roc_auc_score(target, clipwise_output, average=None)

        statistics = {'average_precision': average_precision, 'auc': auc}

        return statistics
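For reference, average=None makes both metrics return per-class arrays of shape (classes_num,), which callers typically reduce with np.mean. A self-contained toy illustration (synthetic data, not from any dataset):

import numpy as np
from sklearn import metrics

rng = np.random.RandomState(0)
target = (rng.rand(100, 5) > 0.7).astype(float)                 # (audios_num, classes_num)
clipwise_output = np.clip(target + 0.3 * rng.randn(100, 5), 0, 1)

average_precision = metrics.average_precision_score(target, clipwise_output, average=None)
auc = metrics.roc_auc_score(target, clipwise_output, average=None)

print('AP per class:', np.round(average_precision, 3))          # shape: (classes_num,)
print('mAP: {:.3f}, mAUC: {:.3f}'.format(np.mean(average_precision), np.mean(auc)))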
Example #4
    def visualize(self, data_type, source, max_iteration=None):
        '''Visualize log mel spectrogram of different sound classes.
        
        Args: 
          data_type: 'train' | 'validate'
          source: 'a' | 'b' | 'c'
          max_iteration: None | int, maximum iteration to run to speed up evaluation
        '''
        mel_bins = config.mel_bins
        audio_duration = config.audio_duration
        frames_num = config.frames_num
        labels = config.labels
        in_domain_classes_num = len(config.labels) - 1
        idx_to_lb = config.idx_to_lb

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, source=source, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_input=True,
                              return_target=True)

        # Plot log mel spectrogram of different sound classes
        rows_num = 3
        cols_num = 4

        fig, axs = plt.subplots(rows_num, cols_num, figsize=(10, 5))

        for k in range(in_domain_classes_num):
            for n, audio_name in enumerate(output_dict['audio_name']):
                if output_dict['target'][n, k] == 1:
                    title = idx_to_lb[k]
                    row = k // cols_num
                    col = k % cols_num
                    axs[row, col].set_title(title, color='r')
                    logmel = inverse_scale(output_dict['feature'][n],
                                           self.data_generator.scalar['mean'],
                                           self.data_generator.scalar['std'])
                    axs[row, col].matshow(logmel.T,
                                          origin='lower',
                                          aspect='auto',
                                          cmap='jet')
                    axs[row, col].set_xticks([0, frames_num])
                    axs[row, col].set_xticklabels(
                        ['0', '{:.1f} s'.format(audio_duration)])
                    axs[row, col].xaxis.set_ticks_position('bottom')
                    axs[row, col].set_ylabel('Mel bins')
                    axs[row, col].set_yticks([])
                    break

        for k in range(in_domain_classes_num, rows_num * cols_num):
            row = k // cols_num
            col = k % cols_num
            axs[row, col].set_visible(False)

        fig.tight_layout(pad=0, w_pad=0, h_pad=0)
        plt.show()
Example #5
    def evaluate(self,
                 data_type,
                 metadata_dir,
                 submissions_dir,
                 max_validate_num=None):
        '''Evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          metadata_dir: string, directory of reference meta csvs
          submissions_dir: string, directory to write out submission csvs
          max_validate_num: None | int, maximum iteration to run to speed up 
              evaluation
        '''

        # Forward
        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_validate_num=max_validate_num)

        list_dict = forward(model=self.model,
                            generate_func=generate_func,
                            cuda=self.cuda,
                            return_target=True)

        # Calculate loss
        (total_loss, event_loss,
         position_loss) = self.calculate_loss(list_dict)

        logging.info('{:<20} {}: {:.3f}, {}: {:.3f}, {}: {:.3f}'
                     ''.format(data_type + ' statistics: ', 'total_loss',
                               total_loss, 'event_loss', event_loss,
                               'position_loss', position_loss))

        # Write out submission and evaluate using code provided by organizer
        write_submission(list_dict, submissions_dir)

        prediction_paths = [
            os.path.join(submissions_dir, '{}.csv'.format(dict['name']))
            for dict in list_dict
        ]

        statistics = calculate_metrics(metadata_dir, prediction_paths)

        for key in statistics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :',
                                                    statistics[key]))

        return statistics
    def evaluate(self, data_loader):

        # Forward
        output_dict = forward(
            model=self.model, 
            generator=data_loader, 
            return_target=True)

        clipwise_output = output_dict['clipwise_output']    # (audios_num, classes_num)
        target = output_dict['target']    # (audios_num, classes_num)

        cm = metrics.confusion_matrix(np.argmax(target, axis=-1), np.argmax(clipwise_output, axis=-1), labels=None)
        accuracy = calculate_accuracy(target, clipwise_output)

        statistics = {'accuracy': accuracy}

        return statistics
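calculate_accuracy is not defined in this snippet. A minimal sketch, assuming single-label clips encoded as one-hot targets:

import numpy as np

def calculate_accuracy(target, output):
    """Clip-level accuracy for single-label classification.

    Args:
      target: (audios_num, classes_num), one-hot targets
      output: (audios_num, classes_num), predicted probabilities or logits

    Returns:
      float, fraction of clips whose argmax prediction matches the target
    """
    y_true = np.argmax(target, axis=-1)
    y_pred = np.argmax(output, axis=-1)
    return np.mean(y_true == y_pred)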
    def evaluate(self, data_loader, reference_csv_path, submission_path):
        """Evaluate AT and SED performance.

        Args:
          data_loader: object
          reference_csv_path: str, strongly labelled ground truth csv
          submission_path: str, path to write out the submission file

        Returns:
          statistics: dict
          output_dict: dict
        """
        output_dict = forward(model=self.model,
                              data_loader=data_loader,
                              return_input=False,
                              return_target=True)

        statistics = {}

        # Clipwise statistics
        statistics['clipwise_ap'] = metrics.average_precision_score(
            output_dict['target'],
            output_dict['clipwise_output'],
            average=None)

        # Framewise statistics
        if 'strong_target' in output_dict.keys():
            statistics['framewise_ap'] = sed_average_precision(
                output_dict['strong_target'],
                output_dict['framewise_output'],
                average=None)

        # Framewise predictions to eventwise predictions
        predict_event_list = frame_prediction_to_event_prediction(
            output_dict, self.sed_params_dict)

        # Write eventwise predictions to submission file
        write_submission(predict_event_list, submission_path)

        # SED with official tool
        statistics['sed_metrics'] = official_evaluate(reference_csv_path,
                                                      submission_path)

        return statistics, output_dict
    def evaluate(self, reference_csv_path, submission_path):
        """Evaluate AT and SED performance.

        Args:
          reference_csv_path: str, strongly labelled ground truth csv
          submission_path: str, path to write out the submission file
        """
        output_dict = forward(
            model=self.model, 
            generator=self.generator, 
            return_input=False, 
            return_target=True)

        predictions = {'clipwise_output': output_dict['clipwise_output'], 
            'framewise_output': output_dict['framewise_output']}

        statistics = {}
        
        # Weak statistics
        clipwise_ap = metrics.average_precision_score(
            output_dict['target'], output_dict['clipwise_output'], average=None)
        statistics['clipwise_ap'] = clipwise_ap
        logging.info('    clipwise mAP: {:.3f}'.format(np.mean(clipwise_ap)))

        if 'strong_target' in output_dict.keys():
            framewise_ap = sed_average_precision(output_dict['strong_target'], 
                output_dict['framewise_output'], average=None)
            statistics['framewise_ap'] = framewise_ap
            logging.info('    framewise mAP: {:.3f}'.format(np.mean(framewise_ap)))
         
        # Obtain eventwise predictions from framewise predictions using predefined thresholds
        predict_event_list = frame_prediction_to_event_prediction(output_dict, 
            self.sed_params_dict)
        
        # Write predicted events to submission file
        write_submission(predict_event_list, submission_path)

        # SED with official tool
        results = official_evaluate(reference_csv_path, submission_path)
        logging.info('    {}'.format(results['overall']['error_rate']))
        statistics['sed_metrics'] = results

        return statistics, predictions
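write_submission here writes the predicted events to the submission csv. A minimal sketch, assuming each entry of predict_event_list is a dict with 'filename', 'onset', 'offset' and 'event_label' keys and that the file is tab-separated (both are assumptions):

def write_submission(predict_event_list, submission_path):
    """Write predicted events, one per line: filename<TAB>onset<TAB>offset<TAB>event_label."""
    with open(submission_path, 'w') as f:
        for event in predict_event_list:
            f.write('{}\t{}\t{}\t{}\n'.format(
                event['filename'], event['onset'], event['offset'],
                event['event_label']))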
    def evaluate(self):

        # Forward
        output_dict = forward(
            model=self.model, 
            generator=self.generator, 
            return_target=True)

        clipwise_output = output_dict['clipwise_output']    # (audios_num, classes_num)
        target = output_dict['target']    # (audios_num, classes_num)

        average_precision = metrics.average_precision_score(
            target, clipwise_output, average=None)

        auc = metrics.roc_auc_score(target, clipwise_output, average=None)
        
        statistics = {'average_precision': average_precision, 'auc': auc}

        return statistics
Example #10
    def evaluate(self, data_type):

        # Forward
        list_dict = forward(
            model=self.model,
            generate_func=self.data_generator.generate_validate(data_type),
            cuda=self.cuda,
            return_target=True,
            max_validate_num=self.max_validate_num)

        # Calculate loss
        (total_loss, event_loss,
         position_loss) = self.calculate_loss(list_dict)
        logging.info('{:<20} {}: {:.3f}, {}: {:.3f}, {}: {:.3f}'
                     ''.format(data_type + ' statistics: ', 'total_loss',
                               total_loss, 'event_loss', event_loss,
                               'position_loss', position_loss))

        return list_dict
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file. 
    
    Args: 
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
        
    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')
    
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(trained_sub_dir))
        
    checkpoint_path = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(trained_sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, '{}_iterations.pth'.format(iteration))
    
    submission_path = os.path.join(workspace, 'submissions', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        sub_dir, 'holdout_fold={}'.format(holdout_fold), model_type, 
        '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)
        
    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
        
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy
        
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])
    
    if cuda:
        model.cuda()
        
    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        scalar=scalar, 
        batch_size=batch_size)
    
    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False, 
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
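The inference_* entry points all expect an args namespace. A hypothetical argparse wiring for inference_evaluation, with argument names inferred from the attributes read above; treat it as a sketch rather than the project's actual command-line interface:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Inference on evaluation data (sketch)')
    parser.add_argument('--mode', type=str, default='inference_evaluation')
    parser.add_argument('--subtask', type=str, choices=['a', 'b', 'c'], required=True)
    parser.add_argument('--data_type', type=str, choices=['leaderboard', 'evaluation'], required=True)
    parser.add_argument('--workspace', type=str, required=True)
    parser.add_argument('--model_type', type=str, required=True)
    parser.add_argument('--iteration', type=int, required=True)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--mini_data', action='store_true')
    args = parser.parse_args()

    # The examples also read args.filename; here it is assumed to be set by the caller
    args.filename = 'main'
    inference_evaluation(args)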
Example #12
    def evaluate(self,
                 data_type,
                 target_source,
                 max_iteration=None,
                 verbose=False):
        '''Evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          target_source: 'curated' | 'noisy'
          max_iteration: None | int, maximum iteration to run to speed up evaluation
          verbose: bool
        '''

        assert (data_type in ['train', 'validate'])
        assert (target_source in ['curated', 'noisy'])

        generate_func = self.data_generator.generate_validate(
            data_type=data_type,
            target_source=target_source,
            max_iteration=max_iteration)

        # Results of segments
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)

        # Results of audio recordings
        result_dict = segment_prediction_to_clip_prediction(
            output_dict, average='arithmetic')

        output = result_dict['output']
        target = result_dict['target']

        # Mean average precision
        average_precision = metrics.average_precision_score(target,
                                                            output,
                                                            average=None)
        mAP = np.mean(average_precision)

        # Label-weighted label-ranking average precision
        (per_class_lwlrap,
         weight_per_class) = calculate_per_class_lwlrap(target, output)
        mean_lwlrap = np.sum(per_class_lwlrap * weight_per_class)

        logging.info('    Target source: {}, mAP: {:.3f}, mean_lwlrap: {:.3f}'
                     ''.format(target_source, mAP, mean_lwlrap))

        statistics = {
            'average_precision': average_precision,
            'per_class_lwlrap': per_class_lwlrap,
            'weight_per_class': weight_per_class
        }

        if verbose:
            for n in range(self.classes_num):
                logging.info('    {:<20}{:.3f}'.format(self.labels[n],
                                                       per_class_lwlrap[n]))
            logging.info('')

        return statistics
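calculate_per_class_lwlrap computes the label-weighted label-ranking average precision used in DCASE 2019 Task 2. A compact sketch written from the metric's definition (not copied from the baseline implementation):

import numpy as np

def calculate_per_class_lwlrap(truth, scores):
    """Per-class lwlrap and per-class weights that sum to 1.

    Args:
      truth: (samples_num, classes_num), binary ground truth
      scores: (samples_num, classes_num), predicted scores

    Returns:
      per_class_lwlrap: (classes_num,)
      weight_per_class: (classes_num,), fraction of all positive labels per class
    """
    samples_num, classes_num = truth.shape
    precision_sums = np.zeros(classes_num)
    label_counts = truth.sum(axis=0)

    for n in range(samples_num):
        pos = np.flatnonzero(truth[n] > 0)
        if len(pos) == 0:
            continue
        # Rank of every class for this sample (0 = highest score)
        order = np.argsort(scores[n])[::-1]
        rank = np.empty(classes_num, dtype=np.int64)
        rank[order] = np.arange(classes_num)
        for c in pos:
            # Precision at the rank of class c: positive classes ranked at or above c
            hits = np.sum(rank[pos] <= rank[c])
            precision_sums[c] += hits / (rank[c] + 1.0)

    per_class_lwlrap = precision_sums / np.maximum(label_counts, 1)
    weight_per_class = label_counts / max(label_counts.sum(), 1)
    return per_class_lwlrap, weight_per_class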
Example #13
    def visualize(self, data_type, max_validate_num=None):
        '''Visualize the log mel spectrogram, reference and prediction of 
        sound events, elevation and azimuth. 
        
        Args:
          data_type: 'train' | 'validate'
          max_validate_num: None | int, maximum iteration to run to speed up 
              evaluation
        '''

        mel_bins = config.mel_bins
        frames_per_second = config.frames_per_second
        classes_num = config.classes_num
        labels = config.labels

        # Forward
        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_validate_num=max_validate_num)

        list_dict = forward(model=self.model,
                            generate_func=generate_func,
                            cuda=self.cuda,
                            return_input=True,
                            return_target=True)

        for n, dict in enumerate(list_dict):
            print('File: {}'.format(dict['name']))

            frames_num = dict['target_event'].shape[1]
            length_in_second = frames_num / float(frames_per_second)

            fig, axs = plt.subplots(4, 2, figsize=(15, 10))
            logmel = inverse_scale(dict['feature'][0][0],
                                   self.data_generator.scalar['mean'],
                                   self.data_generator.scalar['std'])
            axs[0, 0].matshow(logmel.T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            axs[1, 0].matshow(dict['target_event'][0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            axs[2, 0].matshow(dict['output_event'][0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')

            axs[0, 0].set_title('Log mel spectrogram', color='r')
            axs[1, 0].set_title('Reference sound events', color='r')
            axs[2, 0].set_title('Predicted sound events', color='b')

            for i in range(4):
                for j in range(1):
                    axs[i, j].set_xticks([0, frames_num])
                    axs[i, j].set_xticklabels(
                        ['0', '{:.1f} s'.format(length_in_second)])
                    axs[i, j].xaxis.set_ticks_position('bottom')
                    axs[i, j].set_yticks(np.arange(classes_num))
                    axs[i, j].set_yticklabels(labels)
                    axs[i, j].yaxis.grid(color='w',
                                         linestyle='solid',
                                         linewidth=0.2)

            axs[0, 0].set_ylabel('Mel bins')
            axs[0, 0].set_yticks([0, mel_bins])
            axs[0, 0].set_yticklabels([0, mel_bins])
            axs[3, 0].set_visible(False)
            axs[0, 1].set_visible(False)
            axs[1, 1].set_visible(False)
            axs[2, 1].set_visible(False)
            axs[3, 1].set_visible(False)

            fig.tight_layout()
            plt.show()
Example #14
def inference_test(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool, visualize the logmel spectrogram of segments
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    holdout_fold = 'none'  # Use model trained on full data without validation
    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    test_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'test.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'
        ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'
        ''.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'
        ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'
        ''.format(holdout_fold), model_type, '{}_iterations_submission.csv'
        ''.format(iteration))
    create_folder(os.path.dirname(submission_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        test_feature_hdf5_path=test_feature_hdf5_path,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_test()

    # Results of segments
    output_dict = forward(model=model, generate_func=generate_func, cuda=cuda)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(output_dict,
                                                        average='arithmetic')

    # Write submission
    write_submission(result_dict, submission_path)
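segment_prediction_to_clip_prediction aggregates segment-wise outputs back to one prediction per audio clip. A minimal sketch, assuming segments of the same recording share an 'audio_name' entry and that average='arithmetic' means a plain mean over segments (both are assumptions):

import numpy as np

def segment_prediction_to_clip_prediction(output_dict, average='arithmetic'):
    """Average segment-level outputs that belong to the same audio clip.

    Args:
      output_dict: {'audio_name': (segments_num,),
          'output': (segments_num, classes_num),
          optionally 'target': (segments_num, classes_num)}
      average: 'arithmetic' | 'geometric'

    Returns:
      result_dict with one row per unique audio name.
    """
    audio_names = np.array(output_dict['audio_name'])
    unique_names = np.unique(audio_names)

    def _aggregate(x):
        rows = []
        for name in unique_names:
            segment_values = x[audio_names == name]
            if average == 'arithmetic':
                rows.append(np.mean(segment_values, axis=0))
            elif average == 'geometric':
                rows.append(np.exp(np.mean(
                    np.log(np.clip(segment_values, 1e-8, None)), axis=0)))
        return np.array(rows)

    result_dict = {'audio_name': unique_names,
                   'output': _aggregate(output_dict['output'])}
    if 'target' in output_dict:
        # Targets are identical across segments of a clip, so the mean keeps them unchanged
        result_dict['target'] = _aggregate(output_dict['target'])
    return result_dict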
Example #15
    def evaluate(self,
                 data_type,
                 iteration,
                 max_iteration=None,
                 verbose=False):
        '''Evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          iteration: int, training iteration of the evaluated checkpoint
          max_iteration: None | int, maximum iteration to run to speed up evaluation
          verbose: bool
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)
        file = 'wrong_list/' + 'wrong_classification_' + str(iteration)
        output = output_dict[
            'output']  # (audios_num*28, in_domain_classes_num)
        target = output_dict[
            'target']  # (audios_num*28, in_domain_classes_num)
        #         filename = output_dict['filename']

        prob = output  # Subtask a, b use log softmax as output

        # Evaluate
        y_true = np.argmax(target, axis=-1)
        y_pred = np.argmax(prob, axis=-1)
        #         for i in range(28*20):
        #             print(prob[i*28:(i+1)*28])
        #         for i in range(28*20):
        #             print(y_true[i*28:(i+1)*28])
        #         print(np.sum(y_pred==0.))
        #         print(np.sum(y_true==0.))
        #         print(np.sum(y_pred==y_true))
        confusion_matrix = metrics.confusion_matrix(
            y_true, y_pred, labels=np.arange(self.in_domain_classes_num))

        classwise_accuracy = np.diag(confusion_matrix) \
            / np.sum(confusion_matrix, axis=-1)

        length = len(y_true) // config.audio_num
        l = length // 64 + 1  #batch_size
        rest = (len(y_true) -
                (l - 1) * 64 * config.audio_num) // config.audio_num
        # Aggregate segment-level labels / predictions to clip level: a clip is
        # assigned 1 only if none of its config.audio_num segments has class
        # index 0, otherwise it is assigned 0.
        z_true = []
        z_pred = []
        for i in range(length):
            x = y_true[i * config.audio_num:(i + 1) * config.audio_num]
            tag = 1
            for j in range(len(x)):
                if x[j] < 0.5:
                    tag = 0
            if tag == 0:
                z_true.append(0)
            else:
                z_true.append(1)
            x = y_pred[i * config.audio_num:(i + 1) * config.audio_num]
            tag = 1
            for j in range(len(x)):
                if x[j] < 0.5:
                    tag = 0
            if tag == 0:
                z_pred.append(0)
            else:
                z_pred.append(1)
        num = 0
        for i in range(len(z_true)):
            if (z_true[i] == z_pred[i]):
                num = num + 1
        all_acc = float(num) * 100 / float(len(z_true))
        num = 0
        for i in range(len(y_true)):
            if (y_true[i] == y_pred[i]):
                num = num + 1
        segment_acc = float(num) * 100 / float(len(y_true))

        logging.info('Data type: {}'.format(data_type))
        logging.info('    Segment average accuracy: {:.3f}'.format(
            np.mean(segment_acc)))
        logging.info('    All average accuracy: {:.3f}'.format(
            np.mean(all_acc)))

        if verbose:
            classes_num = len(classwise_accuracy)
            for n in range(classes_num):
                logging.info('{:<20}{:.3f}'.format(self.labels[n],
                                                   classwise_accuracy[n]))

            logging.info(confusion_matrix)

        statistics = {
            'accuracy': classwise_accuracy,
            'confusion_matrix': confusion_matrix
        }

        return statistics
    def evaluate(self,
                 data_type,
                 submission_path=None,
                 annotation_path=None,
                 yaml_path=None,
                 max_iteration=None):
        '''Evaluate prediction performance. 
        
        Args:
          data_type: 'train' | 'validate'
          submission_path: None | string, path of submission csv
          annotation_path: None | string, path of reference csv
          yaml_path: None | string, path of yaml taxonomy file
          max_iteration: None | int, use maximum iteration of partial data for
              fast evaluation
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)

        output = output_dict['output']
        #        target = output_dict['{}_target'.format(self.taxonomy_level)]
        #        target = self.get_binary_target(target)
        #
        #        average_precision = metrics.average_precision_score(target, output, average=None)
        #
        #        if self.verbose:
        #            logging.info('{} average precision:'.format(data_type))
        #            for k, label in enumerate(self.labels):
        #                logging.info('    {:<40}{:.3f}'.format(label, average_precision[k]))
        #            logging.info('    {:<40}{:.3f}'.format('Average', np.mean(average_precision)))
        #        else:
        #            logging.info('{}:'.format(data_type))
        #            logging.info('    mAP: {:.3f}'.format(np.mean(average_precision)))
        #
        #        statistics = {}
        #        statistics['average_precision'] = average_precision

        # Write submission and evaluate with official evaluation tool
        # https://github.com/sonyc-project/urban-sound-tagging-baseline
        if submission_path:
            write_submission_csv(audio_names=output_dict['audio_name'],
                                 outputs=output,
                                 taxonomy_level=self.taxonomy_level,
                                 submission_path=submission_path)

            # The following code is from the official evaluation code


#            df_dict = offical_metrics.evaluate(
#                prediction_path=submission_path,
#                annotation_path=annotation_path,
#                yaml_path=yaml_path,
#                mode=self.taxonomy_level)
#
#            micro_auprc, eval_df = offical_metrics.micro_averaged_auprc(
#                df_dict, return_df=True)
#
#            macro_auprc, class_auprc = offical_metrics.macro_averaged_auprc(
#                df_dict, return_classwise=True)
#
#            # Get index of first threshold that is at least 0.5
#            thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
#
#            logging.info('    Official evaluation: ')
#            logging.info('    Micro AUPRC:           {:.3f}'.format(micro_auprc))
#            logging.info('    Micro F1-score (@0.5): {:.3f}'.format(eval_df['F'][thresh_0pt5_idx]))
#            logging.info('    Macro AUPRC:           {:.3f}'.format(macro_auprc))
#
#            statistics['micro_auprc'] = micro_auprc
#            statistics['micro_f1'] = eval_df['F'][thresh_0pt5_idx]
#            statistics['macro_auprc'] = macro_auprc

        return submission_path
Example #17
    def visualize(self, data_type, max_iteration=None):
        '''Visualize logmel spectrogram, reference and prediction. 
        
        Args: 
          data_type: 'train' | 'validate'
          max_iteration: None | int, maximum iteration to run to speed up 
              evaluation
        '''
        mel_bins = config.mel_bins
        audio_duration = config.audio_duration
        labels = config.labels

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_input=True,
                              return_target=True)

        (audios_num, frames_num,
         classes_num) = output_dict['framewise_output'].shape

        for n in range(audios_num):
            print('File: {}'.format(output_dict['audio_name'][n]))

            for k in range(classes_num):
                print('{:<20}{:<8}{:.3f}'.format(
                    labels[k], output_dict['weak_target'][n, k],
                    output_dict['clipwise_output'][n, k]))

            event_prediction = np.zeros((frames_num, classes_num))

            for k in range(classes_num):
                if output_dict['clipwise_output'][n, k] \
                    > self.sed_params_dict['sed_high_threshold']:

                    bgn_fin_pairs = activity_detection(
                        x=output_dict['framewise_output'][n, :, k],
                        thres=self.sed_params_dict['sed_high_threshold'],
                        low_thres=self.sed_params_dict['sed_low_threshold'],
                        n_smooth=self.sed_params_dict['n_smooth'],
                        n_salt=self.sed_params_dict['n_salt'])

                    for pair in bgn_fin_pairs:
                        event_prediction[pair[0]:pair[1], k] = 1

            # Plot
            fig, axs = plt.subplots(4, 1, figsize=(10, 8))
            logmel = inverse_scale(output_dict['feature'][n],
                                   self.data_generator.scalar['mean'],
                                   self.data_generator.scalar['std'])
            axs[0].matshow(logmel.T, origin='lower', aspect='auto', cmap='jet')
            if 'strong_target' in output_dict.keys():
                axs[1].matshow(output_dict['strong_target'][n].T,
                               origin='lower',
                               aspect='auto',
                               cmap='jet')
            masked_framewise_output = output_dict['framewise_output'][
                n] * output_dict['clipwise_output'][n]
            axs[2].matshow(masked_framewise_output.T,
                           origin='lower',
                           aspect='auto',
                           cmap='jet')
            axs[3].matshow(event_prediction.T,
                           origin='lower',
                           aspect='auto',
                           cmap='jet')

            axs[0].set_title('Log mel spectrogram', color='r')
            axs[1].set_title('Reference sound events', color='r')
            axs[2].set_title('Framewise prediction', color='b')
            axs[3].set_title('Eventwise prediction', color='b')

            for i in range(4):
                axs[i].set_xticks([0, frames_num])
                axs[i].set_xticklabels(
                    ['0', '{:.1f} s'.format(audio_duration)])
                axs[i].xaxis.set_ticks_position('bottom')
                axs[i].set_yticks(np.arange(classes_num))
                axs[i].set_yticklabels(labels)
                axs[i].yaxis.grid(color='w', linestyle='solid', linewidth=0.2)

            axs[0].set_ylabel('Mel bins')
            axs[0].set_yticks([0, mel_bins])
            axs[0].set_yticklabels([0, mel_bins])

            fig.tight_layout()
            plt.show()
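activity_detection above turns a framewise probability curve into (begin, end) frame pairs using a double threshold. A simplified sketch of that idea; the real implementation may differ in how n_smooth and n_salt are applied, which are approximated here by gap merging and minimum-length filtering:

import numpy as np

def activity_detection(x, thres, low_thres, n_smooth=1, n_salt=0):
    """Double-threshold event detection on a 1-D framewise probability curve.

    Frames above thres seed events; each seed is extended while the curve
    stays above low_thres. Gaps shorter than n_smooth frames are merged and
    events shorter than n_salt frames are discarded.

    Returns:
      list of [bgn_frame, fin_frame] pairs, fin exclusive.
    """
    x = np.asarray(x)
    frames_num = len(x)
    active = np.zeros(frames_num, dtype=bool)

    for seed in np.flatnonzero(x > thres):
        if active[seed]:
            continue
        bgn = seed
        while bgn > 0 and x[bgn - 1] > low_thres:
            bgn -= 1
        fin = seed
        while fin < frames_num - 1 and x[fin + 1] > low_thres:
            fin += 1
        active[bgn:fin + 1] = True

    # Locate [bgn, fin) boundaries of contiguous active runs
    edges = np.flatnonzero(np.diff(np.concatenate(([0], active.astype(int), [0]))))
    pairs = edges.reshape(-1, 2)

    # Merge runs separated by gaps shorter than n_smooth frames
    merged = []
    for bgn, fin in pairs:
        if merged and bgn - merged[-1][1] < n_smooth:
            merged[-1][1] = fin
        else:
            merged.append([int(bgn), int(fin)])

    # Drop events shorter than n_salt frames
    return [pair for pair in merged if pair[1] - pair[0] >= n_salt]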
Example #18
    def evaluate(self,
                 data_type,
                 metadata_path,
                 submission_path,
                 max_iteration=None):
        '''Write out submission file and evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          metadata_path: string, path of reference csv
          submission_path: string, path to write out submission
          max_iteration: None | int, maximum iteration to run to speed up 
              evaluation
        '''
        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)

        # Evaluate audio tagging
        statistics = {}

        if 'weak_target' in output_dict:
            weak_target = output_dict['weak_target']
            clipwise_output = output_dict['clipwise_output']
            average_precision = metrics.average_precision_score(
                weak_target, clipwise_output, average=None)
            mAP = np.mean(average_precision)

            logging.info('{} statistics:'.format(data_type))
            logging.info('    Audio tagging mAP: {:.3f}'.format(mAP))

            statistics['average_precision'] = average_precision

        if 'strong_target' in output_dict:
            # Write out submission file
            write_submission(output_dict, self.sed_params_dict,
                             submission_path)

            # Evaluate SED with official tools
            reference_dict = read_csv_file_for_sed_eval_tool(metadata_path)
            predict_dict = read_csv_file_for_sed_eval_tool(submission_path)

            # Event & segment based metrics
            event_based_metric = sed_eval.sound_event.EventBasedMetrics(
                event_label_list=config.labels,
                evaluate_onset=True,
                evaluate_offset=True,
                t_collar=0.200,
                percentage_of_length=0.2)

            segment_based_metric = sed_eval.sound_event.SegmentBasedMetrics(
                event_label_list=config.labels, time_resolution=0.2)

            for audio_name in output_dict['audio_name']:
                if audio_name in reference_dict.keys():
                    ref_list = reference_dict[audio_name]
                else:
                    ref_list = []

                if audio_name in predict_dict.keys():
                    pred_list = predict_dict[audio_name]
                else:
                    pred_list = []

                event_based_metric.evaluate(ref_list, pred_list)
                segment_based_metric.evaluate(ref_list, pred_list)

            event_metrics = event_based_metric.results_class_wise_average_metrics(
            )
            f_measure = event_metrics['f_measure']['f_measure']
            error_rate = event_metrics['error_rate']['error_rate']
            deletion_rate = event_metrics['error_rate']['deletion_rate']
            insertion_rate = event_metrics['error_rate']['insertion_rate']

            statistics['event_metrics'] = {
                'f_measure': f_measure,
                'error_rate': error_rate,
                'deletion_rate': deletion_rate,
                'insertion_rate': insertion_rate
            }

            logging.info('    Event-based, classwise F score: {:.3f}, ER: '
                         '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
                             f_measure, error_rate, deletion_rate,
                             insertion_rate))

            segment_metrics = segment_based_metric.results_class_wise_average_metrics(
            )
            f_measure = segment_metrics['f_measure']['f_measure']
            error_rate = segment_metrics['error_rate']['error_rate']
            deletion_rate = segment_metrics['error_rate']['deletion_rate']
            insertion_rate = segment_metrics['error_rate']['insertion_rate']

            statistics['segment_metrics'] = {
                'f_measure': f_measure,
                'error_rate': error_rate,
                'deletion_rate': deletion_rate,
                'insertion_rate': insertion_rate
            }

            logging.info('    Segment-based, classwise F score: {:.3f}, ER: '
                         '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
                             f_measure, error_rate, deletion_rate,
                             insertion_rate))

            if self.verbose:
                logging.info(event_based_metric)
                logging.info(segment_based_metric)

        return statistics
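read_csv_file_for_sed_eval_tool is assumed to parse a strong-label csv into per-file event lists in a format sed_eval can consume. A plausible sketch, assuming tab-separated rows of filename, onset, offset, event_label (delimiter, column order and dict keys are all assumptions):

import csv

def read_csv_file_for_sed_eval_tool(csv_path):
    """Group rows of a strong-label csv by audio file name.

    Returns:
      dict: {audio_name: [{'filename': ..., 'onset': float,
                           'offset': float, 'event_label': ...}, ...]}
    """
    event_dict = {}
    with open(csv_path, 'r') as f:
        for row in csv.reader(f, delimiter='\t'):
            if len(row) < 4:
                continue
            audio_name, onset, offset, label = row[0], float(row[1]), float(row[2]), row[3]
            event_dict.setdefault(audio_name, []).append({
                'filename': audio_name,
                'onset': onset,
                'offset': offset,
                'event_label': label})
    return event_dict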
    def transcribe(self, audio, midi_path):
        """Transcribe an audio recording.

        Args:
          audio: (audio_samples,)
          midi_path: str, path to write out the transcribed MIDI.

        Returns:
          transcribed_dict, dict: {'output_dict':, ..., 'est_note_events': ..., 
            'est_pedal_events': ...}
        """

        audio = audio[None, :]  # (1, audio_samples)

        # Pad audio to be evenly divided by segment_samples
        audio_len = audio.shape[1]
        pad_len = int(np.ceil(audio_len / self.segment_samples)) \
            * self.segment_samples - audio_len

        audio = np.concatenate((audio, np.zeros((1, pad_len))), axis=1)

        # Enframe to segments
        segments = self.enframe(audio, self.segment_samples)
        """(N, segment_samples)"""

        # Forward
        output_dict = forward(self.model, segments, batch_size=1)
        """{'reg_onset_output': (N, segment_frames, classes_num), ...}"""

        # Deframe to original length
        for key in output_dict.keys():
            output_dict[key] = self.deframe(output_dict[key])[0:audio_len]
        """output_dict: {
          'reg_onset_output': (segment_frames, classes_num), 
          'reg_offset_output': (segment_frames, classes_num), 
          'frame_output': (segment_frames, classes_num), 
          'velocity_output': (segment_frames, classes_num), 
          'reg_pedal_onset_output': (segment_frames, 1), 
          'reg_pedal_offset_output': (segment_frames, 1), 
          'pedal_frame_output': (segment_frames, 1)}"""

        # Post processor
        if self.post_processor_type == 'regression':
            """Proposed high-resolution regression post processing algorithm."""
            post_processor = RegressionPostProcessor(
                self.frames_per_second,
                classes_num=self.classes_num,
                onset_threshold=self.onset_threshold,
                offset_threshold=self.offset_threshod,
                frame_threshold=self.frame_threshold,
                pedal_offset_threshold=self.pedal_offset_threshold)

        elif self.post_processor_type == 'onsets_frames':
            """Google's onsets and frames post processing algorithm. Only used 
            for comparison."""
            post_processor = OnsetsFramesPostProcessor(self.frames_per_second,
                                                       self.classes_num)

        # Post process output_dict to MIDI events
        (est_note_events, est_pedal_events) = \
            post_processor.output_dict_to_midi_events(output_dict)

        # Write MIDI events to file
        if midi_path:
            write_events_to_midi(start_time=0,
                                 note_events=est_note_events,
                                 pedal_events=est_pedal_events,
                                 midi_path=midi_path)
            print('Write out to {}'.format(midi_path))

        transcribed_dict = {
            'output_dict': output_dict,
            'est_note_events': est_note_events,
            'est_pedal_events': est_pedal_events
        }

        return transcribed_dict
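The enframe and deframe helpers referenced in transcribe split the padded waveform into fixed-length segments and stitch the per-segment outputs back together along time. A simplified sketch assuming non-overlapping segments (actual transcription systems often use overlapping segments and keep only the centre of each, which this sketch omits):

import numpy as np

def enframe(audio, segment_samples):
    """Split (1, padded_samples) audio into non-overlapping segments.

    Assumes the audio was already zero-padded so that its length is an
    integer multiple of segment_samples, as done in transcribe() above.

    Returns:
      (segments_num, segment_samples)
    """
    assert audio.shape[1] % segment_samples == 0
    return audio.reshape(-1, segment_samples)

def deframe(x):
    """Concatenate per-segment outputs back along the time axis.

    Args:
      x: (segments_num, segment_frames, classes_num)

    Returns:
      (segments_num * segment_frames, classes_num)
    """
    return np.concatenate(x, axis=0)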
Example #20
    def visualize(self,
                  data_type,
                  target_source,
                  save_fig_path,
                  max_iteration=None):
        '''Visualize logmel of different sound classes. 
        
        Args: 
          data_type: 'train' | 'validate'
          target_source: 'curated' | 'noisy'
          save_fig_path: string, path to save figure
          max_iteration: None | int, maximum iteration to run to speed up evaluation
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type,
            target_source=target_source,
            max_iteration=max_iteration)

        # Results of segments
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True,
                              return_input=True)

        target = output_dict['target']
        output = output_dict['output']
        feature = output_dict['feature']

        (audios_num, segment_frames, mel_bins) = feature.shape
        segment_duration = segment_frames / self.frames_per_second

        # Plot log mel spectrogram of different sound classes
        rows_num = 10
        cols_num = 8

        fig, axs = plt.subplots(rows_num, cols_num, figsize=(15, 15))

        for k in range(self.classes_num):
            for n, audio_name in enumerate(output_dict['audio_name']):
                if target[n, k] == 1:
                    title = self.idx_to_lb[k][0:20]
                    row = k // cols_num
                    col = k % cols_num
                    axs[row, col].set_title(title, color='r', fontsize=9)
                    logmel = inverse_scale(feature[n],
                                           self.data_generator.scalar['mean'],
                                           self.data_generator.scalar['std'])
                    axs[row, col].matshow(logmel.T,
                                          origin='lower',
                                          aspect='auto',
                                          cmap='jet')
                    axs[row, col].set_xticks([0, segment_frames])
                    axs[row, col].set_xticklabels(
                        ['0', '{:.1f} s'.format(segment_duration)], fontsize=6)
                    axs[row, col].xaxis.set_ticks_position('bottom')
                    axs[row, col].set_ylabel('Mel bins', fontsize=7)
                    axs[row, col].set_yticks([])
                    break

        for k in range(self.classes_num, rows_num * cols_num):
            row = k // cols_num
            col = k % cols_num
            axs[row, col].set_visible(False)

        plt.tight_layout(pad=0, w_pad=0, h_pad=0)
        plt.savefig(save_fig_path)
        logging.info('Save figure to {}'.format(save_fig_path))
Example #21
def inference_evaluation(args):
    '''Inference on evaluation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: 'none', which means using model trained on all development data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    evaluate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'evaluate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, 'best2.pth')

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'best2_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(hdf5_path=evaluate_hdf5_path,
                                       scalar=scalar,
                                       batch_size=batch_size)

    # Forward
    output_dict = forward(model=model,
                          generate_func=data_generator.generate(),
                          cuda=cuda,
                          return_target=False)

    # Write submission
    write_submission_csv(audio_names=output_dict['audio_name'],
                         outputs=output_dict['output'],
                         taxonomy_level=taxonomy_level,
                         submission_path=submission_path)
Example #22
    def visualize(self, data_type):

        mel_bins = config.mel_bins
        frames_per_second = config.frames_per_second
        classes_num = config.classes_num
        labels = config.labels

        # Forward
        list_dict = forward(
            model=self.model,
            generate_func=self.data_generator.generate_validate(data_type),
            cuda=self.cuda,
            return_input=True,
            return_target=True)

        for dict in list_dict:

            print('File: {}'.format(dict['name']))

            frames_num = dict['target_event'].shape[1]
            length_in_second = frames_num / float(frames_per_second)

            fig, axs = plt.subplots(4, 2, figsize=(15, 10))
            axs[0, 0].matshow(dict['feature'][0][0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            axs[1, 0].matshow(dict['target_event'][0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            axs[2, 0].matshow(dict['output_event'][0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            axs[0, 1].matshow(dict['target_elevation'][0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            axs[1, 1].matshow(dict['target_azimuth'][0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            masked_elevation = dict['output_elevation'] * dict['output_event']
            axs[2, 1].matshow(masked_elevation[0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')
            masked_azimuth = dict['output_azimuth'] * dict['output_event']
            axs[3, 1].matshow(masked_azimuth[0].T,
                              origin='lower',
                              aspect='auto',
                              cmap='jet')

            axs[0, 0].set_title('Log mel spectrogram', color='r')
            axs[1, 0].set_title('Reference sound events', color='r')
            axs[2, 0].set_title('Predicted sound events', color='b')
            axs[0, 1].set_title('Reference elevation', color='r')
            axs[1, 1].set_title('Reference azimuth', color='r')
            axs[2, 1].set_title('Predicted elevation', color='b')
            axs[3, 1].set_title('Predicted azimuth', color='b')

            for i in range(4):
                for j in range(2):
                    axs[i, j].set_xticks([0, frames_num])
                    axs[i, j].set_xticklabels(
                        ['0', '{:.1f} s'.format(length_in_second)])
                    axs[i, j].xaxis.set_ticks_position('bottom')
                    axs[i, j].set_yticks(np.arange(classes_num))
                    axs[i, j].set_yticklabels(labels)
                    axs[i, j].yaxis.grid(color='w',
                                         linestyle='solid',
                                         linewidth=0.2)

            axs[0, 0].set_ylabel('Mel bins')
            axs[0, 0].set_yticks([0, mel_bins])
            axs[0, 0].set_yticklabels([0, mel_bins])

            axs[3, 0].set_visible(False)

            fig.tight_layout()
            plt.show()
    def evaluate(self, data_type, source, max_iteration=None, verbose=False):
        '''Evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          source: 'a' | 'b' | 'c' | 's1' | 's2' | 's3'
          max_iteration: None | int, maximum iteration to run to speed up evaluation
          verbose: bool
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, source=source, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)

        if output_dict['output'].ndim == 2:  # single scale models
            output = output_dict[
                'output']  # (audios_num, in_domain_classes_num)
            target = output_dict[
                'target']  # (audios_num, in_domain_classes_num)
            loss = output_dict['loss']
            prob = np.exp(output)
            # Evaluate
            y_true = np.argmax(target, axis=-1)
            y_pred = np.argmax(prob, axis=-1)
            confusion_matrix = metrics.confusion_matrix(
                y_true, y_pred, labels=np.arange(self.in_domain_classes_num))
            classwise_accuracy = np.diag(confusion_matrix) \
                / np.sum(confusion_matrix, axis=-1)
            logging.info('Single-Classifier:')
            logging.info('Data type: {}'.format(data_type))
            logging.info('    Average accuracy: {:.3f}'.format(
                np.mean(classwise_accuracy)))
            logging.info('    Log loss: {:.3f}'.format(log_loss(y_true, loss)))
        else:
            for i in range(output_dict['output'].shape[1] - 1):
                output = output_dict[
                    'output'][:, i, :]  # (audios_num, in_domain_classes_num)
                target = output_dict[
                    'target']  # (audios_num, in_domain_classes_num)
                loss = output_dict['loss'][:, i, :]
                prob = np.exp(output)
                # Evaluate
                y_true = np.argmax(target, axis=-1)
                y_pred = np.argmax(prob, axis=-1)
                confusion_matrix = metrics.confusion_matrix(
                    y_true,
                    y_pred,
                    labels=np.arange(self.in_domain_classes_num))
                classwise_accuracy = np.diag(confusion_matrix) \
                    / np.sum(confusion_matrix, axis=-1)
                logging.info('Scale' + str(i + 1) + '-Classifier:')
                logging.info('Data type: {}'.format(data_type))
                logging.info('    Average accuracy: {:.3f}'.format(
                    np.mean(classwise_accuracy)))
                logging.info('    Log loss: {:.3f}'.format(
                    log_loss(y_true, loss)))

            output = output_dict[
                'output'][:, -1, :]  # (audios_num, in_domain_classes_num)
            target = output_dict[
                'target']  # (audios_num, in_domain_classes_num)
            loss = output_dict['loss'][:, -1, :]
            prob = np.exp(output)
            # Evaluate
            y_true = np.argmax(target, axis=-1)
            y_pred = np.argmax(prob, axis=-1)
            confusion_matrix = metrics.confusion_matrix(
                y_true, y_pred, labels=np.arange(self.in_domain_classes_num))
            classwise_accuracy = np.diag(confusion_matrix) \
                / np.sum(confusion_matrix, axis=-1)
            logging.info('Global-Classifier:')
            logging.info('Data type: {}'.format(data_type))
            logging.info('    Average accuracy: {:.3f}'.format(
                np.mean(classwise_accuracy)))
            logging.info('    Log loss: {:.3f}'.format(log_loss(y_true, loss)))

        if verbose:
            classes_num = len(classwise_accuracy)
            for n in range(classes_num):
                logging.info('{:<20}{:.3f}'.format(self.labels[n],
                                                   classwise_accuracy[n]))

            logging.info(confusion_matrix)

        statistics = {
            'accuracy': classwise_accuracy,
            'confusion_matrix': confusion_matrix
        }

        return statistics
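The class-wise accuracy used by these evaluators is the diagonal of the confusion matrix divided by its row sums, i.e. per-class recall averaged over classes. Below is a minimal, self-contained sketch of that computation, using made-up class indices for six clips (illustration only, not data from the experiments above):

import numpy as np
from sklearn import metrics

# Hypothetical true and predicted class indices (illustration only)
y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 1, 2, 0])

cm = metrics.confusion_matrix(y_true, y_pred, labels=np.arange(3))
# Row k counts the clips whose true class is k; the diagonal holds the
# correctly classified ones, so dividing by the row sums gives the
# per-class accuracy that is averaged in the logging above.
classwise_accuracy = np.diag(cm) / np.sum(cm, axis=-1)
print(classwise_accuracy)  # [0.5 1.  0.5]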
Example #24
    def evaluate(self, data_type, iteration, max_iteration=None, verbose=False):
        '''Evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          iteration: int, current training iteration, used to name the file of
              wrongly classified clips
          max_iteration: None | int, maximum iteration to run to speed up evaluation
          verbose: bool
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, 
            max_iteration=max_iteration)
        
        # Forward
        output_dict = forward(
            model=self.model, 
            generate_func=generate_func, 
            cuda=self.cuda, 
            return_target=True)
        # Misclassified validation clips are appended to this file as
        # tab-separated lines: audio name, true index/label, predicted index/label
        file = 'wrong_list/' + 'wrong_classification_' + str(iteration)

        output = output_dict['output']  # (audios_num, in_domain_classes_num)
        target = output_dict['target']  # (audios_num, in_domain_classes_num)
        filename = output_dict['filename']

        prob = np.exp(output)  # Subtask a, b use log softmax as output

        # Evaluate
        y_true = np.argmax(target, axis=-1)
        y_pred = np.argmax(prob, axis=-1)

        if data_type == 'validate':
            for i in range(len(y_true)):
                if y_true[i] != y_pred[i]:
                    with open(file, 'a') as f:
                        audioname = filename[i]
                        true_idx = str(y_true[i])
                        pred_idx = str(y_pred[i])
                        true_label = self.idx_to_lb[y_true[i]]
                        pred_label = self.idx_to_lb[y_pred[i]]
                        f.write(audioname + '\t' + true_idx + '\t' + true_label
                                + '\t' + pred_idx + '\t' + pred_label + '\n')

        confusion_matrix = metrics.confusion_matrix(
            y_true, y_pred, labels=np.arange(self.in_domain_classes_num))

        classwise_accuracy = np.diag(confusion_matrix) \
            / np.sum(confusion_matrix, axis=-1)

        logging.info('Data type: {}'.format(data_type))
        logging.info('    Average accuracy: {:.3f}'.format(
            np.mean(classwise_accuracy)))

        if verbose:
            classes_num = len(classwise_accuracy)
            for n in range(classes_num):
                logging.info('{:<20}{:.3f}'.format(self.labels[n], 
                    classwise_accuracy[n]))
                    
            logging.info(confusion_matrix)

        statistics = {
            'accuracy': classwise_accuracy, 
            'confusion_matrix': confusion_matrix}

        return statistics
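The evaluator above exponentiates the model's log-softmax output before taking the argmax. Since exp is monotonic, the predicted class is the same either way; the conversion only matters where the probabilities themselves are needed, e.g. for a confidence threshold or a loss. A minimal sketch with made-up outputs for two clips:

import numpy as np

# Made-up log-softmax outputs over three classes (illustration only)
log_softmax_output = np.log(np.array([[0.7, 0.2, 0.1],
                                      [0.1, 0.3, 0.6]]))

prob = np.exp(log_softmax_output)  # recover the class probabilities
y_pred = np.argmax(prob, axis=-1)  # identical to argmax of the raw outputs
print(y_pred)  # [0 2]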
Example #25
    def evaluate(self, data_type, source, max_iteration=None, verbose=False):
        '''Evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          source: 'a' | 'b' | 'c'
          max_iteration: None | int, maximum iteration to run to speed up evaluation
          verbose: bool
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, source=source, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)

        output = output_dict['output']  # (audios_num, in_domain_classes_num)
        target = output_dict['target']  # (audios_num, in_domain_classes_num)

        if self.subtask in ['a', 'b']:
            prob = np.exp(output)  # Subtask a, b use log softmax as output
        elif self.subtask == 'c':
            prob = output  # Subtask c use sigmoid as output

        # Evaluate
        y_true = np.argmax(target, axis=-1)
        y_pred = np.argmax(prob, axis=-1)

        if self.subtask == 'c':
            for n, class_id in enumerate(y_pred):
                if prob[n, class_id] < 0.5:
                    y_pred[n] = self.lb_to_idx['unknown']

        if self.subtask in ['a', 'b']:
            confusion_matrix = metrics.confusion_matrix(
                y_true, y_pred, labels=np.arange(self.in_domain_classes_num))
        elif self.subtask == 'c':
            confusion_matrix = metrics.confusion_matrix(
                y_true, y_pred, labels=np.arange(self.all_classes_num))

        classwise_accuracy = np.diag(confusion_matrix) \
            / np.sum(confusion_matrix, axis=-1)

        logging.info('Data type: {}'.format(data_type))
        logging.info('    Source: {}'.format(source))

        if self.subtask in ['a', 'b']:
            logging.info('    Average accuracy: {:.3f}'.format(
                np.mean(classwise_accuracy)))
        elif self.subtask == 'c':
            logging.info(
                '    In domain accuracy: {:.3f}, Unknown accuracy: {:.3f}'
                ''.format(np.mean(classwise_accuracy[0:-1]),
                          classwise_accuracy[-1]))

        if verbose:
            classes_num = len(classwise_accuracy)
            for n in range(classes_num):
                logging.info('{:<20}{:.3f}'.format(self.labels[n],
                                                   classwise_accuracy[n]))

            logging.info(confusion_matrix)

        statistics = {
            'accuracy': classwise_accuracy,
            'confusion_matrix': confusion_matrix
        }

        return statistics
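For subtask c the output layer is a sigmoid and the evaluator treats the problem as open-set: if a clip's highest score falls below 0.5, the clip is re-labelled as 'unknown'. Below is a minimal sketch of that thresholding rule; the label mapping and scores are made up for illustration, whereas the real mapping comes from self.lb_to_idx in the evaluator above:

import numpy as np

# Hypothetical label mapping and sigmoid scores (illustration only)
lb_to_idx = {'indoor': 0, 'outdoor': 1, 'transportation': 2, 'unknown': 3}
prob = np.array([[0.9, 0.1, 0.2, 0.0],   # confident in-domain prediction
                 [0.3, 0.4, 0.2, 0.0]])  # best score < 0.5 -> unknown

y_pred = np.argmax(prob, axis=-1)
for n, class_id in enumerate(y_pred):
    if prob[n, class_id] < 0.5:
        y_pred[n] = lb_to_idx['unknown']
print(y_pred)  # [0 3]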