Example #1
def evaluate_all(prediction_path, annotation_path, yaml_path, mode='coarse'):

    metrics = {mode: {}}

    df_dict = evaluate(prediction_path, annotation_path, yaml_path, mode)

    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    macro_auprc, class_auprc = macro_averaged_auprc(df_dict,
                                                    return_classwise=True)

    # Get index of first threshold that is at least 0.5
    # (Series.nonzero() was removed in recent pandas, so convert to NumPy first.)
    thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]

    metrics[mode]["micro_auprc"] = micro_auprc
    metrics[mode]["micro_f1"] = eval_df["F"][thresh_0pt5_idx]
    metrics[mode]["macro_auprc"] = macro_auprc

    print("{} level evaluation:".format(mode.capitalize()))
    print("======================")
    print(" * Micro AUPRC:           {}".format(metrics[mode]["micro_auprc"]))
    print(" * Micro F1-score (@0.5): {}".format(metrics[mode]["micro_f1"]))
    print(" * Macro AUPRC:           {}".format(metrics[mode]["macro_auprc"]))
    print(" * Coarse Tag AUPRC:")

    metrics[mode]["class_auprc"] = {}
    for coarse_id, auprc in class_auprc.items():
        coarse_name = taxonomy['coarse'][int(coarse_id)]
        metrics[mode]["class_auprc"][coarse_name] = auprc
        print("      - {}: {}".format(coarse_name, auprc))
Example #2
def val(student, val_load, i):

    classes = [
        "1_engine", "2_machinery-impact", "3_non-machinery-impact",
        "4_powered-saw", "5_alert-signal", "6_music", "7_human-voice", "8_dog"
    ]

    student.eval()
    predictions = pd.DataFrame(columns=["audio_filename"] + classes)
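    # Run the student network over the validation loader without gradients and
    # collect per-file sigmoid scores: one row per audio clip, one column per
    # coarse tag.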
    with torch.no_grad():
        for j, sample in enumerate(tqdm(val_load)):
            student_input = sample['student'].to(device)
            target = sample['target'].to(device)
            filenames = sample['filename']
            student_input = student_input.float()
            target = target.float()
            output, _ = student(student_input)
            output = nn.Sigmoid()(output)
            for k in range(output.shape[0]):
                curr = output[k].detach().cpu().numpy()
                temp = {}
                temp["audio_filename"] = filenames[k]
                for p, class_name in enumerate(classes):
                    temp[class_name] = curr[p]
                # DataFrame.append was removed in pandas 2.0; build a one-row
                # frame and concatenate instead.
                predictions = pd.concat(
                    [predictions, pd.DataFrame([temp])], ignore_index=True)

    predictions.to_csv('pred/predictions_{}.csv'.format(i), index=False)
    df_dict = evaluate('pred/predictions_{}.csv'.format(i),
                       'annotations-dev.csv', 'dcase-ust-taxonomy.yaml',
                       "coarse")

    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    macro_auprc, class_auprc = macro_averaged_auprc(df_dict,
                                                    return_classwise=True)
    return micro_auprc
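
# Illustrative call (not part of the original example); `student_model`,
# `val_loader` and `epoch` are assumed to be defined by the surrounding
# training loop:
#     micro_auprc = val(student_model, val_loader, epoch)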
Example #3
def train(annotation_path,
          taxonomy_path,
          train_feature_dir,
          val_feature_dir,
          output_dir,
          load_checkpoint,
          load_checkpoint_path,
          exp_id,
          label_mode,
          batch_size=32,
          n_epochs=100,
          kernel_size=3,
          layer_depth=[64, 128, 256, 512],
          chs=1,
          max_ckpt=20,
          lr=1e-3,
          hidden_layer_size=256,
          snapshot=5,
          num_hidden_layers=1,
          standardize=True,
          timestamp=None):
    """
    Train and evaluate a MIL MLP model.
    Parameters
    ----------
    annotation_path
    emb_dir
    output_dir
    label_mode
    batch_size
    num_epochs
    patience
    learning_rate
    hidden_layer_size
    l2_reg
    standardize
    timestamp
    random_state

    Returns
    -------
    """

    # Load annotations and taxonomy
    print("* Loading dataset.")
    annotation_data = pd.read_csv(annotation_path).sort_values(
        'audio_filename')
    with open(taxonomy_path, 'r') as f:
        taxonomy = yaml.load(f, Loader=yaml.Loader)

    annotation_data_trunc = annotation_data[[
        'audio_filename', 'latitude', 'longitude', 'week', 'day', 'hour'
    ]].drop_duplicates()
    file_list = annotation_data_trunc['audio_filename'].to_list()
    latitude_list = annotation_data_trunc['latitude'].to_list()
    longitude_list = annotation_data_trunc['longitude'].to_list()
    week_list = annotation_data_trunc['week'].to_list()
    day_list = annotation_data_trunc['day'].to_list()
    hour_list = annotation_data_trunc['hour'].to_list()

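    # Build "<coarse>-<fine>_<name>" style target labels from the taxonomy;
    # fine IDs of "X" mark incomplete fine labels and are excluded from the
    # prediction targets below.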
    full_fine_target_labels = [
        "{}-{}_{}".format(coarse_id, fine_id, fine_label)
        for coarse_id, fine_dict in taxonomy['fine'].items()
        for fine_id, fine_label in fine_dict.items()
    ]
    fine_target_labels = [
        x for x in full_fine_target_labels
        if x.split('_')[0].split('-')[1] != 'X'
    ]
    coarse_target_labels = [
        "_".join([str(k), v]) for k, v in taxonomy['coarse'].items()
    ]

    print("* Preparing training data.")

    # For fine, we include incomplete labels in targets for computing the loss
    fine_target_list = get_file_targets(annotation_data,
                                        full_fine_target_labels)
    coarse_target_list = get_file_targets(annotation_data,
                                          coarse_target_labels)
    train_file_idxs, valid_file_idxs = get_subset_split(annotation_data)

    if label_mode == "fine":
        target_list = fine_target_list
        labels = fine_target_labels
        num_classes = len(labels)
        y_true_num = len(full_fine_target_labels)
    elif label_mode == "coarse":
        target_list = coarse_target_list
        labels = coarse_target_labels
        num_classes = len(labels)
        y_true_num = num_classes
    else:
        raise ValueError("Invalid label mode: {}".format(label_mode))

    X_train_meta, y_train, X_valid_meta, y_valid_meta, scaler \
        = prepare_data(train_file_idxs, valid_file_idxs,
                       latitude_list, longitude_list,
                       week_list, day_list, hour_list,
                       target_list, standardize=standardize)

    print('X_train meta shape', X_train_meta.shape)
    print('y_train shape', y_train.shape)
    print('X_valid_meta shape', X_valid_meta.shape)
    print('y_valid shape', y_valid_meta.shape)

    meta_dims = X_train_meta.shape[2]

    X_train = load_train_data(file_list, train_file_idxs, train_feature_dir)
    X_valid = load_train_data(file_list, valid_file_idxs, val_feature_dir)
    _, frames, bins = X_train.shape
    print('X_train shape', X_train.shape)
    print('X_valid shape', X_valid.shape)

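    # Standardization statistics computed over all training frames; `scale`
    # uses them below to normalize both training and validation batches.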
    (mean_train,
     std_train) = calculate_scalar_of_tensor(np.concatenate(X_train, axis=0))

    model = CNN9_Res_train(kernel_size, layer_depth, num_classes,
                           hidden_layer_size)

    if not timestamp:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    model_path = os.path.join(output_dir, 'exp' + exp_id)
    # Make sure the experiment directory exists before anything is written
    # into it (scaler, summaries, checkpoints).
    os.makedirs(model_path, exist_ok=True)

    if scaler is not None:
        scaler_path = os.path.join(model_path, 'stdizer.pkl')
        with open(scaler_path, 'wb') as f:
            pk.dump(scaler, f)

    if label_mode == "fine":
        full_coarse_to_fine_terminal_idxs = np.cumsum(
            [len(fine_dict) for fine_dict in taxonomy['fine'].values()])
        incomplete_fine_subidxs = [
            len(fine_dict) - 1 if 'X' in fine_dict else None
            for fine_dict in taxonomy['fine'].values()
        ]
        coarse_to_fine_end_idxs = np.cumsum([
            len(fine_dict) - 1 if 'X' in fine_dict else len(fine_dict)
            for fine_dict in taxonomy['fine'].values()
        ])
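        # Hypothetical example: if coarse class 1 has fine tags {1, 2, 3, X}
        # and coarse class 2 has {1, 2}, then
        #   full_coarse_to_fine_terminal_idxs = [4, 6]   (targets, incl. "X")
        #   incomplete_fine_subidxs           = [3, None]
        #   coarse_to_fine_end_idxs           = [3, 5]   (predictions, excl. "X")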

        # Create a loss function that only computes the loss for fine labels
        # whose coarse category does not have an active incomplete ("X") label
        def masked_loss(y_true, y_pred):
            loss = None
            for coarse_idx in range(len(full_coarse_to_fine_terminal_idxs)):
                true_terminal_idx = full_coarse_to_fine_terminal_idxs[
                    coarse_idx]
                true_incomplete_subidx = incomplete_fine_subidxs[coarse_idx]
                pred_end_idx = coarse_to_fine_end_idxs[coarse_idx]

                if coarse_idx != 0:
                    true_start_idx = full_coarse_to_fine_terminal_idxs[
                        coarse_idx - 1]
                    pred_start_idx = coarse_to_fine_end_idxs[coarse_idx - 1]
                else:
                    true_start_idx = 0
                    pred_start_idx = 0

                if true_incomplete_subidx is None:
                    true_end_idx = true_terminal_idx

                    sub_true = y_true[:, true_start_idx:true_end_idx]
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx]

                else:
                    # Don't include incomplete label
                    true_end_idx = true_terminal_idx - 1
                    true_incomplete_idx = true_incomplete_subidx + true_start_idx
                    assert true_end_idx - true_start_idx == pred_end_idx - pred_start_idx
                    assert true_incomplete_idx == true_end_idx

                    # 1 if not incomplete, 0 if incomplete
                    mask = K.expand_dims(1 - y_true[:, true_incomplete_idx])

                    # Mask the target and predictions. If the mask is 0,
                    # all entries will be 0 and the BCE will be 0.
                    # This has the effect of masking the BCE for each fine
                    # label within a coarse label if an incomplete label exists
                    sub_true = y_true[:, true_start_idx:true_end_idx] * mask
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx] * mask

                if loss is not None:
                    loss += K.sum(K.binary_crossentropy(sub_true, sub_pred))
                else:
                    loss = K.sum(K.binary_crossentropy(sub_true, sub_pred))

            return loss

        loss_func = masked_loss
    else:

        def unmasked_loss(y_true, y_pred):

            loss = None
            loss = K.sum(K.binary_crossentropy(y_true, y_pred))
            return loss

        loss_func = unmasked_loss

    ###     placeholder
    x = tf.placeholder(tf.float32, shape=[None, frames, bins, chs], name='x')
    meta_x = tf.placeholder(tf.float32, shape=[None, meta_dims], name='meta_x')
    y = tf.placeholder(tf.float32, shape=[None, y_true_num], name='y')
    is_training = tf.placeholder(tf.bool, shape=None, name='is_training')

    ###     net output
    output = model.forward(input_tensor=x,
                           input_meta=meta_x,
                           is_training=is_training)
    sigmoid_output = tf.nn.sigmoid(output, name='sigmoid_output')
    loss = loss_func(y, sigmoid_output)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
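    # Learning-rate schedule: start from `lr` and multiply by 0.9 every time
    # `learning_rate_decay_op` is run (once per snapshot interval in the
    # training loop below).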
    learning_rate = tf.Variable(float(lr), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)
    with tf.control_dependencies(update_ops):
        #        train_op = tf.train.MomentumOptimizer(learning_rate=lr,momentum=momentum).minimize(loss)
        train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)

    ###     start session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    saver = tf.train.Saver(max_to_keep=max_ckpt)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if load_checkpoint:
        saver.restore(sess, load_checkpoint_path)

    ###     tensorboard summary

    train_summary_dir = os.path.join(model_path, 'summaries', 'train')
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    loss_all = tf.placeholder(tf.float32, shape=None, name='loss_all')

    tf.add_to_collection("loss", loss_all)

    loss_summary = tf.summary.scalar('loss', loss_all)

    val_summary_dir = os.path.join(model_path, 'summaries', 'val')
    val_micro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_auprc'), sess.graph)
    val_macro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'macro_auprc'), sess.graph)
    val_micro_F1score_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_F1score'), sess.graph)
    val_summary = tf.placeholder(tf.float32, shape=None, name='val_summary')
    tf.add_to_collection("val_summary", val_summary)
    val_summary_op = tf.summary.scalar('val_summary', val_summary)
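    # The same scalar summary op is reused for micro AUPRC, macro AUPRC and
    # micro F1: each value is fed through the `val_summary` placeholder and
    # written by its own FileWriter so the curves show up separately.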

    ###     train loop
    print("* Training model.")
    class_auprc_dict = {}
    for epoch in range(n_epochs):
        train_loss = 0
        n_batch = 0
        for X_train_batch, X_meta_batch, y_train_batch in gen_train_batch(
                X_train, X_train_meta, y_train, batch_size):

            X_meta_batch = X_meta_batch.reshape(-1, meta_dims)
            X_train_batch = scale(X_train_batch, mean_train, std_train)
            X_train_batch = X_train_batch.reshape(-1, frames, bins, chs)
            _, train_loss_batch = sess.run(
                [train_op, loss],
                feed_dict={
                    x: X_train_batch,
                    meta_x: X_meta_batch,
                    y: y_train_batch,
                    is_training: True
                })
            train_loss += train_loss_batch
            n_batch += 1
        train_loss = train_loss / n_batch
        train_summary_op = tf.summary.merge([loss_summary])
        train_summaries = sess.run(train_summary_op,
                                   feed_dict={loss_all: train_loss})
        train_summary_writer.add_summary(train_summaries, epoch)

        print("step %d" % (epoch))
        print("   train loss: %f" % (train_loss))

        pre = []
        if ((epoch + 1) % snapshot == 0
                and epoch > 0) or epoch == n_epochs - 1:
            sess.run(learning_rate_decay_op)

            for val_data_batch, val_meta_batch in gen_val_batch(
                    X_valid, X_valid_meta, batch_size):

                val_meta_batch = val_meta_batch.reshape(-1, meta_dims)
                val_data_batch = scale(val_data_batch, mean_train, std_train)
                val_data_batch = val_data_batch.reshape(-1, frames, bins, chs)
                prediction = sess.run(sigmoid_output,
                                      feed_dict={
                                          x: val_data_batch,
                                          meta_x: val_meta_batch,
                                          is_training: False
                                      })
                pre.extend(prediction)
            # print(len(pre))
            generate_output_file(pre, valid_file_idxs, model_path, file_list,
                                 label_mode, taxonomy)
            submission_path = os.path.join(model_path, "output.csv")
            df_dict = metrics.evaluate(prediction_path=submission_path,
                                       annotation_path=annotation_path,
                                       yaml_path=taxonomy_path,
                                       mode=label_mode)
            val_micro_auprc, eval_df = metrics.micro_averaged_auprc(
                df_dict, return_df=True)
            val_macro_auprc, class_auprc = metrics.macro_averaged_auprc(
                df_dict, return_classwise=True)
            thresh_idx_05 = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]
            val_micro_F1score = eval_df['F'][thresh_idx_05]

            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_micro_auprc})
            val_micro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_macro_auprc})
            val_macro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(
                val_summary_op, feed_dict={val_summary: val_micro_F1score})
            val_micro_F1score_summary_writer.add_summary(val_summaries, epoch)
            class_auprc_dict['class_auprc_' + str(epoch)] = class_auprc
            print('official')
            print('micro', val_micro_auprc)
            print('micro_F1', val_micro_F1score)
            print('macro', val_macro_auprc)

            ckpt_path = os.path.join(model_path, 'checkpoint', 'model')
            os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
            print('-----save:{}-{}'.format(ckpt_path, epoch))
            saver.save(sess,
                       ckpt_path,
                       global_step=epoch)

            np.save(os.path.join(model_path, 'class_auprc_dict.npy'),
                    class_auprc_dict)
    sess.close()
Example #4
if __name__ == '__main__':
    # The lines above the first argument are reconstructed from context; the
    # original listing is truncated here.
    parser = argparse.ArgumentParser()
    parser.add_argument('prediction_path',
                        type=str,
                        help='Path to prediction CSV file.')
    parser.add_argument('annotation_path',
                        type=str,
                        help='Path to dataset annotation CSV file.')
    parser.add_argument('yaml_path',
                        type=str,
                        help='Path to dataset taxonomy YAML file.')

    args = parser.parse_args()

    for mode in ("fine", "coarse"):

        df_dict = evaluate(args.prediction_path, args.annotation_path,
                           args.yaml_path, mode)

        micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
        macro_auprc, class_auprc = macro_averaged_auprc(df_dict,
                                                        return_classwise=True)

        # Get index of first threshold that is at least 0.5
        thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]

        print("{} level evaluation:".format(mode.capitalize()))
        print("======================")
        print(" * Micro AUPRC:           {}".format(micro_auprc))
        print(" * Micro F1-score (@0.5): {}".format(
            eval_df["F"][thresh_0pt5_idx]))
        print(" * Macro AUPRC:           {}".format(macro_auprc))
        print(" * Coarse Tag AUPRC:")

        for coarse_id, auprc in class_auprc.items():
            print("      - {}: {}".format(coarse_id, auprc))
Example #5
    def evaluate(self,
                 data_type,
                 submission_path=None,
                 annotation_path=None,
                 yaml_path=None,
                 max_iteration=None):
        '''Evaluate prediction performance.

        Args:
          data_type: 'train' | 'validate'
          submission_path: None | string, path of submission csv
          annotation_path: None | string, path of reference csv
          yaml_path: None | string, path of yaml taxonomy file
          max_iteration: None | int, maximum number of iterations (on partial
              data) to use for fast evaluation
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)

        output = output_dict['output']
        target = output_dict['{}_target'.format(self.taxonomy_level)]
        target = self.get_binary_target(target)

        average_precision = metrics.average_precision_score(target,
                                                            output,
                                                            average=None)

        if self.verbose:
            logging.info('{} average precision:'.format(data_type))
            for k, label in enumerate(self.labels):
                logging.info('    {:<40}{:.3f}'.format(label,
                                                       average_precision[k]))
            logging.info('    {:<40}{:.3f}'.format('Average',
                                                   np.mean(average_precision)))
        else:
            logging.info('{}:'.format(data_type))
            logging.info('    mAP: {:.3f}'.format(np.mean(average_precision)))

        statistics = {}
        statistics['average_precision'] = average_precision

        # Write submission and evaluate with official evaluation tool
        # https://github.com/sonyc-project/urban-sound-tagging-baseline
        if submission_path:
            write_submission_csv(audio_names=output_dict['audio_name'],
                                 outputs=output,
                                 taxonomy_level=self.taxonomy_level,
                                 submission_path=submission_path)

            # The following code is from the official evaluation code
            df_dict = offical_metrics.evaluate(prediction_path=submission_path,
                                               annotation_path=annotation_path,
                                               yaml_path=yaml_path,
                                               mode=self.taxonomy_level)

            micro_auprc, eval_df = offical_metrics.micro_averaged_auprc(
                df_dict, return_df=True)

            macro_auprc, class_auprc = offical_metrics.macro_averaged_auprc(
                df_dict, return_classwise=True)

            # Get index of first threshold that is at least 0.5
            thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]

            logging.info('    Official evaluation: ')
            logging.info(
                '    Micro AUPRC:           {:.3f}'.format(micro_auprc))
            logging.info('    Micro F1-score (@0.5): {:.3f}'.format(
                eval_df['F'][thresh_0pt5_idx]))
            logging.info(
                '    Macro AUPRC:           {:.3f}'.format(macro_auprc))

            statistics['micro_auprc'] = micro_auprc
            statistics['micro_f1'] = eval_df['F'][thresh_0pt5_idx]
            statistics['macro_auprc'] = macro_auprc

        return statistics
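
# Illustrative call (not part of the original example); `evaluator` is assumed
# to be an instance of the surrounding class, constructed elsewhere with a
# model and data generator:
#     statistics = evaluator.evaluate(data_type='validate',
#                                     submission_path='submission.csv',
#                                     annotation_path='annotations.csv',
#                                     yaml_path='dcase-ust-taxonomy.yaml')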
Example #6
        accuracy = 0
        model.eval()

        ## get AUPRC scores
        with torch.no_grad():
            make_prediction_csv(model, PREDICTION_PATH, mode=TRAIN_MODE, embed=True,
                                test_path=test_dir)
            df_dict = evaluate(PREDICTION_PATH,
                               ANNOTATIONS_PATH,
                               YAML_PATH,
                               'coarse')
            df_dict_fine = evaluate(PREDICTION_PATH,
                                    ANNOTATIONS_PATH,
                                    YAML_PATH,
                                    'fine')
            micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
            micro_auprc_fine, eval_df_fine = micro_averaged_auprc(df_dict_fine, return_df=True)
            print('Micro_AUPRC Coarse:', micro_auprc)
            print('Micro_AUPRC Fine:', micro_auprc_fine)
            if micro_auprc > best_micro_auprc_coarse or micro_auprc_fine > best_micro_auprc_fine:
                name, ext = os.path.splitext(PREDICTION_PATH)
                shutil.copy(PREDICTION_PATH, f'{name}_best_coarse={micro_auprc:.3f}_fine={micro_auprc_fine:.3f}{ext}')
                torch.save(model.state_dict(), f'models/{RUN_NAME}_coarse={micro_auprc:.3f}_fine={micro_auprc_fine:.3f}.ckpt')
                best_micro_auprc_coarse = micro_auprc
                best_micro_auprc_fine = micro_auprc_fine
                stagnation = 0
                print('Best so far')

            else:
                stagnation += 1
                print('Stagnation:', stagnation)
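                # (Hypothetical continuation) the stagnation counter is
                # presumably compared against a patience threshold for early
                # stopping, e.g. `if stagnation >= PATIENCE: break`.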