def evaluate_all(prediction_path, annotation_path, yaml_path, mode='coarse'):
    metrics = {mode: {}}  # key by the requested mode so 'fine' works as well as 'coarse'
    df_dict = evaluate(prediction_path, annotation_path, yaml_path, mode)
    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    macro_auprc, class_auprc = macro_averaged_auprc(df_dict, return_classwise=True)

    # The taxonomy is needed below to map coarse ids to names
    with open(yaml_path, 'r') as f:
        taxonomy = yaml.load(f, Loader=yaml.Loader)

    # Get index of first threshold that is at least 0.5
    thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]

    metrics[mode]["micro_auprc"] = micro_auprc
    metrics[mode]["micro_f1"] = eval_df["F"][thresh_0pt5_idx]
    metrics[mode]["macro_auprc"] = macro_auprc

    print("{} level evaluation:".format(mode.capitalize()))
    print("======================")
    print(" * Micro AUPRC: {}".format(metrics[mode]["micro_auprc"]))
    print(" * Micro F1-score (@0.5): {}".format(metrics[mode]["micro_f1"]))
    print(" * Macro AUPRC: {}".format(metrics[mode]["macro_auprc"]))
    print(" * Coarse Tag AUPRC:")

    metrics[mode]["class_auprc"] = {}
    for coarse_id, auprc in class_auprc.items():
        coarse_name = taxonomy['coarse'][int(coarse_id)]
        metrics[mode]["class_auprc"][coarse_name] = auprc
        print("      - {}: {}".format(coarse_name, auprc))

    return metrics
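# Example usage of evaluate_all() (a minimal sketch; the file names below are
# illustrative placeholders, not paths taken from this code):
if __name__ == '__main__':
    results = evaluate_all('predictions.csv',
                           'annotations-dev.csv',
                           'dcase-ust-taxonomy.yaml',
                           mode='coarse')
    print(results['coarse']['micro_auprc'])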
def val(student, val_load, i):
    classes = [
        "1_engine", "2_machinery-impact", "3_non-machinery-impact",
        "4_powered-saw", "5_alert-signal", "6_music", "7_human-voice", "8_dog"
    ]
    student.eval()

    # Collect rows in a list and build the DataFrame once at the end
    # (DataFrame.append in a loop is deprecated and was removed in pandas 2.0).
    rows = []
    with torch.no_grad():
        for j, sample in enumerate(tqdm(val_load)):
            student_input = sample['student'].to(device).float()
            target = sample['target'].to(device).float()
            filenames = sample['filename']

            output, _ = student(student_input)
            output = nn.Sigmoid()(output)

            for k in range(output.shape[0]):
                curr = output[k].detach().cpu().numpy()
                temp = {"audio_filename": filenames[k]}
                for p, class_name in enumerate(classes):
                    temp[class_name] = curr[p]
                rows.append(temp)

    predictions = pd.DataFrame(rows, columns=["audio_filename"] + classes)
    predictions.to_csv('pred/predictions_{}.csv'.format(i), index=False)

    df_dict = evaluate('pred/predictions_{}.csv'.format(i),
                       'annotations-dev.csv',
                       'dcase-ust-taxonomy.yaml',
                       "coarse")
    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    macro_auprc, class_auprc = macro_averaged_auprc(df_dict, return_classwise=True)
    return micro_auprc
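# Hypothetical driver for val() (a sketch only; `student_model`, `val_loader` and
# `n_epochs` are assumed to exist elsewhere and are not part of this excerpt):
os.makedirs('pred', exist_ok=True)  # val() writes pred/predictions_<i>.csv
for epoch in range(n_epochs):
    ...  # one training epoch
    micro_auprc = val(student_model, val_loader, epoch)
    print('epoch {}: coarse micro AUPRC = {:.4f}'.format(epoch, micro_auprc))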
def train(annotation_path, taxonomy_path, train_feature_dir, val_feature_dir,
          output_dir, load_checkpoint, load_checkpoint_path, exp_id, label_mode,
          batch_size=32, n_epochs=100, kernel_size=3,
          layer_depth=[64, 128, 256, 512], chs=1, max_ckpt=20, lr=1e-3,
          hidden_layer_size=256, snapshot=5, num_hidden_layers=1,
          standardize=True, timestamp=None):
    """
    Train and evaluate a CNN9-Res model that combines audio features with
    spatio-temporal metadata (latitude, longitude, week, day, hour).

    Parameters
    ----------
    annotation_path
    taxonomy_path
    train_feature_dir
    val_feature_dir
    output_dir
    load_checkpoint
    load_checkpoint_path
    exp_id
    label_mode
    batch_size
    n_epochs
    kernel_size
    layer_depth
    chs
    max_ckpt
    lr
    hidden_layer_size
    snapshot
    num_hidden_layers
    standardize
    timestamp

    Returns
    -------
    """
    # Load annotations and taxonomy
    print("* Loading dataset.")
    annotation_data = pd.read_csv(annotation_path).sort_values('audio_filename')
    with open(taxonomy_path, 'r') as f:
        taxonomy = yaml.load(f, Loader=yaml.Loader)

    annotation_data_trunc = annotation_data[[
        'audio_filename', 'latitude', 'longitude', 'week', 'day', 'hour'
    ]].drop_duplicates()
    file_list = annotation_data_trunc['audio_filename'].to_list()
    latitude_list = annotation_data_trunc['latitude'].to_list()
    longitude_list = annotation_data_trunc['longitude'].to_list()
    week_list = annotation_data_trunc['week'].to_list()
    day_list = annotation_data_trunc['day'].to_list()
    hour_list = annotation_data_trunc['hour'].to_list()

    full_fine_target_labels = [
        "{}-{}_{}".format(coarse_id, fine_id, fine_label)
        for coarse_id, fine_dict in taxonomy['fine'].items()
        for fine_id, fine_label in fine_dict.items()
    ]
    fine_target_labels = [
        x for x in full_fine_target_labels
        if x.split('_')[0].split('-')[1] != 'X'
    ]
    coarse_target_labels = [
        "_".join([str(k), v]) for k, v in taxonomy['coarse'].items()
    ]

    print("* Preparing training data.")

    # For fine mode, incomplete labels are kept in the targets for computing the loss
    fine_target_list = get_file_targets(annotation_data, full_fine_target_labels)
    coarse_target_list = get_file_targets(annotation_data, coarse_target_labels)
    train_file_idxs, valid_file_idxs = get_subset_split(annotation_data)

    if label_mode == "fine":
        target_list = fine_target_list
        labels = fine_target_labels
        num_classes = len(labels)
        y_true_num = len(full_fine_target_labels)
    elif label_mode == "coarse":
        target_list = coarse_target_list
        labels = coarse_target_labels
        num_classes = len(labels)
        y_true_num = num_classes
    else:
        raise ValueError("Invalid label mode: {}".format(label_mode))

    X_train_meta, y_train, X_valid_meta, y_valid_meta, scaler \
        = prepare_data(train_file_idxs, valid_file_idxs, latitude_list,
                       longitude_list, week_list, day_list, hour_list,
                       target_list, standardize=standardize)

    print('X_train meta shape', X_train_meta.shape)
    print('y_train shape', y_train.shape)
    print('X_valid_meta shape', X_valid_meta.shape)
    print('y_valid shape', y_valid_meta.shape)

    meta_dims = X_train_meta.shape[2]

    X_train = load_train_data(file_list, train_file_idxs, train_feature_dir)
    X_valid = load_train_data(file_list, valid_file_idxs, val_feature_dir)
    _, frames, bins = X_train.shape
    print('X_train shape', X_train.shape)
    print('X_valid shape', X_valid.shape)

    (mean_train, std_train) = calculate_scalar_of_tensor(
        np.concatenate(X_train, axis=0))

    model = CNN9_Res_train(kernel_size, layer_depth, num_classes, hidden_layer_size)

    if not timestamp:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    model_path = os.path.join(output_dir, 'exp' + exp_id)
    # Make sure the experiment directory exists before writing the scaler
    # and checkpoints into it.
    os.makedirs(model_path, exist_ok=True)

    if scaler is not None:
        scaler_path = os.path.join(model_path, 'stdizer.pkl')
        with open(scaler_path, 'wb') as f:
            pk.dump(scaler, f)

    if label_mode == "fine":
        full_coarse_to_fine_terminal_idxs = np.cumsum(
            [len(fine_dict) for fine_dict in taxonomy['fine'].values()])
        incomplete_fine_subidxs = [
            len(fine_dict) - 1 if 'X' in fine_dict else None
            for fine_dict in taxonomy['fine'].values()
        ]
        coarse_to_fine_end_idxs = np.cumsum([
            len(fine_dict) - 1 if 'X' in fine_dict else len(fine_dict)
            for fine_dict in taxonomy['fine'].values()
        ])

        # Create a loss function that only adds loss for fine labels for which
        # we don't have any incomplete labels
        def masked_loss(y_true, y_pred):
            loss = None
            for coarse_idx in range(len(full_coarse_to_fine_terminal_idxs)):
                true_terminal_idx = full_coarse_to_fine_terminal_idxs[coarse_idx]
                true_incomplete_subidx = incomplete_fine_subidxs[coarse_idx]
                pred_end_idx = coarse_to_fine_end_idxs[coarse_idx]

                if coarse_idx != 0:
                    true_start_idx = full_coarse_to_fine_terminal_idxs[coarse_idx - 1]
                    pred_start_idx = coarse_to_fine_end_idxs[coarse_idx - 1]
                else:
                    true_start_idx = 0
                    pred_start_idx = 0

                if true_incomplete_subidx is None:
                    true_end_idx = true_terminal_idx
                    sub_true = y_true[:, true_start_idx:true_end_idx]
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx]
                else:
                    # Don't include the incomplete label
                    true_end_idx = true_terminal_idx - 1
                    true_incomplete_idx = true_incomplete_subidx + true_start_idx
                    assert true_end_idx - true_start_idx == pred_end_idx - pred_start_idx
                    assert true_incomplete_idx == true_end_idx

                    # 1 if not incomplete, 0 if incomplete
                    mask = K.expand_dims(1 - y_true[:, true_incomplete_idx])

                    # Mask the target and predictions. If the mask is 0,
                    # all entries will be 0 and the BCE will be 0.
                    # This has the effect of masking the BCE for each fine
                    # label within a coarse label if an incomplete label exists.
                    sub_true = y_true[:, true_start_idx:true_end_idx] * mask
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx] * mask

                if loss is not None:
                    loss += K.sum(K.binary_crossentropy(sub_true, sub_pred))
                else:
                    loss = K.sum(K.binary_crossentropy(sub_true, sub_pred))

            return loss

        loss_func = masked_loss
    else:
        def unmasked_loss(y_true, y_pred):
            return K.sum(K.binary_crossentropy(y_true, y_pred))

        loss_func = unmasked_loss

    ### placeholders
    x = tf.placeholder(tf.float32, shape=[None, frames, bins, chs], name='x')
    meta_x = tf.placeholder(tf.float32, shape=[None, meta_dims], name='meta_x')
    y = tf.placeholder(tf.float32, shape=[None, y_true_num], name='y')
    is_training = tf.placeholder(tf.bool, shape=None, name='is_training')

    ### net output
    output = model.forward(input_tensor=x, input_meta=meta_x,
                           is_training=is_training)
    sigmoid_output = tf.nn.sigmoid(output, name='sigmoid_output')
    loss = loss_func(y, sigmoid_output)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    learning_rate = tf.Variable(float(lr), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)
    with tf.control_dependencies(update_ops):
        # train_op = tf.train.MomentumOptimizer(learning_rate=lr, momentum=momentum).minimize(loss)
        train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    ### start session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    saver = tf.train.Saver(max_to_keep=max_ckpt)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if load_checkpoint:
        saver.restore(sess, load_checkpoint_path)

    ### tensorboard summaries
    train_summary_dir = os.path.join(model_path, 'summaries', 'train')
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
    loss_all = tf.placeholder(tf.float32, shape=None, name='loss_all')
    tf.add_to_collection("loss", loss_all)
    loss_summary = tf.summary.scalar('loss', loss_all)

    val_summary_dir = os.path.join(model_path, 'summaries', 'val')
    val_micro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_auprc'), sess.graph)
    val_macro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'macro_auprc'), sess.graph)
    val_micro_F1score_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_F1score'), sess.graph)
    val_summary = tf.placeholder(tf.float32, shape=None, name='val_summary')
    tf.add_to_collection("val_summary", val_summary)
    val_summary_op = tf.summary.scalar('val_summary', val_summary)

    ### train loop
    print("* Training model.")
    class_auprc_dict = {}
    for epoch in range(n_epochs):
        train_loss = 0
        n_batch = 0
        for X_train_batch, X_meta_batch, y_train_batch in gen_train_batch(
                X_train, X_train_meta, y_train, batch_size):

            X_meta_batch = X_meta_batch.reshape(-1, meta_dims)
            X_train_batch = scale(X_train_batch, mean_train, std_train)
            X_train_batch = X_train_batch.reshape(-1, frames, bins, chs)
            _, train_loss_batch = sess.run(
                [train_op, loss],
                feed_dict={
                    x: X_train_batch,
                    meta_x: X_meta_batch,
                    y: y_train_batch,
                    is_training: True
                })
            train_loss += train_loss_batch
            n_batch += 1

        train_loss = train_loss / n_batch
        train_summary_op = tf.summary.merge([loss_summary])
        train_summaries = sess.run(train_summary_op,
                                   feed_dict={loss_all: train_loss})
        train_summary_writer.add_summary(train_summaries, epoch)

        print("step %d" % epoch)
        print("  train loss: %f" % train_loss)

        pre = []
        if ((epoch + 1) % snapshot == 0 and epoch > 0) or epoch == n_epochs - 1:
            sess.run(learning_rate_decay_op)

            for val_data_batch, val_meta_batch in gen_val_batch(
                    X_valid, X_valid_meta, batch_size):

                val_meta_batch = val_meta_batch.reshape(-1, meta_dims)
                val_data_batch = scale(val_data_batch, mean_train, std_train)
                val_data_batch = val_data_batch.reshape(-1, frames, bins, chs)
                prediction = sess.run(sigmoid_output,
                                      feed_dict={
                                          x: val_data_batch,
                                          meta_x: val_meta_batch,
                                          is_training: False
                                      })
                pre.extend(prediction)

            # print(len(pre))
            generate_output_file(pre, valid_file_idxs, model_path, file_list,
                                 label_mode, taxonomy)
            submission_path = os.path.join(model_path, "output.csv")
            df_dict = metrics.evaluate(prediction_path=submission_path,
                                       annotation_path=annotation_path,
                                       yaml_path=taxonomy_path,
                                       mode=label_mode)
            val_micro_auprc, eval_df = metrics.micro_averaged_auprc(
                df_dict, return_df=True)
            val_macro_auprc, class_auprc = metrics.macro_averaged_auprc(
                df_dict, return_classwise=True)
            thresh_idx_05 = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
            val_micro_F1score = eval_df['F'][thresh_idx_05]

            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_micro_auprc})
            val_micro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_macro_auprc})
            val_macro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_micro_F1score})
            val_micro_F1score_summary_writer.add_summary(val_summaries, epoch)

            class_auprc_dict['class_auprc_' + str(epoch)] = class_auprc
            print('official')
            print('micro', val_micro_auprc)
            print('micro_F1', val_micro_F1score)
            print('macro', val_macro_auprc)

            print('-----save:{}-{}'.format(
                os.path.join(model_path, 'checkpoint', 'model'), epoch))
            saver.save(sess,
                       os.path.join(model_path, 'checkpoint', 'model'),
                       global_step=epoch)
            np.save(os.path.join(model_path, 'class_auprc_dict.npy'),
                    class_auprc_dict)

    sess.close()
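# Hypothetical invocation of train() (a sketch only; every argument value below is a
# placeholder and does not come from this code):
if __name__ == '__main__':
    train(annotation_path='annotations-dev.csv',
          taxonomy_path='dcase-ust-taxonomy.yaml',
          train_feature_dir='features/train',
          val_feature_dir='features/validate',
          output_dir='experiments',
          load_checkpoint=False,
          load_checkpoint_path=None,
          exp_id='01',
          label_mode='coarse')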
                    help='Path to prediction CSV file.')
parser.add_argument('annotation_path', type=str,
                    help='Path to dataset annotation CSV file.')
parser.add_argument('yaml_path', type=str,
                    help='Path to dataset taxonomy YAML file.')

args = parser.parse_args()

for mode in ("fine", "coarse"):
    df_dict = evaluate(args.prediction_path,
                       args.annotation_path,
                       args.yaml_path,
                       mode)
    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    macro_auprc, class_auprc = macro_averaged_auprc(df_dict,
                                                    return_classwise=True)

    # Get index of first threshold that is at least 0.5
    thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]

    print("{} level evaluation:".format(mode.capitalize()))
    print("======================")
    print(" * Micro AUPRC: {}".format(micro_auprc))
    print(" * Micro F1-score (@0.5): {}".format(
        eval_df["F"][thresh_0pt5_idx]))
    print(" * Macro AUPRC: {}".format(macro_auprc))
    print(" * Coarse Tag AUPRC:")
    for coarse_id, auprc in class_auprc.items():
def evaluate(self, data_type, submission_path=None, annotation_path=None,
             yaml_path=None, max_iteration=None):
    '''Evaluate prediction performance.

    Args:
      data_type: 'train' | 'validate'
      submission_path: None | string, path of submission csv
      annotation_path: None | string, path of reference csv
      yaml_path: None | string, path of yaml taxonomy file
      max_iteration: None | int, maximum number of iterations (on partial
          data) to use for fast evaluation
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(model=self.model,
                          generate_func=generate_func,
                          cuda=self.cuda,
                          return_target=True)
    output = output_dict['output']
    target = output_dict['{}_target'.format(self.taxonomy_level)]
    target = self.get_binary_target(target)

    average_precision = metrics.average_precision_score(target, output,
                                                        average=None)

    if self.verbose:
        logging.info('{} average precision:'.format(data_type))
        for k, label in enumerate(self.labels):
            logging.info('    {:<40}{:.3f}'.format(label, average_precision[k]))
        logging.info('    {:<40}{:.3f}'.format('Average',
                                               np.mean(average_precision)))
    else:
        logging.info('{}:'.format(data_type))
        logging.info('    mAP: {:.3f}'.format(np.mean(average_precision)))

    statistics = {}
    statistics['average_precision'] = average_precision

    # Write a submission file and evaluate it with the official evaluation tool:
    # https://github.com/sonyc-project/urban-sound-tagging-baseline
    if submission_path:
        write_submission_csv(audio_names=output_dict['audio_name'],
                             outputs=output,
                             taxonomy_level=self.taxonomy_level,
                             submission_path=submission_path)

        # The following code is from the official evaluation code
        df_dict = offical_metrics.evaluate(prediction_path=submission_path,
                                           annotation_path=annotation_path,
                                           yaml_path=yaml_path,
                                           mode=self.taxonomy_level)

        micro_auprc, eval_df = offical_metrics.micro_averaged_auprc(
            df_dict, return_df=True)
        macro_auprc, class_auprc = offical_metrics.macro_averaged_auprc(
            df_dict, return_classwise=True)

        # Get index of first threshold that is at least 0.5
        thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]

        logging.info('    Official evaluation:')
        logging.info('    Micro AUPRC: {:.3f}'.format(micro_auprc))
        logging.info('    Micro F1-score (@0.5): {:.3f}'.format(
            eval_df['F'][thresh_0pt5_idx]))
        logging.info('    Macro AUPRC: {:.3f}'.format(macro_auprc))

        statistics['micro_auprc'] = micro_auprc
        statistics['micro_f1'] = eval_df['F'][thresh_0pt5_idx]
        statistics['macro_auprc'] = macro_auprc

    return statistics
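# Minimal usage sketch (hypothetical; assumes `evaluator` is an instance of the class
# above, constructed elsewhere with a model, data generator, labels and taxonomy_level;
# the file paths are placeholders):
statistics = evaluator.evaluate(data_type='validate',
                                submission_path='submissions/validate_submission.csv',
                                annotation_path='annotations.csv',
                                yaml_path='dcase-ust-taxonomy.yaml')
print('Micro AUPRC: {:.3f}'.format(statistics['micro_auprc']))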
accuracy = 0
model.eval()

## get AUPRC scores
with torch.no_grad():
    make_prediction_csv(model, PREDICTION_PATH, mode=TRAIN_MODE, embed=True,
                        test_path=test_dir)
    df_dict = evaluate(PREDICTION_PATH, ANNOTATIONS_PATH, YAML_PATH, 'coarse')
    df_dict_fine = evaluate(PREDICTION_PATH, ANNOTATIONS_PATH, YAML_PATH, 'fine')
    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    micro_auprc_fine, eval_df_fine = micro_averaged_auprc(df_dict_fine,
                                                          return_df=True)
    print('Micro_AUPRC Coarse:', micro_auprc)
    print('Micro_AUPRC Fine:', micro_auprc_fine)

if micro_auprc > best_micro_auprc_coarse or micro_auprc_fine > best_micro_auprc_fine:
    name, ext = os.path.splitext(PREDICTION_PATH)
    shutil.copy(PREDICTION_PATH,
                f'{name}_best_coarse={micro_auprc:.3f}_fine={micro_auprc_fine:.3f}{ext}')
    torch.save(model.state_dict(),
               f'models/{RUN_NAME}_coarse={micro_auprc:.3f}_fine={micro_auprc_fine:.3f}.ckpt')
    best_micro_auprc_coarse = micro_auprc
    best_micro_auprc_fine = micro_auprc_fine
    stagnation = 0
    print('Best so far')
else:
    stagnation += 1
    print('Stagnation:', stagnation)
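# The enclosing epoch loop is not part of this excerpt. Presumably the tracking state
# is initialised once before training and the stagnation counter drives early stopping,
# roughly as sketched below (an assumption; PATIENCE and N_EPOCHS are hypothetical):
best_micro_auprc_coarse = 0.0
best_micro_auprc_fine = 0.0
stagnation = 0
PATIENCE = 10
for epoch in range(N_EPOCHS):
    ...  # training step, followed by the validation/checkpoint block above
    if stagnation >= PATIENCE:
        print('No improvement for {} validations; stopping early.'.format(PATIENCE))
        break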