def get_predictions(model, valid_dataset, decoder, save_predictions=None):
    """Run the model over a dataset and collect the decoded strong predictions.

    For every sample the strong output of ``model`` is binarized with a global
    threshold of 0.5, median filtered with a window of
    ``(cfg.median_window, 1)`` and converted to rows by ``decoder``.

    Args:
        model: callable returning a ``(strong, weak)`` pair for a batch; only
            the strong prediction is used here.
        valid_dataset: iterable of ``(input, label)`` pairs exposing a
            ``filenames`` pandas Series indexed positionally in iteration order.
        decoder: callable turning a binarized prediction into rows of
            ``(event_label, onset, offset)``.
        save_predictions: optional path; when given, the resulting table is
            written there as a tab-separated file without the index.

    Returns:
        pandas.DataFrame with the columns ``event_label``, ``onset``,
        ``offset`` and ``filename``. When the dataset yields no sample the
        frame is empty but still has these columns.
    """
    columns = ["event_label", "onset", "offset"]
    per_file_predictions = []
    for i, (features, _) in enumerate(valid_dataset):
        [features] = to_cuda_if_available([features])

        # The model expects a batch axis: add it, then drop it from the output.
        pred_strong, _ = model(features.unsqueeze(0))
        pred_strong = pred_strong.cpu()
        pred_strong = pred_strong.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_strong)

        pred_strong = ProbabilityEncoder().binarization(pred_strong, binarization_type="global_threshold",
                                                        threshold=0.5)
        pred_strong = scipy.ndimage.filters.median_filter(pred_strong, (cfg.median_window, 1))
        pred = decoder(pred_strong)
        pred = pd.DataFrame(pred, columns=columns)
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
            LOG.debug("predictions strong: \n{}".format(pred_strong))
        per_file_predictions.append(pred)

    # Concatenate once at the end: DataFrame.append no longer exists in
    # pandas 2.x and re-copied the whole table at every iteration.
    if per_file_predictions:
        prediction_df = pd.concat(per_file_predictions)
    else:
        prediction_df = pd.DataFrame(columns=columns + ["filename"])

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
def means(self, dataset):
    """ Compute the mean and the mean of squares over a whole dataset.

    Every sample is either the data itself or a tuple/list of length 2 whose first element is the
    data. Partial sums and element counts are obtained from ``self.sum(..., axis=-1)``; the results
    are stored in ``self.mean_`` and ``self.mean_of_square_``.

    :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
    :return: self
    """
    LOG.info('computing mean')
    t_begin = time.time()

    total, n_total = 0, 0
    total_sq, n_total_sq = 0, 0
    for item in dataset:
        # Keep only the data part of (X, y) samples.
        is_pair = type(item) in [tuple, list] and len(item) == 2
        features = item[0] if is_pair else item
        if type(features) is torch.Tensor:
            features = features.numpy()

        part_sum, part_n = self.sum(features, axis=-1)
        total += part_sum
        n_total += part_n

        part_sum_sq, part_n_sq = self.sum(features ** 2, axis=-1)
        total_sq += part_sum_sq
        n_total_sq += part_n_sq

    self.mean_ = total / n_total
    self.mean_of_square_ = total_sq / n_total_sq

    LOG.debug('time to compute means: ' + str(time.time() - t_begin))
    return self
def get_weak_predictions(model, valid_dataset, weak_decoder, save_predictions=None):
    """Run the model over a dataset and collect the decoded weak predictions.

    For every sample the model output is binarized with a global threshold of
    0.5 and converted to rows by ``weak_decoder``.

    Args:
        model: callable returning the weak prediction for a batch.
        valid_dataset: iterable of ``(input, label)`` pairs exposing a
            ``filenames`` pandas Series indexed positionally in iteration order.
        weak_decoder: callable turning a binarized prediction into the values
            of the ``event_labels`` column.
        save_predictions: optional path; when given, the resulting table is
            written there as a tab-separated file without the index.

    Returns:
        pandas.DataFrame with the columns ``event_labels`` and ``filename``.
        When the dataset yields no sample the frame is empty but still has
        these columns.
    """
    columns = ["event_labels"]
    per_file_predictions = []
    for i, (data, _) in enumerate(valid_dataset):
        data = to_cuda_if_available(data)

        # The model expects a batch axis: add it, then drop it from the output.
        pred_weak = model(data.unsqueeze(0))
        pred_weak = pred_weak.cpu()
        pred_weak = pred_weak.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_weak)

        pred_weak = ProbabilityEncoder().binarization(
            pred_weak, binarization_type="global_threshold", threshold=0.5)
        pred = weak_decoder(pred_weak)
        pred = pd.DataFrame(pred, columns=columns)
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
        per_file_predictions.append(pred)

    # Concatenate once at the end: DataFrame.append no longer exists in
    # pandas 2.x and re-copied the whole table at every iteration.
    if per_file_predictions:
        prediction_df = pd.concat(per_file_predictions)
    else:
        prediction_df = pd.DataFrame(columns=columns + ["filename"])

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
def measure_embeddings(set_embed, model, emb_path, figure_path, set_name=''):
    """Compute embeddings for a set and report clustering-quality measures.

    The embeddings come from ``calculate_embedding``; rows with missing values
    are dropped from the metadata and the embeddings are indexed accordingly.
    A t-SNE projection is plotted, then the scatter ratio, the silhouette
    score and the prototype accuracy are computed on the flattened embeddings.

    Args:
        set_embed: data passed to ``calculate_embedding``.
        model: model passed to ``calculate_embedding``.
        emb_path: directory given as ``savedir`` to ``calculate_embedding``.
        figure_path: path given as ``savefig`` to ``tsne_plots``.
        set_name: suffix appended to the keys of the returned dict and used
            in the log messages.

    Returns:
        dict with the keys ``"scatter" + set_name``, ``"silhouette" + set_name``
        and ``"proto" + set_name``.
    """
    meta, embeddings = calculate_embedding(set_embed, model, savedir=emb_path, concatenate="append")
    meta = meta.dropna()
    embeddings = embeddings[meta.index]
    LOG.debug("embed shape: {}".format(embeddings.shape))
    LOG.debug("df shape: {}".format(meta.shape))

    def _flat():
        # One row per sample, all remaining axes merged.
        return embeddings.reshape(embeddings.shape[0], -1)

    projection = TSNE().fit_transform(X=_flat())
    tsne_plots(projection, meta, savefig=figure_path)

    scatter = scatter_ratio(_flat(), meta.reset_index())
    silhouette = sklearn.metrics.silhouette_score(_flat(), meta.event_labels, metric='euclidean')

    # Just informative
    # NOTE(review): the 'X' / 'Y' columns are not created here; presumably tsne_plots adds them - confirm.
    silhouette_2d = sklearn.metrics.silhouette_score(meta[['X', 'Y']], meta.event_labels, metric='euclidean')
    LOG.info(f"{set_name} silhouette for all classes in 2D (tsne) : {silhouette_2d}")

    proto = proto_acc(_flat(), meta.reset_index())
    LOG.info("Proto accuracy {} : {}".format(set_name, proto))

    measures = {}
    measures["scatter" + set_name] = scatter
    measures["silhouette" + set_name] = silhouette
    measures["proto" + set_name] = proto
    return measures
def train(train_loader, model, optimizer, epoch, weak_mask=None, strong_mask=None):
    """ One epoch of training with a binary cross-entropy loss on weak and/or strong labels.

    :param train_loader: iterable of (batch_input, target) batches, must support len().
    :param model: callable returning a (strong prediction, weak prediction) pair for a batch.
    :param optimizer: optimizer, stepped once per batch.
    :param epoch: int, the current epoch, only used in the summary log line.
    :param weak_mask: index selecting the weakly labeled part of a batch, None disables the weak loss.
    :param strong_mask: index selecting the strongly labeled part of a batch, None disables the strong loss.
    """
    [bce] = to_cuda_if_available([nn.BCELoss()])

    meters = AverageMeterSet()
    meters.update('lr', optimizer.param_groups[0]['lr'])

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    t_begin = time.time()
    for step, (batch_input, target) in enumerate(train_loader):
        [batch_input, target] = to_cuda_if_available([batch_input, target])
        LOG.debug(batch_input.mean())

        strong_pred, weak_pred = model(batch_input)

        loss = 0
        if weak_mask is not None:
            # Weak BCE loss: the weak target is the max of the target over its second to last axis.
            # The mask is the trick used to leave the unlabeled data out. Todo figure out another way
            target_weak = target.max(-2)[0]
            weak_class_loss = bce(weak_pred[weak_mask], target_weak[weak_mask])
            if step == 1:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug(weak_class_loss)
            meters.update('Weak loss', weak_class_loss.item())
            loss += weak_class_loss

        if strong_mask is not None:
            # Strong BCE loss
            strong_class_loss = bce(strong_pred[strong_mask], target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())
            loss += strong_class_loss

        loss_value = loss.item()
        exploded = np.isnan(loss_value) or loss_value > 1e5
        assert not exploded, 'Loss explosion: {}'.format(loss_value)
        assert not loss_value < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss_value)

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    elapsed = time.time() - t_begin
    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, elapsed, meters=meters))
def train_triplet_epoch(loader, model_triplet, optimizer, semi_hard_input=None, semi_hard_embed=None,
                        pit=False, margin=None, swap=False, acc_grad=False):
    """Train a triplet model for one epoch.

    Args:
        loader: data loader; its ``batch_sampler`` is iterated instead when
            ``acc_grad`` is set, and its ``dataset`` is then handed to
            ``loop_batches_acc_grad``.
        model_triplet: model producing the embeddings.
        optimizer: optimizer, stepped for every batch with at least one
            strictly positive loss term.
        semi_hard_input: forwarded to the batch loop helper.
        semi_hard_embed: forwarded to the batch loop helper.
        pit: forwarded to ``get_distances``.
        margin: when not None the loss is ``clamp(margin + d_pos - d_neg, min=0)``,
            otherwise ``ratio_loss`` is used.
        swap: forwarded to ``get_distances``.
        acc_grad: select ``loop_batches_acc_grad`` instead of ``loop_batches``.

    Returns:
        tuple ``(model_triplet, per-batch losses, ratio of triplets with a
        strictly positive loss)``.
    """
    t_begin = time.time()
    batch_losses = []
    n_active = 0
    n_seen = 0

    batches = loader.batch_sampler if acc_grad else loader

    for step, samples in enumerate(batches):
        optimizer.zero_grad()
        if acc_grad:
            anchor, positive, negative = loop_batches_acc_grad(samples, loader.dataset, model_triplet,
                                                               semi_hard_input, semi_hard_embed, i=step)
        else:
            anchor, positive, negative = loop_batches(samples, model_triplet,
                                                      semi_hard_input, semi_hard_embed, i=step)

        if step == 0:
            LOG.debug("output CNN shape: {}".format(anchor.shape))
            LOG.debug(anchor.mean())
            LOG.debug(positive.mean())
            LOG.debug(negative.mean())

        dist_pos, dist_neg = get_distances(anchor, positive, negative, pit, swap)

        if margin is None:
            per_triplet = ratio_loss(dist_pos, dist_neg)
        else:
            per_triplet = torch.clamp(margin + dist_pos - dist_neg, min=0.0)

        active = (per_triplet.detach() > 0).sum().item()
        n_active += active
        n_seen += len(per_triplet)

        if active <= 0:
            LOG.debug("batch doesn't have any loss > 0")
            continue

        # The loss is averaged over all triplets of the batch, not only the active ones.
        batch_loss = per_triplet.mean()
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())

    elapsed = time.time() - t_begin
    LOG.info("Loss: {:.4f}\t"
             "Time: {}\t"
             "\tnb_triplets used: {} / {}\t"
             "".format(np.mean(batch_losses), elapsed, n_active, n_seen))

    ratio_triplet_used = n_active / n_seen
    return model_triplet, batch_losses, ratio_triplet_used
def compute_strong_metrics(predictions, valid_df, pooling_time_ratio):
    """Convert predicted boundaries to seconds and compute event and segment based metrics.

    The ``onset`` and ``offset`` columns of ``predictions`` are rescaled IN
    PLACE by ``pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)``.

    Args:
        predictions: DataFrame with ``onset`` and ``offset`` columns; modified in place.
        valid_df: reference DataFrame handed to the evaluation helpers.
        pooling_time_ratio: factor applied to the boundaries before dividing
            by the frame rate.

    Returns:
        The event based metric (the segment based one is only logged).
    """
    # In seconds
    frames_per_second = cfg.sample_rate / cfg.hop_length
    for boundary in ("onset", "offset"):
        in_seconds = getattr(predictions, boundary) * pooling_time_ratio / frames_per_second
        setattr(predictions, boundary, in_seconds)

    metric_event = event_based_evaluation_df(valid_df, predictions, t_collar=0.200,
                                             percentage_of_length=0.2)
    metric_segment = segment_based_evaluation_df(valid_df, predictions, time_resolution=1.)

    for metric in (metric_event, metric_segment):
        LOG.info(metric)

    return metric_event
def means(self, dataset):
    """ Compute the mean and the mean of squares of a dataset, averaged over its samples.

    Every sample is either the data itself or a tuple/list of length 2 whose first element is the
    data. ``self.mean(..., axis=-1)`` is evaluated on each sample and on its square, and the results
    are averaged over the samples. All samples must have the same shape.

    The accumulation is done in local variables, so values already stored in ``self.mean_`` /
    ``self.mean_of_square_`` (previous call, loaded state) never leak into the new result, and the
    attributes are left untouched when an error is raised.

    :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
    :return: self, with ``mean_`` and ``mean_of_square_`` set.
    :raises NotImplementedError: if two samples have different shapes.
    :raises ValueError: if the dataset yields no sample.
    """
    LOG.info('computing mean')
    start = time.time()

    shape = None
    counter = 0
    mean_sum = None
    mean_of_square_sum = None
    for sample in dataset:
        if type(sample) in [tuple, list] and len(sample) == 2:
            batch_X, _ = sample
        else:
            batch_X = sample
        if type(batch_X) is torch.Tensor:
            batch_X_arr = batch_X.numpy()
        else:
            batch_X_arr = batch_X

        # The first sample fixes the expected shape.
        if shape is None:
            shape = batch_X_arr.shape
        elif not batch_X_arr.shape == shape:
            raise NotImplementedError("Not possible to add data with different shape in mean calculation yet")

        counter += 1
        batch_mean = self.mean(batch_X_arr, axis=-1)
        batch_mean_of_square = self.mean(batch_X_arr ** 2, axis=-1)
        if mean_sum is None:
            mean_sum = batch_mean
            mean_of_square_sum = batch_mean_of_square
        else:
            # Out-of-place additions: never write into an array returned by self.mean.
            mean_sum = mean_sum + batch_mean
            mean_of_square_sum = mean_of_square_sum + batch_mean_of_square

    if counter == 0:
        raise ValueError("Cannot compute means of an empty dataset")

    self.mean_ = mean_sum / counter
    self.mean_of_square_ = mean_of_square_sum / counter

    LOG.debug('time to compute means: ' + str(time.time() - start))
    return self
def compute_strong_metrics(predictions, valid_df, pooling_time_ratio=None):
    """Compute event and segment based metrics for strong predictions.

    Args:
        predictions: DataFrame of predictions handed to the evaluation helpers.
        valid_df: reference DataFrame handed to the evaluation helpers.
        pooling_time_ratio: deprecated. When given, a warning is logged and the
            ``onset`` / ``offset`` columns of ``predictions`` are rescaled IN
            PLACE by ``pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)``.

    Returns:
        The event based metric (the segment based one is only logged).
    """
    if pooling_time_ratio is not None:
        LOG.warning("pooling_time_ratio is deprecated, use it in get_predictions() instead.")
        # In seconds
        frames_per_second = cfg.sample_rate / cfg.hop_length
        for boundary in ("onset", "offset"):
            in_seconds = getattr(predictions, boundary) * pooling_time_ratio / frames_per_second
            setattr(predictions, boundary, in_seconds)

    metric_event = event_based_evaluation_df(valid_df, predictions, t_collar=0.200,
                                             percentage_of_length=0.2)
    metric_segment = segment_based_evaluation_df(valid_df, predictions, time_resolution=1.)

    for metric in (metric_event, metric_segment):
        LOG.info(metric)

    return metric_event
def get_model(state, args, init_model_name=None):
    """Load the CNN from ``init_model_name`` if that file exists, otherwise build a new one.

    When a new model is built, its constructor arguments and state dict are
    stored in ``state`` under ``'model'`` together with ``'nb_frames_staying'``,
    and the state is saved to ``init_model_name`` when a name is given.

    Args:
        state: dict updated in place when a new model is built; replaced by
            the loaded state when the model is loaded from disk.
        args: parsed arguments; ``fixed_segment``, ``agg_time`` and
            ``norm_embed`` are read, ``conv_dropout`` only if present.
        init_model_name: optional path of the initial model.

    Returns:
        tuple ``(model, state)``.
    """
    checkpoint_exists = init_model_name is not None and os.path.exists(init_model_name)
    if checkpoint_exists:
        # The optimizer returned alongside the model is not needed here.
        model, _, state = load_model(init_model_name, return_optimizer=True, return_state=True)
    else:
        conv_dropout = args.conv_dropout if "conv_dropout" in args else cfg.conv_dropout
        # NOTE(review): a set literal, not a list; it unpacks to the single positional argument 1.
        cnn_args = {1}

        frames = cfg.frames if args.fixed_segment is not None else None

        nb_layers = 4
        cnn_kwargs = {
            "activation": cfg.activation,
            "conv_dropout": conv_dropout,
            "batch_norm": cfg.batch_norm,
            "kernel_size": [3] * nb_layers,
            "padding": [1] * nb_layers,
            "stride": [1] * nb_layers,
            "nb_filters": [16, 16, 32, 65],
            "pooling": [(2, 2), (2, 2), (1, 4), (1, 2)],
            "aggregation": args.agg_time,
            "norm_out": args.norm_embed,
            "frames": frames,
        }
        nb_frames_staying = cfg.frames // (2**2)
        model = CNN(*cnn_args, **cnn_kwargs)
        # model.apply(weights_init)

        model_description = {
            "name": model.__class__.__name__,
            'args': cnn_args,
            "kwargs": cnn_kwargs,
            'state_dict': model.state_dict(),
        }
        state.update({'model': model_description, 'nb_frames_staying': nb_frames_staying})
        if init_model_name is not None:
            save_model(state, init_model_name)

    trainable = [p.numel() for p in model.parameters() if p.requires_grad]
    pytorch_total_params = sum(trainable)
    LOG.info(
        "number of parameters in the model: {}".format(pytorch_total_params))
    return model, state
def extract_features_from_meta(self, csv_audio, feature_dir, subpart_data=None):
    """Extract and save the mel spectrogram of every file listed in a metadata file.

    A feature file ``<feature_dir>/<name>.npy`` is written for each unique
    filename that does not have one yet. Files missing on disk are dropped
    from the returned metadata; audio files read as empty are reported and
    skipped (they stay in the metadata).

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the path to the directory where the features are
        subpart_data: int, number of files to extract features from the csv.

    Returns:
        The metadata DataFrame without the rows of missing audio files, index reset.
    """
    t_begin = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    unique_names = df_meta.filename.unique()
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(unique_names)))

    for ind, wav_name in enumerate(unique_names):
        if ind % 500 == 0:
            LOG.debug(ind)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        wav_path = os.path.join(wav_dir, wav_name)
        out_filename = os.path.join(feature_dir, name_only(wav_name) + ".npy")

        # Features already on disk are not recomputed.
        if os.path.exists(out_filename):
            continue

        if not os.path.isfile(wav_path):
            LOG.error(
                "File %s is in the csv file but the feature is not extracted!"
                % wav_path)
            missing_rows = df_meta[df_meta.filename == wav_name].index
            df_meta = df_meta.drop(missing_rows)
            continue

        (audio, _) = read_audio(wav_path, cfg.sample_rate)
        if audio.shape[0] == 0:
            print("File %s is corrupted!" % wav_path)
            continue

        mel_spec = self.calculate_mel_spec(
            audio, log_feature=self.save_log_feature)
        np.save(out_filename, mel_spec)

    LOG.debug("compute features time: %s" % (time.time() - t_begin))
    return df_meta.reset_index(drop=True)
def test_model(state, reference_tsv_path, reduced_number_of_data=None, strore_predicitions_fname=None):
    """Rebuild a CRNN from a saved state and evaluate it against a reference tsv file.

    Strong (event/segment based) metrics are computed from the predictions and
    the weak F1-scores per class are logged.

    Args:
        state: dict with the keys ``model`` (``kwargs`` and ``state_dict``),
            ``epoch``, ``pooling_time_ratio``, ``scaler`` and
            ``many_hot_encoder``.
        reference_tsv_path: path of the reference metadata file.
        reduced_number_of_data: forwarded to ``dataset.initialize_and_get_df``.
        strore_predicitions_fname: optional path where the predictions are saved.
    """
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    # The parameters are loaded once; the original loaded the same state dict a second time.
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path, reduced_number_of_data)

    # Strong predictions and metrics
    strong_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)
    predictions = get_predictions(crnn, strong_dataload, many_hot_encoder.decode_strong, pooling_time_ratio,
                                  save_predictions=strore_predicitions_fname)
    compute_strong_metrics(predictions, df)

    # Weak F1-scores
    weak_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes),
        DataLoader(weak_dataload, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
def proto_acc(embed, df):
    """Nearest-prototype accuracy of embeddings, averaged over the classes.

    The prototype of a class is the mean of the flattened embeddings of the
    rows whose ``event_labels`` contains the class name. Every embedding of a
    class is then assigned to its closest prototype and the accuracy of the
    class is the fraction assigned to its own prototype. Classes without any
    row keep an all-zero prototype and an accuracy of 0.

    Args:
        embed: array of embeddings with one entry per row of ``df``; it is
            flattened to ``(n_samples, -1)`` first, so any number of trailing
            axes is accepted.
        df: DataFrame with an ``event_labels`` column. Its index values are
            used to index the embeddings, so the caller must make them
            positions into ``embed``.

    Returns:
        float, mean of the per-class accuracies.
    """
    classes = ['Alarm_bell_ringing', 'Blender', 'Cat', 'Dishes', 'Dog', 'Electric_shaver_toothbrush',
               'Frying', 'Running_water', 'Speech', 'Vacuum_cleaner']
    vector_embed = embed.reshape(embed.shape[0], -1)

    # Rows of each class, computed once and reused by both passes.
    labels = df.event_labels.fillna("")
    class_indices = [df[labels.str.contains(c)].index for c in classes]

    # The prototypes live in the flattened space: use its width, not embed.shape[-1],
    # which is only the same for 2-D input.
    classes_mean = np.zeros((len(classes), vector_embed.shape[-1]))
    for i, index in enumerate(class_indices):
        if len(index) > 0:
            classes_mean[i] = np.mean(vector_embed[index], axis=0)

    acc_per_class = np.zeros((len(classes)))
    for i, index in enumerate(class_indices):
        if len(index) > 0:
            distance_to_prototypes = scipy.spatial.distance.cdist(vector_embed[index], classes_mean)
            nearest = distance_to_prototypes.argmin(-1)
            acc_per_class[i] = (nearest == i).mean()

    LOG.info(pd.DataFrame([classes, acc_per_class.tolist()]).transpose())
    return acc_per_class.mean()
def trunc_pad_segment(df, fixed_segment):
    """Replace every labeled event by a randomly placed segment of fixed duration.

    For each row the onset is shifted by a random amount (see the inner
    function), clipped at 0, and the offset is set to ``onset + fixed_segment``.
    When the offset would exceed ``cfg.max_len_seconds`` the segment is moved
    back so that it ends exactly there.

    Args:
        df: DataFrame with ``onset`` and ``offset`` columns.
        fixed_segment: duration given to every segment.

    Returns:
        DataFrame with the updated ``onset`` and ``offset`` values.
    """
    def _fix_row(row, length):
        onset, offset = row["onset"], row["offset"]
        if offset - onset > length:
            # Event longer than the segment: shift the onset by up to one segment either way.
            shifted = onset + fixed_segment * np.random.uniform(-1, 1)
        else:
            # Event not longer than the segment: move the onset earlier by up to one segment.
            shifted = onset - fixed_segment * np.random.rand()
        row["onset"] = max(0, shifted)

        row["offset"] = row["onset"] + fixed_segment
        if row["offset"] > cfg.max_len_seconds:
            row["offset"] = cfg.max_len_seconds
            row["onset"] = row["offset"] - fixed_segment
        return row

    assert {"onset", "offset"}.issubset(df.columns), "bias label only available with strong labels"
    LOG.info(f"Fix labels {fixed_segment} seconds")
    return df.apply(_fix_row, axis=1, args=(fixed_segment,))
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state=None, dir_model="model", result_path="res", recompute=True):
    """Train a classifier with a BCE loss, with optional early stopping and best-model saving.

    Unless a saved model already exists and ``recompute`` is False, the model
    is trained for ``cfg.n_epoch_classifier`` epochs. After every epoch the
    macro F-measure is computed on the training data and, when a validation
    loader is given, on the validation data; the latter drives the early
    stopping and best-model callbacks (``-1`` is used without validation).

    Args:
        train_loader: loader of ``(inputs, labels)`` training batches.
        classif_model: model to train.
        optimizer_classif: optimizer of the model.
        many_hot_encoder: encoder; its ``state_dict()`` and ``labels`` are
            used, so it must be provided despite the default.
        valid_loader: optional loader of validation batches.
        state: dict describing the experiment; updated in place. A fresh dict
            is created when omitted. The training path writes into
            ``state["model"]`` and ``state["optimizer"]``, which must exist.
        dir_model: directory where models are stored (created if needed).
        result_path: path of the tab-separated file with the per-epoch results.
        recompute: train even if a saved model already exists.

    Returns:
        tuple ``(classif_model, state)``.
    """
    # A dict default would be shared between calls and is mutated below.
    if state is None:
        state = {}

    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")

    # scheduler = ReduceLROnPlateau(optimizer_classif, 'max', factor=0.1, patience=cfg.reduce_lr,
    #                               verbose=True)
    print(optimizer_classif)

    # One dict per epoch; turned into a DataFrame once, at the end
    # (DataFrame.append no longer exists in pandas 2.x).
    epoch_results = []

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            loss_mean_bce = []
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize
                weak_out = classif_model(inputs)
                weak_out = to_cpu(weak_out)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_mean_bce.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            loss_mean_bce = np.mean(loss_mean_bce)

            # Evaluation is done in eval mode, then the model goes back to train mode.
            classif_model.eval()
            n_class = len(many_hot_encoder.labels)
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class,
                                                           train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class,
                                                         valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
                class_macro_valid = np.array_str(macro_f_measure, precision=2)
            else:
                # Without validation data there is no per-class score to report.
                mean_macro_f_measure = -1
                class_macro_valid = None
            classif_model.train()
            print("Time to train an epoch: \n{}".format(time.time() - start))

            # print statistics
            print('[%d / %d, %5d] loss: %.3f' % (epoch_ + 1, cfg.n_epoch_classifier, i + 1, loss_mean_bce))

            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": class_macro_valid,
                       }
            for key in results:
                LOG.info("\t\t ---->  {} : {}".format(key, results[key]))

            epoch_results.append(results)
            # scheduler.step(mean_macro_f_measure)

            # ##########
            # # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            state["model"]["state_dict"] = classif_model.state_dict()
            state["optimizer"]["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info(
                "best model at epoch : {} with macro {}".format(save_best_call.best_epoch, save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)

    LOG.info("#### End classif")
    save_results = pd.DataFrame(epoch_results)
    save_results.to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
def test_model(state, reduced_number_of_data, strore_predicitions_fname=None):
    """Rebuild a CRNN from a saved state and evaluate it on the 2019 validation set.

    Strong (event/segment based) metrics are computed from the predictions and
    the weak F1-scores per class are logged.

    Args:
        state: dict with the keys ``model`` (``kwargs`` and ``state_dict``),
            ``epoch``, ``pooling_time_ratio``, ``scaler`` and
            ``many_hot_encoder``.
        reduced_number_of_data: forwarded to ``dataset.initialize_and_get_df``.
        strore_predicitions_fname: optional path where the predictions are saved.
    """
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    # The parameters are loaded once; the original loaded the same state dict a second time.
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir,
                                    local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag,
                                    save_log_feature=False)

    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)

    predictions = get_predictions(crnn, validation_strong, many_hot_encoder.decode_strong,
                                  save_predictions=strore_predicitions_fname)

    # The metrics are computed on copies whose filenames carry the '.wav' extension.
    # regex=False: '.npy' must be replaced literally; as a regular expression (the default
    # before pandas 2.0) the dot would match any character.
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav', regex=False)
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav', regex=False)
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes),
        DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
from utils.Transforms import ApplyLog, Unsqueeze, PadOrTrunc, ToTensor, Normalize, Compose
from utils.utils import load_model, ManyHotEncoder

# ###########
# ## Argument
# ###########
# Start time of the script.
t = time.time()

print("Arguments have been set for a certain group of experiments, feel free to change it.")
parser = argparse.ArgumentParser(description="")
parser.add_argument('--subpart_data', type=int, default=None)
parser.add_argument('--model_path', type=str, default=None)
parser.add_argument('--embed_name', type=str, default=None)
# Experiments to compare the impact of the number of labeled vs unlabeled triplets
# Be careful if subpart data is not None!
f_args = parser.parse_args()
LOG.info(pformat(vars(f_args)))

model_path = f_args.model_path
assert model_path is not None, "model_path has to be defined to compute an embedding"

# Default embedding name: the second to last "/"-separated component of the model path.
# NOTE(review): this raises IndexError when model_path contains no "/" - confirm the expected path layout.
embed_name = f_args.embed_name
if embed_name is None:
    embed_name = model_path.split("/")[-2]

############
# Experiment parameters
###########
subpart_data = f_args.subpart_data

dataset = DesedSynthetic("../dcase2019",
                         base_feature_dir="../dcase2019/features",
                         save_log_feature=False)
# Load the embedding model together with its saved state and report the epoch stored in it.
emb_model, state = load_model(model_path, return_state=True)
epoch_model = state["epoch"]
LOG.info("model loaded at epoch: {}".format(epoch_model))
optimizer.step() global_step += 1 if ema_model is not None: update_ema_variables(model, ema_model, 0.999, global_step) epoch_time = time.time() - start LOG.info('Epoch: {}\t' 'Time {:.2f}\t' '{meters}'.format(epoch, epoch_time, meters=meters)) print("\ncheck_cus_weak:\n", check_cus_weak / count) if __name__ == '__main__': LOG.info("MEAN TEACHER") parser = argparse.ArgumentParser(description="") parser.add_argument( "-s", '--subpart_data', type=int, default=None, dest="subpart_data", help= "Number of files to be used. Useful when testing on small number of files." ) parser.add_argument("-n", '--no_synthetic', dest='no_synthetic', action='store_true',
if train: loss.backward() optimizer.step() cnt += 1 if cnt > 0: loss_mean = loss_mean / cnt acc_mean = acc_mean / cnt else: warnings.warn("No training has been performed") return loss_mean, acc_mean if __name__ == '__main__': LOG.info(__file__) t = time.time() parser = argparse.ArgumentParser() parser.add_argument('--max-epoch', type=int, default=200) parser.add_argument('--save-epoch', type=int, default=20) parser.add_argument('--shot', type=int, default=1) # How many to get for proto parser.add_argument('--query', type=int, default=1) # How many to eval parser.add_argument('--train-way', type=int, default=10) parser.add_argument('--test-way', type=int, default=10) parser.add_argument('--n_layers_RNN', type=int, default=2) parser.add_argument('--dim_RNN', type=int, default=64) parser.add_argument('--test-only', action="store_true", default=False) parser.add_argument('--load',
def train(train_loader, model, optimizer, epoch, ema_model=None, weak_mask=None, strong_mask=None):
    """ One epoch of a Mean Teacher model trained with the difficulty-weighted BCE losses.

    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
        Should return 3 values: the input of ``model``, the input of ``ema_model``, the labels.
    :param model: torch.Module, model to be trained, should return three values: the strong prediction,
        the weak prediction and a third output which is handed to the custom loss.
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, model whose weights are updated by ``update_ema_variables``,
        should return the same three values as ``model``. When None, no EMA forward pass, EMA loss
        or consistency cost is computed.
    :param weak_mask: mask the batch to get only the weak labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strong labeled data (used to calculate the loss)
    """
    class_criterion = nn.BCELoss()
    class_criterion1 = nn.BCELoss(reduction='none')
    consistency_criterion = nn.MSELoss()
    [class_criterion, class_criterion1, consistency_criterion] = to_cuda_if_available(
        [class_criterion, class_criterion1, consistency_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2

    print("Train\n")

    # Number of batches with a weak loss, and the running sum of that loss (plain floats).
    count = 0
    check_cus_weak = 0
    difficulty_loss_announced = False
    loss_w = 1
    LOG.info("loss paramater:{}".format(loss_w))

    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input, target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug("batch_input:{}".format(batch_input.mean()))

        # Outputs. The EMA outputs are detached: no gradient flows through the EMA model.
        if ema_model is not None:
            strong_pred_ema, weak_pred_ema, sof_ema = ema_model(ema_batch_input)
            sof_ema = sof_ema.detach()
            strong_pred_ema = strong_pred_ema.detach()
            weak_pred_ema = weak_pred_ema.detach()
        strong_pred, weak_pred, sof = model(batch_input)

        if not difficulty_loss_announced:
            LOG.info("############### Deffine Difficulty Loss ###############")
            difficulty_loss_announced = True
        custom_ema_loss = None
        if ema_model is not None:
            custom_ema_loss = Custom_BCE_Loss_difficulty(ema_batch_input, class_criterion1, paramater=loss_w)
            custom_ema_loss.initialize(strong_pred_ema, sof_ema)
        custom_loss = Custom_BCE_Loss_difficulty(batch_input, class_criterion1, paramater=loss_w)
        custom_loss.initialize(strong_pred, sof)

        loss = None
        # Weak BCE Loss
        # Take the max in the time axis
        target_weak = target.max(-2)[0]
        if weak_mask is not None:
            # The plain BCE loss is only printed for comparison; the custom loss is optimized.
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])
            print("noraml_weak:", weak_class_loss)

            custom_weak_class_loss = custom_loss.weak(target_weak, weak_mask)
            if custom_ema_loss is not None:
                custom_ema_class_loss = custom_ema_loss.weak(target_weak, weak_mask)
            print("custom_weak:", custom_weak_class_loss)

            count += 1
            # Accumulate a Python float: summing the tensor itself would keep the autograd
            # history of every batch alive until the end of the epoch.
            check_cus_weak += custom_weak_class_loss.item()

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(target_weak[weak_mask]))
                LOG.debug(custom_weak_class_loss)
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss', custom_weak_class_loss.item())
            if custom_ema_loss is not None:
                meters.update('Weak EMA loss', custom_ema_class_loss.item())

            loss = custom_weak_class_loss

        # Strong BCE loss
        if strong_mask is not None:
            # The plain BCE loss is only printed for comparison; the custom loss is optimized.
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            print("normal_strong:", strong_class_loss)

            custom_strong_class_loss = custom_loss.strong(target, strong_mask)
            meters.update('Strong loss', custom_strong_class_loss.item())
            if custom_ema_loss is not None:
                custom_strong_ema_class_loss = custom_ema_loss.strong(target, strong_mask)
                meters.update('Strong EMA loss', custom_strong_ema_class_loss.item())
            print("custom_strong:", custom_strong_class_loss)

            if loss is not None:
                loss = loss + custom_strong_class_loss
            else:
                loss = custom_strong_class_loss

        # Teacher-student consistency cost
        if ema_model is not None:
            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)
            # Take consistency about strong predictions (all data)
            consistency_loss_strong = consistency_cost * consistency_criterion(strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss = loss + consistency_loss_strong
            else:
                loss = consistency_loss_strong

            meters.update('Consistency weight', consistency_cost)
            # Take consistency about weak predictions (all data)
            consistency_loss_weak = consistency_cost * consistency_criterion(weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss = loss + consistency_loss_weak
            else:
                loss = consistency_loss_weak

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))

    # No weak loss was computed when weak_mask is None or the loader is empty.
    mean_cus_weak = check_cus_weak / count if count else float("nan")
    print("\ncheck_cus_weak:\n", mean_cus_weak)
from utils.Transforms import ApplyLog, Unsqueeze, ToTensor, View, Normalize, Compose
from utils.Samplers import CategoriesSampler
from pprint import pformat
import config as cfg
from DesedSynthetic import DesedSynthetic
from evaluation_measures import get_f_measure_by_class, measure_classif
from common import get_model, get_optimizer, shared_args, get_dfs, measure_embeddings
from models.FullyConnected import FullyConnected
from models.CombineModel import CombineModel
from utils.Logger import LOG
from utils.Scaler import ScalerSum
from utils.utils import ManyHotEncoder, create_folder, to_cuda_if_available, EarlyStopping, SaveBest, to_cpu, \
    load_model, save_model, ViewModule

if __name__ == '__main__':
    # Script entry point: log which script is running and remember the start time.
    LOG.info(__file__)
    t = time.time()

    # Command-line options; most defaults are read from the project config module.
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_layers_classif', type=int, default=1)
    parser.add_argument('--conv_dropout', type=float, default=cfg.conv_dropout)
    parser.add_argument('--dropout_classif', type=float, default=cfg.dropout_non_recurrent)
    parser.add_argument('--nb_layers', type=int, default=cfg.nb_layers)
    parser.add_argument('--pool_freq', type=int, default=cfg.pool_freq)
    parser.add_argument('--last_layer', type=int, default=cfg.last_layer)
    # NOTE(review): --epochs is parsed as a float; confirm that downstream code does not expect an int.
    parser.add_argument('--epochs', type=float, default=cfg.n_epoch_classifier)
    # shared_args receives the parser and returns the parser that is used from here on.
    parser = shared_args(parser)

    args = parser.parse_args()
class MyLogger(object):
    """Logger stub whose debug, warning and error methods silently discard the message."""

    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass


if __name__ == "__main__":
    # Folders are expressed relative to the directory the script is launched from.
    base_missing_files_folder = ".."
    dataset_folder = os.path.join("..", "dataset")

    LOG.info("Download_data")
    LOG.info("\n\nOnce database is downloaded, do not forget to check your missing_files\n\n")

    LOG.info("You can change N_JOBS and CHUNK_SIZE to increase the download with more processes.")
    # Number of download processes. Be careful: YouTube can block you if this is too high.
    N_JOBS = 3

    # Only useful when multiprocessing: a larger chunk size makes the download faster,
    # but the progress bar is only updated after each chunk.
    CHUNK_SIZE = 10

    LOG.info("Validation data")
    test = os.path.join(dataset_folder, "metadata", "validation", "validation.tsv")
    result_dir = os.path.join(dataset_folder, "audio", "validation")
    # Read the tab-separated metadata file (first row is the header).
    df = pd.read_csv(test, header=0, sep='\t')
def extract_features_from_meta(self, csv_audio, subpart_data=None, training=False):
    """Extract (or reuse already saved) mel spectrogram features for the files of a csv.

    For every distinct filename of the metadata, the feature of the original
    signal -- and of each enabled augmentation when ``training`` is True -- is
    computed with ``self.calculate_mel_spec`` and saved as a ``.npy`` file in
    ``self.feature_dir``, unless that file already exists.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        subpart_data: int, number of files to extract features from the csv.
        training: bool, when True the training-only augmentations are added to the
            original signal (all of them are currently commented out).

    Returns:
        pd.DataFrame, the metadata rows of every file whose feature is available
        (repeated once per augmentation), with an added ``feature_filename`` column.

    Raises:
        ValueError: raised by ``pd.concat`` when no file of the csv could be used.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    df_all = list()
    feature_fns = list()
    LOG.info('Extracting/loading features')
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(df_meta.filename.unique())))

    augmentation_funcs = [
        ('orig', None),  # original signal
    ]

    if training:
        augmentation_funcs += [
            # ('lpf4k', partial(lpf, wc=4000, fs=cfg.sample_rate)),
            # ('lpf8k', partial(lpf, wc=8000, fs=cfg.sample_rate)),
            # ('lpf16k', partial(lpf, wc=16000, fs=cfg.sample_rate)),
            # ('ps-6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-6)),
            # ('ps-3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-3)),
            # ('ps+3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=3)),
            # ('ps+6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=6)),
            # ('ts1.25', partial(time_stretch, rate=1.25)),
            # ('ts1.5', partial(time_stretch, rate=1.5)),
            # ('amp0.5', partial(amplitude_scale, coeff=0.5)),
            # ('amp0.75', partial(amplitude_scale, coeff=0.75)),
            # ('hp0.25', partial(hp_reweight, lam=0.25)),
            # ('hp0.75', partial(hp_reweight, lam=0.75))
        ]

    wav_fns = df_meta.filename.unique()
    # The audio directory only depends on the csv, so it is resolved once.
    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)

    for ind, wav_name in tqdm(enumerate(wav_fns), total=len(wav_fns)):
        if ind % 500 == 0:
            LOG.debug(ind)

        # verify the audio file is present
        wav_path = os.path.join(wav_dir, wav_name)
        if not os.path.isfile(wav_path):
            LOG.error(
                "File %s is in the csv file but the feature is not extracted!"
                % wav_path)
            continue

        # For synthetic data with time annotation of events, the meta df has several
        # entries for each wav file, so the feature filename is repeated len(meta) times.
        meta = df_meta.loc[df_meta.filename == wav_name]
        base_name = os.path.splitext(wav_name)[0]

        # defer loading audio until the need for feature extraction is verified
        audio = None

        # perform all augmentations (including no augmentation)
        for name, func in augmentation_funcs:
            if name == 'orig':
                out_filename = base_name + ".npy"
            else:
                out_filename = base_name + '_' + name + ".npy"
            out_path = os.path.join(self.feature_dir, out_filename)

            if not os.path.exists(out_path):
                if audio is None:
                    (audio, _) = read_audio(wav_path, cfg.sample_rate)
                if audio.shape[0] == 0:
                    # An empty signal cannot produce any missing feature: nothing is
                    # registered for it, which keeps df_all and feature_fns aligned.
                    print("File %s is corrupted!" % wav_path)
                    break
                # perform any augmentation, extract features, save features
                signal = audio if func is None else func(audio)
                np.save(out_path, self.calculate_mel_spec(signal))

            # Register the metadata only once the feature file is known to exist.
            df_all.append(meta)
            feature_fns += [out_filename] * len(meta)

        LOG.debug("compute features time: %s" % (time.time() - t1))

    # form the final DataFrame of meta data for features from original and augmented audio
    df_all = pd.concat(df_all).reset_index(drop=True)
    df_all['feature_filename'] = feature_fns

    return df_all
def extract_features_from_meta_segment(self, csv_audio, feature_dir, subpart_data=None, fixed_segment=None):
    """Extract one mel spectrogram feature per labeled event; the csv needs to be strongly labeled.

    The resulting table (raw filename, feature path, event label) is cached as a csv
    in ``self.metadata_dir``; when that csv already exists it is simply reloaded.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the path of the features directory.
        subpart_data: int, number of files to extract features from the csv.
        fixed_segment: float, in seconds, the size of the kept segment. If >audio length, the audio length is kept.
            If segment is True, and >label, it takes the surrounding (allow creating weak labels).

    Returns:
        pd.DataFrame, with columns ``raw_filename``, ``filename`` and ``event_labels``
        when it is freshly computed, or whatever ``self.get_df_from_meta`` returns
        for the cached csv.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    self.get_classes(df_meta)
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(df_meta.filename.unique())))

    # The name of the cached csv encodes the options used to build it.
    ext_name = "_segment_"
    if subpart_data:
        ext_name += str(subpart_data)

    if fixed_segment is not None:
        LOG.debug(
            f" durations before: "
            f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
        )
        ext_name += f"fix{fixed_segment}"
        df_meta = self.trunc_pad_segment(df_meta, fixed_segment)
        LOG.debug(
            f" durations after: "
            f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
        )

    meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
    csv_features = os.path.join(self.metadata_dir, meta_base + ext_name + meta_ext)

    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)

    if not os.path.exists(csv_features):
        # One small DataFrame per audio file, concatenated once at the end
        # (DataFrame.append was removed in pandas 2.0).
        feature_frames = []

        # Loop in all the filenames
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            if ind % 500 == 0:
                LOG.debug(ind)

            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted, deleting...!"
                    % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
                continue

            try:
                audio_info = soundfile.info(wav_path)
                audio_len_sec = audio_info.duration
            except Exception as e:
                print("File %s is corrupted, not added to df!" % wav_path)
                print(e)
                continue
            if audio_len_sec == 0:
                print("File %s is corrupted, not added to df!" % wav_path)
                continue

            sub_df = df_meta[df_meta.filename == wav_name]
            if len(sub_df) == 0:
                # Nothing to extract for this file; keep processing the other ones.
                continue

            ext_featname = "_seg"
            if fixed_segment:
                ext_featname += f"fix{fixed_segment}"
            feature_base = os.path.join(feature_dir, name_only(wav_name)) + ext_featname

            # One feature per labeled row; the first row has no numeric suffix.
            segments = []
            for ii, (_, row) in enumerate(sub_df.iterrows()):
                if pd.isna(row.event_label):
                    continue
                extnb = str(ii) if ii > 0 else ""
                segments.append((row, feature_base + extnb + ".npy"))

            # With a fixed segment we should always recompute because of the randomness
            # of onset offset. Otherwise reuse the features when every file that would
            # be written (same naming as the np.save below) is already there.
            files_exist = (not fixed_segment) and all(
                os.path.exists(out_filename) for _, out_filename in segments)

            sr = audio_info.samplerate
            add_item = {
                "raw_filename": [],
                "filename": [],
                "event_labels": []
            }
            for row, out_filename in segments:
                if not files_exist:
                    (audio, _) = read_audio(wav_path,
                                            cfg.sample_rate,
                                            start=int(row.onset * sr),
                                            stop=int(row.offset * sr))
                    mel_spec = self.calculate_mel_spec(
                        audio, log_feature=self.save_log_feature)
                    if fixed_segment:
                        pad_trunc_length = int(
                            fixed_segment * cfg.sample_rate // cfg.hop_length)
                        mel_spec = pad_trunc_seq(mel_spec, pad_trunc_length)
                    np.save(out_filename, mel_spec)

                add_item["raw_filename"].append(wav_name)
                add_item["filename"].append(out_filename)
                add_item["event_labels"].append(row["event_label"])

            if add_item["filename"]:
                feature_frames.append(pd.DataFrame(add_item))

        if feature_frames:
            df_features = pd.concat(feature_frames, ignore_index=True)
        else:
            # Keep the header so that the csv written below can be read back.
            df_features = pd.DataFrame(
                columns=["raw_filename", "filename", "event_labels"])

        df_features.to_csv(csv_features, sep="\t", header=True, index=False)
        df_features = pd.read_csv(
            csv_features, sep="\t")  # Otherwise event_labels is "" and not NaN
    else:
        df_features = self.get_df_from_meta(
            csv_features)  # No subpart data because should be in the name

    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_features
) if margin is not None: triplet_loss = torch.clamp(margin + dist_pos - dist_neg, min=0.0).mean() else: triplet_loss = ratio_loss(dist_pos, dist_neg).mean() triplet_loss = to_cpu(triplet_loss) validation_loss.append(triplet_loss.item()) validation_loss = np.mean(validation_loss) triplet_model.train() return validation_loss if __name__ == '__main__': LOG.info(__file__) # ########### # ## Argument # ########### t = time.time() print("Arguments have been set for a certain group of experiments, feel free to change it.") parser = argparse.ArgumentParser(description="") parser.add_argument('--margin', type=float, default=None, dest="margin") parser.add_argument('--type_positive', type=str, default="nearest", dest="type_positive") parser.add_argument('--type_negative', type=str, default="semi_hard", dest="type_negative") # Experiences to compare the impact of number of labaled vs unlabeled triplets # Be careful if subpart data is not None!!!!!! parser.add_argument('--nb_labeled_triplets', type=int, default=None, dest="nb_labeled_triplets") parser.add_argument('--nb_unlabeled_triplets', type=int, default=None, dest="nb_unlabeled_triplets") parser.add_argument('--pit', action="store_true", default=False) parser.add_argument('--swap', action="store_true", default=False)
meters.update('Loss', loss.item()) # compute gradient and do optimizer step optimizer.zero_grad() loss.backward() optimizer.step() epoch_time = time.time() - start LOG.info('Epoch: {}\t' 'Time {:.2f}\t' '{meters}'.format(epoch, epoch_time, meters=meters)) if __name__ == '__main__': LOG.info("Simple CRNNs") parser = argparse.ArgumentParser(description="") parser.add_argument( "-s", '--subpart_data', type=int, default=None, dest="subpart_data", help= "Number of files to be used. Useful when testing on small number of files." ) parser.add_argument("-n", '--no_weak', dest='no_weak', action='store_true', default=False,
optimizer.zero_grad() loss.backward() optimizer.step() global_step += 1 if ema_model is not None: update_ema_variables(model, ema_model, 0.999, global_step) epoch_time = time.time() - start LOG.info('Epoch: {}\t' 'Time {:.2f}\t' '{meters}'.format(epoch, epoch_time, meters=meters)) if __name__ == '__main__': LOG.info("MEAN TEACHER") parser = argparse.ArgumentParser(description="") parser.add_argument( "-s", '--subpart_data', type=int, default=None, dest="subpart_data", help= "Number of files to be used. Useful when testing on small number of files." ) parser.add_argument("-n", '--no_synthetic', dest='no_synthetic', action='store_true',
def extract_features_from_meta_frames(self, csv_audio, feature_dir, frames_in_sec, subpart_data=None):
    """Extract log mel spectrogram features split in sub-segments of a fixed duration.

    The resulting table is cached as a csv in ``self.metadata_dir``; when that csv
    already exists it is simply reloaded with ``self.get_df_from_meta``.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the directory where the features are or will be created
        frames_in_sec: number, duration in seconds of a sub-segment; it is converted
            to a number of frames with cfg.sample_rate and cfg.hop_length.
        subpart_data: int, number of files to extract features from the csv.

    Returns:
        pd.DataFrame, the feature metadata (one entry per sub-segment feature file).
    """
    frames = int(frames_in_sec * cfg.sample_rate / cfg.hop_length)

    def frame_filenames(base_name, count):
        # Paths of the ``count`` consecutive sub-segment features of one audio file.
        return [
            base_name + "fr" + str(frames) + "_" + str(cnt * frames) + "-" +
            str((cnt + 1) * frames) + ".npy" for cnt in range(count)
        ]

    t1 = time.time()
    df_meta = pd.read_csv(csv_audio, header=0, sep="\t")
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(df_meta.filename.unique())))

    # Csv to store the features
    ext_name = "_" + str(frames)
    if subpart_data is not None and subpart_data < len(
            df_meta.filename.unique()):
        ext_name += "_sub" + str(subpart_data)
        df_meta = self.get_subpart_data(df_meta, subpart_data)

    self.get_classes(df_meta)

    meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
    csv_features = os.path.join(self.metadata_dir, meta_base + ext_name + meta_ext)

    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)

    if not os.path.exists(csv_features):
        LOG.debug("Creating new feature df")
        # One DataFrame per audio file, concatenated once at the end
        # (DataFrame.append was removed in pandas 2.0).
        feature_frames = []

        # Loop in all the filenames
        cnt_new_features = 0
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted, deleting...!"
                    % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
                continue

            try:
                audio_len_sec = soundfile.info(wav_path).duration
            except Exception as e:
                print("File %s is corrupted, not added to df!" % wav_path)
                print(e)
                continue
            if audio_len_sec == 0:
                print("File %s is corrupted, not added to df!" % wav_path)
                continue

            # How many features we can compute from this file ? (at least one)
            cnt_max = min(int(audio_len_sec // frames_in_sec),
                          int(cfg.max_len_seconds // frames_in_sec))
            if cnt_max == 0:
                cnt_max = 1

            base_wav_name = os.path.join(feature_dir, name_only(wav_name))
            out_filenames = frame_filenames(base_wav_name, cnt_max)

            # Only compute the features when at least one expected file is missing.
            if not all(os.path.exists(fname) for fname in out_filenames):
                if cnt_new_features % 500 == 0:
                    LOG.debug(f"new features, {cnt_new_features}")
                cnt_new_features += 1
                # get_features reports how many sub-segments it produced; the file
                # list is rebuilt from that count.
                _, cnt_max = self.get_features(wav_path, feature_dir, frames)
                out_filenames = frame_filenames(base_wav_name, cnt_max)

            # features label to add to the dataframe
            add_item = self.get_labels(ind, df_meta, wav_name, frames,
                                       out_filenames)
            feature_frames.append(pd.DataFrame(add_item))

        if feature_frames:
            df_features = pd.concat(feature_frames, ignore_index=True)
        else:
            df_features = pd.DataFrame()

        LOG.info(csv_features)
        df_features.to_csv(csv_features, sep="\t", header=True, index=False)
        df_features = pd.read_csv(
            csv_features, sep="\t")  # Otherwise event_labels is "" and not NaN
    else:
        df_features = self.get_df_from_meta(
            csv_features)  # No subpart data because should be in the name

    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_features
def download_file(result_dir, filename):
    """ download a file from youtube given an audioSet filename. (It takes only a part of the file thanks to
    information provided in the filename)

    Parameters
    ----------

    result_dir : str, result directory which will contain the downloaded file

    filename : str, AudioSet filename to download. The video id is read from characters 1 to 11 and the
        segment start and end (in seconds, separated by '_') from character 13 up to the 4-character extension.

    Return
    ------

    list : list, Empty list if the file is downloaded, otherwise contains the filename and the error associated

    """
    LOG.debug(filename)
    tmp_filename = ""
    query_id = filename[1:12]
    segment = filename[13:-4].split('_')
    segment_start = segment[0]
    segment_end = segment[1]
    audio_container = AudioContainer()

    # Define download parameters
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': TMP_FOLDER+'%(id)s.%(ext)s',
        'noplaylist': True,
        'quiet': True,
        'prefer_ffmpeg': True,
        'logger': MyLogger(),
        'audioformat': 'wav'
    }

    try:
        # Download file
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            meta = ydl.extract_info(
                'https://www.youtube.com/watch?v={query_id}'.format(query_id=query_id), download=True)

        audio_formats = [f for f in meta["formats"] if f.get('vcodec') == 'none']

        # Emptiness test: the previous identity test (`is []`) could never be true, so
        # this case used to fall through to an IndexError on `audio_formats[-1]`.
        if not audio_formats:
            return [filename, "no audio format available"]

        # get the best audio format
        best_audio_format = audio_formats[-1]

        tmp_filename = TMP_FOLDER + query_id + "." + best_audio_format["ext"]

        audio_container.load(filename=tmp_filename, fs=44100, res_type='kaiser_best',
                             start=float(segment_start), stop=float(segment_end))

        # Save segmented audio
        audio_container.filename = filename
        audio_container.detect_file_format()
        audio_container.save(filename=os.path.join(result_dir, filename))

        # Remove temporary file
        os.remove(tmp_filename)
        return []

    except (KeyboardInterrupt, SystemExit):
        # Remove temporary files and current audio file.
        for fpath in glob.glob(TMP_FOLDER + query_id + "*"):
            os.remove(fpath)
        raise

    # youtube-dl error, file often removed
    except (ExtractorError, DownloadError, OSError) as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return [filename, str(e)]

    # multiprocessing can give this error
    except IndexError as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        LOG.info(filename)
        LOG.info(str(e))
        return [filename, "Index Error"]
def train(cfg, train_loader, model, optimizer, epoch, ema_model=None, weak_mask=None, strong_mask=None):
    """ One epoch of a Mean Teacher model
    :param cfg: configuration object, the attributes read here are vat_xi, vat_eps, vat_n_power_iter, n_epoch,
    max_consistency_cost, vat_enabled and vat_coeff
    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
    Should return 3 values: input of `model`, input of `ema_model`, labels
    :param model: torch.Module, model to be trained, should return a strong and a weak prediction (in this order)
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, model updated with `update_ema_variables` after each optimizer step, should
    return a strong and a weak prediction. When None, the EMA meters and the consistency costs are skipped.
    :param weak_mask: mask the batch to get only the weak labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strong labeled data (used to calculate the loss)
    :raises ValueError: if weak_mask, strong_mask and ema_model are all None (no loss term to optimize)
    """
    class_criterion = nn.BCELoss()
    consistency_criterion_strong = nn.MSELoss()
    lds_criterion = LDSLoss(xi=cfg.vat_xi,
                            eps=cfg.vat_eps,
                            n_power_iter=cfg.vat_n_power_iter)
    [class_criterion, consistency_criterion_strong, lds_criterion] = to_cuda_if_available(
        [class_criterion, consistency_criterion_strong, lds_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    # The consistency weight ramps up during the first half of the training steps.
    rampup_length = len(train_loader) * cfg.n_epoch // 2
    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input, target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug(batch_input.mean())

        # Outputs (the EMA predictions are detached: no gradient flows through ema_model)
        if ema_model is not None:
            strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
            strong_pred_ema = strong_pred_ema.detach()
            weak_pred_ema = weak_pred_ema.detach()

        strong_pred, weak_pred = model(batch_input)
        loss = None

        # Weak BCE Loss
        # Take the max in axis 2 (assumed to be time)
        if len(target.shape) > 2:
            target_weak = target.max(-2)[0]
        else:
            target_weak = target

        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(target_weak[weak_mask]))
                LOG.debug(weak_class_loss)
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss', weak_class_loss.item())

            if ema_model is not None:
                ema_class_loss = class_criterion(weak_pred_ema[weak_mask], target_weak[weak_mask])
                meters.update('Weak EMA loss', ema_class_loss.item())

            loss = weak_class_loss

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())

            if ema_model is not None:
                strong_ema_class_loss = class_criterion(strong_pred_ema[strong_mask], target[strong_mask])
                meters.update('Strong EMA loss', strong_ema_class_loss.item())

            loss = strong_class_loss if loss is None else loss + strong_class_loss

        # Teacher-student consistency cost
        if ema_model is not None:
            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)

            # Consistency between the strong predictions of both models (whole batch)
            consistency_loss_strong = consistency_cost * consistency_criterion_strong(
                strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            loss = consistency_loss_strong if loss is None else loss + consistency_loss_strong

            # Consistency between the weak predictions of both models (whole batch)
            consistency_loss_weak = consistency_cost * consistency_criterion_strong(
                weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            loss = loss + consistency_loss_weak

        if loss is None:
            raise ValueError(
                "No loss term is active: provide weak_mask, strong_mask or ema_model")

        # LDS loss
        if cfg.vat_enabled:
            # lds_loss already includes cfg.vat_coeff, so it is logged as is.
            lds_loss = cfg.vat_coeff * lds_criterion(model, batch_input, weak_pred)
            LOG.info('loss: {:.3f}, lds loss: {:.3f}'.format(loss.item(), lds_loss.item()))
            loss = loss + lds_loss
        else:
            if i % 25 == 0:
                LOG.info('loss: {:.3f}'.format(loss.item()))

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))