def get_subpart_data(df, subpart_data):
    column = "filename"
    if not subpart_data > len(df[column].unique()):
        filenames = df[column].drop_duplicates().sample(subpart_data, random_state=10)
        df = df[df[column].isin(filenames)].reset_index(drop=True)
        LOG.debug("Taking subpart of the data, len : {}, df_len: {}".format(subpart_data, len(df)))
    return df
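
# Usage sketch (hedged): `get_subpart_data` is typically applied to a metadata DataFrame read from a
# tab-separated csv before feature extraction. The csv path below is illustrative, not a file from this repo.
#
#     df_meta = pd.read_csv("metadata/train/weak.tsv", sep="\t")
#     df_sub = get_subpart_data(df_meta, subpart_data=500)
#     assert df_sub["filename"].nunique() <= 500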
def means(self, dataset):
    """
    Compute the mean and the mean of squares of a dataset, sample by sample.
    :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
    """
    LOG.info('computing mean')
    start = time.time()

    shape = None
    counter = 0
    for sample in dataset:
        if type(sample) in [tuple, list] and len(sample) == 2:
            batch_X, _ = sample
        else:
            batch_X = sample
        if type(batch_X) is torch.Tensor:
            batch_X_arr = batch_X.numpy()
        else:
            batch_X_arr = batch_X
        data_square = batch_X_arr ** 2
        counter += 1

        if shape is None:
            shape = batch_X_arr.shape
        else:
            if not batch_X_arr.shape == shape:
                raise NotImplementedError("Not possible to add data with different shape in mean calculation yet")

        # assume the first item carries the shape information
        if self.mean_ is None:
            self.mean_ = self.mean(batch_X_arr, axis=-1)
        else:
            self.mean_ += self.mean(batch_X_arr, axis=-1)

        if self.mean_of_square_ is None:
            self.mean_of_square_ = self.mean(data_square, axis=-1)
        else:
            self.mean_of_square_ += self.mean(data_square, axis=-1)

    self.mean_ /= counter
    self.mean_of_square_ /= counter

    # To be used if data have different shapes, but the iteration needs to be stopped before.
    # rest = len(dataset) - i
    # if rest != 0:
    #     weight = rest / float(i + rest)
    #     X, y = dataset[-1]
    #     data_square = X ** 2
    #     mean = mean * (1 - weight) + self.mean(X, axis=-1) * weight
    #     mean_of_square = mean_of_square * (1 - weight) + self.mean(data_square, axis=-1) * weight

    LOG.debug('time to compute means: ' + str(time.time() - start))
    return self
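
# Note (an assumption, not from this method): the two accumulated statistics are enough to derive a standard
# deviation for normalization, via Var[X] = E[X^2] - E[X]^2. A minimal sketch:
#
#     variance = self.mean_of_square_ - self.mean_ ** 2
#     std = np.sqrt(np.maximum(variance, 0))  # clip tiny negative values caused by floating point error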
def train(train_loader, model, optimizer, epoch, weak_mask=None, strong_mask=None):
    class_criterion = nn.BCELoss()
    [class_criterion] = to_cuda_if_available([class_criterion])

    meters = AverageMeterSet()
    meters.update('lr', optimizer.param_groups[0]['lr'])
    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    for i, (batch_input, target) in enumerate(train_loader):
        [batch_input, target] = to_cuda_if_available([batch_input, target])
        LOG.debug(batch_input.mean())

        strong_pred, weak_pred = model(batch_input)
        loss = 0
        if weak_mask is not None:
            # Weak BCE Loss
            # Trick to not take unlabeled data
            # Todo figure out another way
            target_weak = target.max(-2)[0]
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])
            if i == 1:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug(weak_class_loss)
            meters.update('Weak loss', weak_class_loss.item())
            loss += weak_class_loss

        if strong_mask is not None:
            # Strong BCE loss
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())
            loss += strong_class_loss

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_time = time.time() - start
    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
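
# Hedged note: `weak_mask` and `strong_mask` are expected to index the batch dimension so each loss is computed
# only on its subset of a combined batch. A minimal sketch, assuming the loader puts weak-labeled clips in the
# first half of the batch and strong-labeled clips in the second half (the exact layout depends on the sampler):
#
#     weak_mask = slice(0, cfg.batch_size // 2)
#     strong_mask = slice(cfg.batch_size // 2, cfg.batch_size)
#     train(train_loader, model, optimizer, epoch, weak_mask=weak_mask, strong_mask=strong_mask)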
def loop_batches_acc_grad(indexes, dataset, model_triplet, semi_hard_input=None, semi_hard_embed=None, i=0):
    out = []
    out_pos = []
    out_neg = []
    # zero the parameter gradients
    for j, ind in enumerate(indexes):
        samples = dataset[ind]
        inputs, inputs_pos, inputs_neg, pred_labels = samples
        inputs, inputs_pos = to_cuda_if_available(inputs, inputs_pos)
        if i < 2:
            LOG.debug("input shape: {}".format(inputs.shape))

        if semi_hard_input is not None or semi_hard_embed is not None:
            assert semi_hard_input is not None, "semi_hard_input and semi_hard_embed should be defined"
            assert semi_hard_embed is not None, "semi_hard_input and semi_hard_embed should be defined"
            model_triplet.eval()
            embed = get_embeddings_numpy(inputs, model_triplet)
            embed_pos = get_embeddings_numpy(inputs_pos, model_triplet)

            label_mask = (pred_labels.numpy() == -1).all(-1)
            semi_hard_mask = np.isnan(inputs_neg.detach().numpy()).reshape(inputs_neg.shape[0], -1).all(-1)
            mask = label_mask & semi_hard_mask
            if i < 2:
                LOG.debug("mask: {}".format(mask))
            negative_indexes = compute_semi_hard_indexes(embed[mask], embed_pos[mask], semi_hard_embed)
            inputs_neg[np.where(mask)] = semi_hard_input[negative_indexes]

        inputs_neg = to_cuda_if_available(inputs_neg)
        model_triplet.eval()
        with torch.no_grad():
            outputs_pos = model_triplet(inputs_pos)
            outputs_neg = model_triplet(inputs_neg)
        model_triplet.train()

        # forward + backward + optimize
        outputs = model_triplet(inputs)

        out.append(outputs)
        out_pos.append(outputs_pos)
        out_neg.append(outputs_neg)

    outputs = torch.stack(out, 0)
    outputs_pos = torch.stack(out_pos, 0)
    outputs_neg = torch.stack(out_neg, 0)
    return outputs, outputs_pos, outputs_neg
def get_sample(self, index):
    """From an index, get the features and the labels to create a sample

    Args:
        index: int, Index of the sample desired

    Returns:
        tuple
        Tuple containing the features and the labels (numpy.array, numpy.array)
    """
    features = self.get_feature_file_func(self.filenames.iloc[index])
    # print("filenames:{}".format(self.filenames.iloc[index]))

    # event_labels means weak labels, event_label means strong labels
    if "event_labels" in self.df.columns or {"onset", "offset", "event_label"}.issubset(self.df.columns):
        if "event_labels" in self.df.columns:
            label = self.df.iloc[index]["event_labels"]
            if pd.isna(label):
                label = []
            if type(label) is str:
                if label == "":
                    label = []
                else:
                    label = label.split(",")
        else:
            cols = ["onset", "offset", "event_label"]
            label = self.df[self.df.filename == self.filenames.iloc[index]][cols]
            if label.empty:
                label = []
    else:
        label = "empty"  # trick to have -1 for unlabeled data and concat them with labeled
        if "filename" not in self.df.columns:
            raise NotImplementedError(
                "Dataframe to be encoded doesn't have specified columns: columns allowed: 'filename' for unlabeled;"
                "'filename', 'event_labels' for weak labels; 'filename' 'onset' 'offset' 'event_label' "
                "for strong labels, yours: {}".format(self.df.columns))

    if index == 0:
        LOG.debug("label to encode: {}".format(label))
    if self.encode_function is not None:
        # labels are a list of string or list of list [[label, onset, offset]]
        y = self.encode_function(label)
    else:
        y = label
    sample = features, y
    return sample
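
# Example of the two label layouts handled above (a sketch; the concrete values are illustrative):
#   - weak labels, column "event_labels":  "Speech,Dog" -> ["Speech", "Dog"]
#   - strong labels, columns ("onset", "offset", "event_label"): a per-file sub-DataFrame such as
#         onset  offset  event_label
#         0.0    1.2     Speech
#         3.4    5.0     Dog
# `self.encode_function` (if given) then turns either form into the target array used for training.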
def extract_features_from_meta(self, csv_audio, feature_dir, subpart_data=None):
    """Extract log mel spectrogram features.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the path to the directory where the features are
        subpart_data: int, number of files to extract features from the csv.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    LOG.info("{} Total file number: {}".format(csv_audio, len(df_meta.filename.unique())))

    for ind, wav_name in enumerate(df_meta.filename.unique()):
        if ind % 500 == 0:
            LOG.debug(ind)
        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        wav_path = os.path.join(wav_dir, wav_name)

        out_filename = os.path.join(feature_dir, name_only(wav_name) + ".npy")
        if not os.path.exists(out_filename):
            if not os.path.isfile(wav_path):
                LOG.error("File %s is in the csv file but the feature is not extracted!" % wav_path)
                df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)
            else:
                (audio, _) = read_audio(wav_path, cfg.sample_rate)
                if audio.shape[0] == 0:
                    print("File %s is corrupted!" % wav_path)
                else:
                    mel_spec = self.calculate_mel_spec(audio, log_feature=self.save_log_feature)
                    np.save(out_filename, mel_spec)

    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_meta.reset_index(drop=True)
def __init__(self, serie_labels, classes, n_per_class, n_classes=None):
    super(CategoriesSampler, self).__init__(serie_labels)
    self.n_per_class = n_per_class
    self.classes = classes
    # self.n_batch = int(len(serie_labels) // (n_per_class * len(classes)))
    self.n_batch = int(serie_labels.value_counts().min() / n_per_class)
    self.serie_labels = serie_labels
    self.n_classes = n_classes
    LOG.debug(
        f"sampler has: {self.n_batch} batches of {n_per_class} samples per class, "
        f"len serie: {len(serie_labels)}")

    self.ind_per_class = []
    for label in classes:
        ind = np.argwhere(serie_labels.str.contains(label)).reshape(-1).tolist()
        if len(ind) > 0:
            self.ind_per_class.append(ind)
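
# Usage sketch, grounded in how the sampler is instantiated in the script code later in this section;
# `train_set`, `classes` and `num_workers` are assumed to exist in the calling scope:
#
#     train_sampler = CategoriesSampler(train_set.df.event_labels, classes,
#                                       round(cfg.batch_size / len(classes)))
#     train_load = DataLoader(train_set, num_workers=num_workers, batch_sampler=train_sampler)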
def train_loop(train_load, model):
    # accumulate per-batch loss values in a separate list so they are not shadowed by the loss tensor
    losses_bce = []
    if args.segment:
        for cnt, indexes in enumerate(train_load.batch_sampler):
            optimizer.zero_grad()
            for j, ind in enumerate(indexes):
                inputs, pred_labels = train_set[ind]
                if cnt == 0 and epoch_ == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                inputs, pred_labels = to_cuda_if_available(inputs, pred_labels)

                # forward + backward + optimize
                weak_out = model(inputs)
                loss_bce = criterion_bce(weak_out, pred_labels.argmax(0, keepdim=True))
                loss_bce.backward()
                losses_bce.append(loss_bce.item())
            optimizer.step()
    else:
        for cnt, samples in enumerate(train_load):
            optimizer.zero_grad()
            inputs, pred_labels = samples
            if cnt == 0 and epoch_ == 0:
                LOG.debug("classif input shape: {}".format(inputs.shape))

            # zero the parameter gradients
            inputs, pred_labels = to_cuda_if_available(inputs, pred_labels)

            # forward + backward + optimize
            weak_out = model(inputs)
            loss_bce = criterion_bce(weak_out, pred_labels)
            loss_bce.backward()
            losses_bce.append(loss_bce.item())
            optimizer.step()

    loss_mean_bce = np.mean(losses_bce)
    print('[%d / %d, %5d] loss: %.3f' % (epoch_ + 1, n_epochs, cnt + 1, loss_mean_bce))
    return loss_mean_bce, model
def get_predictions(model, valid_dataset, decoder, save_predictions=None):
    for i, (input, _) in enumerate(valid_dataset):
        [input] = to_cuda_if_available([input])

        pred_strong, _ = model(input.unsqueeze(0))
        pred_strong = pred_strong.cpu()
        pred_strong = pred_strong.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_strong)

        pred_strong = ProbabilityEncoder().binarization(pred_strong, binarization_type="global_threshold",
                                                        threshold=0.5)
        pred_strong = scipy.ndimage.filters.median_filter(pred_strong, (cfg.median_window, 1))
        pred = decoder(pred_strong)
        pred = pd.DataFrame(pred, columns=["event_label", "onset", "offset"])
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
            LOG.debug("predictions strong: \n{}".format(pred_strong))
            prediction_df = pred.copy()
        else:
            prediction_df = prediction_df.append(pred)

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
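
# Post-processing summary (from the code above): frame-level probabilities are thresholded at 0.5, smoothed with
# a median filter of length cfg.median_window along time, then decoded into (event_label, onset, offset) rows.
# A hedged usage sketch; the decoder attribute and output path are illustrative assumptions:
#
#     predictions = get_predictions(model, valid_dataset, many_hot_encoder.decode_strong,
#                                   save_predictions="predictions.tsv")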
# ##############
# Triplet dataset
# #############
batch_size = cfg.batch_size
num_workers = cfg.num_workers

list_trans_fr = [ApplyLog(), ToTensor(), Unsqueeze(0)]
if args.segment:
    list_trans_fr.append(Unsqueeze(0))

train_set = DataLoadDf(train_weak_df, many_hot_encoder.encode_weak, Compose(list_trans_fr),
                       return_indexes=False)
LOG.debug("len train : {}".format(len(train_set)))
# train_load = DataLoader(train_set, batch_size=batch_size, num_workers=num_workers, shuffle=True,
#                         drop_last=True, collate_fn=default_collate)

# scaler = Scaler()
scaler = ScalerSum()
scaler.calculate_scaler(train_set)
LOG.debug(scaler.mean_)

list_trans_fr.append(Normalize(scaler))
train_set.set_transform(Compose(list_trans_fr))

# Validation data
valid_weak_df = dfs["valid"]
if valid_weak_df is not None:
    valid_set = DataLoadDf(valid_weak_df,
                           many_hot_encoder.encode_weak, Compose(list_trans_fr),
                           return_indexes=False)

if args.balance:
    train_sampler = CategoriesSampler(train_set.df.event_labels, classes,
                                      round(cfg.batch_size / len(classes)))
    train_load = DataLoader(train_set, num_workers=num_workers, batch_sampler=train_sampler)
else:
    train_load = DataLoader(train_set, num_workers=num_workers, batch_size=batch_size, shuffle=True)
    train_sampler = train_load.batch_sampler

LOG.debug("len train : {}".format(len(train_set)))

scaler = ScalerSum()
scaler.calculate_scaler(train_set)
LOG.debug(scaler.mean_)

list_trans_fr.append(Normalize(scaler))
train_set.set_transform(Compose(list_trans_fr))

# Validation data
valid_weak_df = dfs["valid"]
if valid_weak_df is not None:
    valid_set = DataLoadDf(valid_weak_df, many_hot_encoder.encode_weak,
                           Compose(list_trans_fr), return_indexes=False)
    if args.balance:
        val_sampler = CategoriesSampler(
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state={},
                     dir_model="model", result_path="res", recompute=True):
    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")

    # scheduler = ReduceLROnPlateau(optimizer_classif, 'max', factor=0.1, patience=cfg.reduce_lr,
    #                               verbose=True)
    print(optimizer_classif)

    save_results = pd.DataFrame()

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            loss_mean_bce = []
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize
                weak_out = classif_model(inputs)
                weak_out = to_cpu(weak_out)
                # print(output)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_mean_bce.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            loss_mean_bce = np.mean(loss_mean_bce)
            classif_model.eval()
            n_class = len(many_hot_encoder.labels)
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class, train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class, valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
            else:
                mean_macro_f_measure = -1
            classif_model.train()

            print("Time to train an epoch: {}".format(time.time() - start))
            # print statistics
            print('[%d / %d, %5d] loss: %.3f' % (epoch_ + 1, cfg.n_epoch_classifier, i + 1, loss_mean_bce))

            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": np.array_str(macro_f_measure, precision=2),
                       }
            for key in results:
                LOG.info("\t\t ----> {} : {}".format(key, results[key]))

            save_results = save_results.append(results, ignore_index=True)
            # scheduler.step(mean_macro_f_measure)

            # ##########
            # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            state["model"]["state_dict"] = classif_model.state_dict()
            state["optimizer"]["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info("best model at epoch : {} with macro {}".format(save_best_call.best_epoch,
                                                                     save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)

    LOG.info("#### End classif")
    save_results.to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
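
# Usage sketch (hedged): `state` is expected to already contain "model" and "optimizer" sub-dicts, since the
# training loop above writes state["model"]["state_dict"] and state["optimizer"]["state_dict"]. The paths and
# dict contents below are illustrative assumptions:
#
#     state = {"model": {"name": type(classif_model).__name__},
#              "optimizer": {"name": type(optimizer_classif).__name__}}
#     classif_model, state = train_classifier(train_load, classif_model, optimizer_classif,
#                                             many_hot_encoder=many_hot_encoder, valid_loader=valid_load,
#                                             state=state, dir_model="stored_data/model", result_path="res.tsv")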
def extract_features_from_meta_frames(self, csv_audio, feature_dir, frames_in_sec, subpart_data=None):
    """Extract log mel spectrogram features.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the directory where the features are or will be created
        subpart_data: int, number of files to extract features from the csv.
        frames_in_sec: int, number of frames to take for a subsegment.
    """
    frames = int(frames_in_sec * cfg.sample_rate / cfg.hop_length)
    t1 = time.time()
    df_meta = pd.read_csv(csv_audio, header=0, sep="\t")
    LOG.info("{} Total file number: {}".format(csv_audio, len(df_meta.filename.unique())))

    # Csv to store the features
    ext_name = "_" + str(frames)
    if subpart_data is not None and subpart_data < len(df_meta.filename.unique()):
        ext_name += "_sub" + str(subpart_data)
        df_meta = self.get_subpart_data(df_meta, subpart_data)

    self.get_classes(df_meta)

    meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
    csv_features = os.path.join(self.metadata_dir, meta_base + ext_name + meta_ext)

    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
    df_features = pd.DataFrame()

    path_exists = os.path.exists(csv_features)
    if not path_exists:
        LOG.debug("Creating new feature df")
        # Loop in all the filenames
        cnt_new_features = 0
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error("File %s is in the csv file but the feature is not extracted, deleting...!" % wav_path)
                df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)
            else:
                try:
                    audio_len_sec = soundfile.info(wav_path).duration
                except Exception as e:
                    print("File %s is corrupted, not added to df!" % wav_path)
                    print(e)
                    continue
                if audio_len_sec == 0:
                    print("File %s is corrupted, not added to df!" % wav_path)
                else:
                    files_exist = True
                    # How many features can we compute from this file ?
                    cnt_max = min(int(audio_len_sec // frames_in_sec),
                                  int(cfg.max_len_seconds // frames_in_sec))
                    if cnt_max == 0:
                        cnt_max = 1
                    base_wav_name = os.path.join(feature_dir, name_only(wav_name))

                    # Check if files already exist
                    out_filenames = [base_wav_name + "fr" + str(frames) + "_" + str(cnt * frames) + "-"
                                     + str((cnt + 1) * frames) + ".npy" for cnt in range(cnt_max)]
                    for fname in out_filenames:
                        if not os.path.exists(fname):
                            files_exist = False
                            break

                    if not files_exist:
                        if cnt_new_features % 500 == 0:
                            LOG.debug(f"new features, {cnt_new_features}")
                        cnt_new_features += 1
                        audio, cnt_max = self.get_features(wav_path, feature_dir, frames)
                        out_filenames = [base_wav_name + "fr" + str(frames) + "_" + str(cnt * frames) + "-"
                                         + str((cnt + 1) * frames) + ".npy" for cnt in range(cnt_max)]

                    # features label to add to the dataframe
                    add_item = self.get_labels(ind, df_meta, wav_name, frames, out_filenames)
                    df_features = df_features.append(pd.DataFrame(add_item), ignore_index=True)

        LOG.info(csv_features)
        df_features.to_csv(csv_features, sep="\t", header=True, index=False)
        df_features = pd.read_csv(csv_features, sep="\t")  # Otherwise event_labels is "" and not NaN
    else:
        df_features = self.get_df_from_meta(csv_features)  # No subpart data because it should be in the name

    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_features
def train(train_loader, model, optimizer, epoch, ema_model=None, weak_mask=None, strong_mask=None):
    """ One epoch of a Mean Teacher model
    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
        Should return 3 values: teacher input, student input, labels
    :param model: torch.Module, model to be trained, should return a weak and strong prediction
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, student model, should return a weak and strong prediction
    :param weak_mask: mask the batch to get only the weak labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strong labeled data (used to calculate the loss)
    """
    class_criterion = nn.BCELoss()
    ##################################################
    class_criterion1 = nn.BCELoss(reduction='none')
    ##################################################
    consistency_criterion = nn.MSELoss()
    # [class_criterion, consistency_criterion] = to_cuda_if_available(
    #     [class_criterion, consistency_criterion])
    [class_criterion, class_criterion1, consistency_criterion] = to_cuda_if_available(
        [class_criterion, class_criterion1, consistency_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2

    print("Train\n")
    # LOG.info("Weak[k] -> Weak[k]")
    # LOG.info("Weak[k] -> strong[k]")
    # print(weak_mask.start)
    # print(strong_mask.start)
    # exit()

    count = 0
    check_cus_weak = 0
    difficulty_loss = 0
    loss_w = 1
    LOG.info("loss parameter:{}".format(loss_w))

    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        # print(batch_input.shape)
        # print(ema_batch_input.shape)
        # exit()
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input, target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug("batch_input:{}".format(batch_input.mean()))
        # print(batch_input)
        # exit()

        # Outputs
        ##################################################
        # strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
        strong_pred_ema, weak_pred_ema, sof_ema = ema_model(ema_batch_input)
        sof_ema = sof_ema.detach()
        ##################################################
        strong_pred_ema = strong_pred_ema.detach()
        weak_pred_ema = weak_pred_ema.detach()

        ##################################################
        # strong_pred, weak_pred = model(batch_input)
        strong_pred, weak_pred, sof = model(batch_input)
        ##################################################

        ##################################################
        # custom_ema_loss = Custom_BCE_Loss(ema_batch_input, class_criterion1)
        if difficulty_loss == 0:
            LOG.info("############### Define Difficulty Loss ###############")
            difficulty_loss = 1
        custom_ema_loss = Custom_BCE_Loss_difficulty(ema_batch_input, class_criterion1, paramater=loss_w)
        custom_ema_loss.initialize(strong_pred_ema, sof_ema)

        # custom_loss = Custom_BCE_Loss(batch_input, class_criterion1)
        custom_loss = Custom_BCE_Loss_difficulty(batch_input, class_criterion1, paramater=loss_w)
        custom_loss.initialize(strong_pred, sof)
        ##################################################

        # print(strong_pred.shape)
        # print(strong_pred)
        # print(weak_pred.shape)
        # print(weak_pred)
        # exit()

        loss = None
        # Weak BCE Loss
        # Take the max in the time axis
        # torch.set_printoptions(threshold=10000)
        # print(target[-10])
        # # print(target.max(-2))
        # # print(target.max(-2)[0])
        # print(target.max(-1)[0][-10])
        # exit()
        target_weak = target.max(-2)[0]

        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])
            ema_class_loss = class_criterion(weak_pred_ema[weak_mask], target_weak[weak_mask])
            print("normal_weak:", class_criterion(weak_pred[weak_mask], target_weak[weak_mask]))

            ##################################################
            custom_weak_class_loss = custom_loss.weak(target_weak, weak_mask)
            custom_ema_class_loss = custom_ema_loss.weak(target_weak, weak_mask)
            print("custom_weak:", custom_weak_class_loss)
            ##################################################

            count += 1
            check_cus_weak += custom_weak_class_loss
            # print(custom_weak_class_loss.item())

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(target_weak[weak_mask]))
                LOG.debug(custom_weak_class_loss)
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss', custom_weak_class_loss.item())
            meters.update('Weak EMA loss', custom_ema_class_loss.item())

            # loss = weak_class_loss
            loss = custom_weak_class_loss

            ####################################################################################
            # weak_class_loss = class_criterion(strong_pred[weak_mask], target[weak_mask])
            # ema_class_loss = class_criterion(strong_pred_ema[weak_mask], target[weak_mask])
            # # if i == 0:
            # #     LOG.debug("target: {}".format(target.mean(-2)))
            # #     LOG.debug("Target_weak: {}".format(target))
            # #     LOG.debug("Target_weak mask: {}".format(target[weak_mask]))
            # #     LOG.debug(weak_class_loss)
            # #     LOG.debug("rampup_value: {}".format(rampup_value))
            # meters.update('weak_class_loss', weak_class_loss.item())
            # meters.update('Weak EMA loss', ema_class_loss.item())
            # loss = weak_class_loss
            ####################################################################################

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            # meters.update('Strong loss', strong_class_loss.item())
            strong_ema_class_loss = class_criterion(strong_pred_ema[strong_mask], target[strong_mask])
            # meters.update('Strong EMA loss', strong_ema_class_loss.item())
            print("normal_strong:", class_criterion(strong_pred[strong_mask], target[strong_mask]))

            ##################################################
            custom_strong_class_loss = custom_loss.strong(target, strong_mask)
            meters.update('Strong loss', custom_strong_class_loss.item())
            custom_strong_ema_class_loss = custom_ema_loss.strong(target, strong_mask)
            meters.update('Strong EMA loss', custom_strong_ema_class_loss.item())
            print("custom_strong:", custom_strong_class_loss)
            ##################################################

            if loss is not None:
                # loss += strong_class_loss
                loss += custom_strong_class_loss
            else:
                # loss = strong_class_loss
                loss = custom_strong_class_loss

        # print("check_weak:", class_criterion1(weak_pred[weak_mask], target_weak[weak_mask]).mean())
        # print("check_strong:", class_criterion1(strong_pred[strong_mask], target[strong_mask]).mean())
        # print("\n")
        # exit()

        # Teacher-student consistency cost
        if ema_model is not None:
            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)
            # Take consistency about strong predictions (all data)
            consistency_loss_strong = consistency_cost * consistency_criterion(strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss += consistency_loss_strong
            else:
                loss = consistency_loss_strong

            meters.update('Consistency weight', consistency_cost)
            # Take consistency about weak predictions (all data)
            consistency_loss_weak = consistency_cost * consistency_criterion(weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss += consistency_loss_weak
            else:
                loss = consistency_loss_weak

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
    print("\ncheck_cus_weak:\n", check_cus_weak / count)
classes = DatasetDcase2019Task4.get_classes([weak_df, validation_df, synthetic_df])

# Be careful, frames is max_frames // pooling_time_ratio because max_pooling is applied on the time axis in the model
many_hot_encoder = ManyHotEncoder(classes, n_frames=cfg.max_frames // pooling_time_ratio)

transforms = get_transforms(cfg.max_frames)

# Divide weak in train and valid
train_weak_df = weak_df.sample(frac=0.8, random_state=26)
valid_weak_df = weak_df.drop(train_weak_df.index).reset_index(drop=True)
train_weak_df = train_weak_df.reset_index(drop=True)
LOG.debug(valid_weak_df.event_labels.value_counts())
train_weak_data = DataLoadDf(train_weak_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                             transform=transforms)

# Divide synthetic in train and valid
filenames_train = synthetic_df.filename.drop_duplicates().sample(frac=0.8, random_state=26)
train_synth_df = synthetic_df[synthetic_df.filename.isin(filenames_train)]
valid_synth_df = synthetic_df.drop(train_synth_df.index).reset_index(drop=True)

# Put train_synth in frames so many_hot_encoder can work.
# Not done for valid, because the labels are not used at prediction time and the event-based metric expects seconds.
train_synth_df_frames = train_synth_df.copy()
def extract_features_from_meta_segment(self, csv_audio, feature_dir, subpart_data=None, fixed_segment=None):
    """Extract log mel spectrogram features, but the csv needs to be strongly labeled.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the path of the features directory.
        subpart_data: int, number of files to extract features from the csv.
        fixed_segment: float, in seconds, the size of the kept segment.
            If > audio length, the audio length is kept.
            If segment is True, and > label, it takes the surrounding (allows creating weak labels).
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    self.get_classes(df_meta)
    LOG.info("{} Total file number: {}".format(csv_audio, len(df_meta.filename.unique())))

    ext_name = "_segment_"
    if subpart_data:
        ext_name += str(subpart_data)

    if fixed_segment is not None:
        LOG.debug(f" durations before: "
                  f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}")
        ext_name += f"fix{fixed_segment}"
        df_meta = self.trunc_pad_segment(df_meta, fixed_segment)
        LOG.debug(f" durations after: "
                  f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}")

    meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
    csv_features = os.path.join(self.metadata_dir, meta_base + ext_name + meta_ext)

    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
    df_features = pd.DataFrame()

    path_exists = os.path.exists(csv_features)
    if not path_exists:
        # Loop in all the filenames
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            if ind % 500 == 0:
                LOG.debug(ind)
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error("File %s is in the csv file but the feature is not extracted, deleting...!" % wav_path)
                df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)
            else:
                try:
                    audio_len_sec = soundfile.info(wav_path).duration
                except Exception as e:
                    print("File %s is corrupted, not added to df!" % wav_path)
                    print(e)
                    continue
                if audio_len_sec == 0:
                    print("File %s is corrupted, not added to df!" % wav_path)
                else:
                    files_exist = True
                    # How many features can we compute from this file ?
                    sub_df = df_meta[df_meta.filename == wav_name]
                    cnt_max = len(sub_df)
                    if cnt_max == 0:
                        break
                    base_wav_name = name_only(wav_name)
                    ext_featname = "_seg"
                    if fixed_segment:
                        ext_featname += f"fix{fixed_segment}"
                        files_exist = False  # We should always recompute because of the randomness of onset offset

                    # Check if files already exist
                    out_filenames = [base_wav_name + ext_featname + str(cnt) + ".npy" for cnt in range(cnt_max)]
                    for fname in out_filenames:
                        fpath = os.path.join(feature_dir, fname)
                        if not os.path.exists(fpath):
                            files_exist = False
                            break

                    add_item = {"raw_filename": [], "filename": [], "event_labels": []}
                    for ii, (i, row) in enumerate(sub_df.iterrows()):
                        if not pd.isna(row.event_label):
                            if ii > 0:
                                extnb = str(ii)
                            else:
                                extnb = ""
                            out_filename = os.path.join(feature_dir, name_only(wav_name))
                            out_filename += ext_featname + extnb + ".npy"

                            if not files_exist:
                                sr = soundfile.info(wav_path).samplerate
                                (audio, _) = read_audio(wav_path, cfg.sample_rate,
                                                        start=int(row.onset * sr), stop=int(row.offset * sr))
                                mel_spec = self.calculate_mel_spec(audio, log_feature=self.save_log_feature)
                                if fixed_segment:
                                    pad_trunc_length = int(fixed_segment * cfg.sample_rate // cfg.hop_length)
                                    mel_spec = pad_trunc_seq(mel_spec, pad_trunc_length)
                                np.save(out_filename, mel_spec)

                            add_item["raw_filename"].append(wav_name)
                            add_item["filename"].append(out_filename)
                            add_item["event_labels"].append(row["event_label"])

                    df_features = df_features.append(pd.DataFrame(add_item), ignore_index=True)

        df_features.to_csv(csv_features, sep="\t", header=True, index=False)
        df_features = pd.read_csv(csv_features, sep="\t")  # Otherwise event_labels is "" and not NaN
    else:
        df_features = self.get_df_from_meta(csv_features)  # No subpart data because it should be in the name

    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_features
def extract_features_from_meta(self, csv_audio, subpart_data=None, training=False):
    """Extract log mel spectrogram features.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        subpart_data: int, number of files to extract features from the csv.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    df_all = list()
    feature_fns = list()
    LOG.info('Extracting/loading features')
    LOG.info("{} Total file number: {}".format(csv_audio, len(df_meta.filename.unique())))

    augmentation_funcs = [
        ('orig', None),  # original signal
    ]
    if training:
        augmentation_funcs += [
            # ('lpf4k', partial(lpf, wc=4000, fs=cfg.sample_rate)),
            # ('lpf8k', partial(lpf, wc=8000, fs=cfg.sample_rate)),
            # ('lpf16k', partial(lpf, wc=16000, fs=cfg.sample_rate)),
            # ('ps-6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-6)),
            # ('ps-3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-3)),
            # ('ps+3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=3)),
            # ('ps+6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=6)),
            # ('ts1.25', partial(time_stretch, rate=1.25)),
            # ('ts1.5', partial(time_stretch, rate=1.5)),
            # ('amp0.5', partial(amplitude_scale, coeff=0.5)),
            # ('amp0.75', partial(amplitude_scale, coeff=0.75)),
            # ('hp0.25', partial(hp_reweight, lam=0.25)),
            # ('hp0.75', partial(hp_reweight, lam=0.75))
        ]

    wav_fns = df_meta.filename.unique()
    flag = False
    for ind, wav_name in tqdm(enumerate(wav_fns), total=len(wav_fns)):
        if ind % 500 == 0:
            LOG.debug(ind)

        # verify the audio file is present
        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        wav_path = os.path.join(wav_dir, wav_name)
        if os.path.isfile(wav_path):
            # defer loading audio until the need for feature extraction is verified
            audio = None
            # perform all augmentations (including no augmentation)
            for name, func in augmentation_funcs:
                if name == 'orig':
                    out_filename = os.path.splitext(wav_name)[0] + ".npy"
                else:
                    out_filename = os.path.splitext(wav_name)[0] + '_' + name + ".npy"
                out_path = os.path.join(self.feature_dir, out_filename)

                # add the metadata
                meta = df_meta.loc[df_meta.filename == wav_name]
                df_all.append(meta)

                # for synthetic data with time annotation of events, the meta df will have several entries for
                # each wav file. therefore, we need to append the feature filename len(meta) times.
                if len(meta) > 1:
                    feature_fns += [out_filename] * len(meta)
                    if flag:
                        print('Length of meta: {}'.format(len(meta)))
                        flag = False
                else:
                    feature_fns.append(out_filename)

                if not os.path.exists(out_path):
                    if audio is None:
                        (audio, _) = read_audio(wav_path, cfg.sample_rate)
                    if audio.shape[0] == 0:
                        print("File %s is corrupted!" % wav_path)
                        del feature_fns[-1]
                        del df_all[-1]
                    else:
                        # perform any augmentation, extract features, save features
                        # LOG.info('extracting {}'.format(out_filename))
                        if func is not None:
                            mel_spec = self.calculate_mel_spec(func(audio))
                        else:
                            mel_spec = self.calculate_mel_spec(audio)
                        np.save(out_path, mel_spec)
                        LOG.debug("compute features time: %s" % (time.time() - t1))
        else:
            LOG.error("File %s is in the csv file but the feature is not extracted!" % wav_path)
            # df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)

    # form the final DataFrame of meta data for features from original and augmented audio
    df_all = pd.concat(df_all).reset_index(drop=True)
    df_all['feature_filename'] = feature_fns
    return df_all
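
# Hedged example: the commented-out entries in `augmentation_funcs` above show how an augmentation is registered
# as a (name, callable) pair. Enabling one of them (assuming a `time_stretch(audio, rate)` helper exists in this
# repo, as the comments suggest) would look like:
#
#     augmentation_funcs += [('ts1.25', partial(time_stretch, rate=1.25))]
#
# which produces an extra feature file "<wav_name>_ts1.25.npy" next to the original "<wav_name>.npy".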
train_weak_df_fr = dfs["train"] train_weak_dl_fr = DataLoadDf(train_weak_df_fr, encode_function_label, transform=Compose(trans_fr)) if type_positive != "label" or type_negative != "label": unlabel_df_fr = dataset.get_df_feat_dir(cfg.unlabel, subpart_data=subpart_data, frames_in_sec=frames_in_sec) unlabel_dl_fr = DataLoadDf(unlabel_df_fr, encode_function_label, transform=Compose(trans_fr)) datasets_mean = [train_weak_dl_fr, unlabel_dl_fr] else: datasets_mean = [train_weak_dl_fr] # Normalize if resume_training is None: scaler = ScalerSum() scaler.calculate_scaler(ConcatDataset(datasets_mean)) else: scaler = ScalerSum.load_state_dict(state["scaler"]) LOG.debug(scaler.mean_) trans_fr_scale = trans_fr + [Normalize(scaler)] if segment: trans_fr_scale.append(Unsqueeze(0)) for dl in datasets_mean: dl.set_transform(Compose(trans_fr_scale)) print(dl.transform) concat_frames = ConcatDataset(datasets_mean) trans_fr_sc_embed = deepcopy(trans_fr_scale) if not segment: trans_fr_sc_embed.append(Unsqueeze(0)) train_weak_embed = DataLoadDf(train_weak_df_fr, encode_function_label,
def train(cfg, train_loader, model, optimizer, epoch, ema_model=None, weak_mask=None, strong_mask=None):
    """ One epoch of a Mean Teacher model
    :param cfg: configuration module with the training hyper-parameters (rampup, consistency cost, VAT settings)
    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
        Should return 3 values: teacher input, student input, labels
    :param model: torch.Module, model to be trained, should return a weak and strong prediction
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, student model, should return a weak and strong prediction
    :param weak_mask: mask the batch to get only the weak labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strong labeled data (used to calculate the loss)
    """
    class_criterion = nn.BCELoss()
    consistency_criterion_strong = nn.MSELoss()
    lds_criterion = LDSLoss(xi=cfg.vat_xi, eps=cfg.vat_eps, n_power_iter=cfg.vat_n_power_iter)

    [class_criterion, consistency_criterion_strong, lds_criterion] = to_cuda_if_available(
        [class_criterion, consistency_criterion_strong, lds_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2
    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input, target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug(batch_input.mean())

        # Outputs
        strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
        strong_pred_ema = strong_pred_ema.detach()
        weak_pred_ema = weak_pred_ema.detach()

        strong_pred, weak_pred = model(batch_input)

        loss = None
        # Weak BCE Loss
        # Take the max in axis 2 (assumed to be time)
        if len(target.shape) > 2:
            target_weak = target.max(-2)[0]
        else:
            target_weak = target

        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])
            ema_class_loss = class_criterion(weak_pred_ema[weak_mask], target_weak[weak_mask])

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(target_weak[weak_mask]))
                LOG.debug(weak_class_loss)
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss', weak_class_loss.item())
            meters.update('Weak EMA loss', ema_class_loss.item())

            loss = weak_class_loss

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())

            strong_ema_class_loss = class_criterion(strong_pred_ema[strong_mask], target[strong_mask])
            meters.update('Strong EMA loss', strong_ema_class_loss.item())
            if loss is not None:
                loss += strong_class_loss
            else:
                loss = strong_class_loss

        # Teacher-student consistency cost
        if ema_model is not None:
            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)
            # Take only the consistence with weak and unlabel
            consistency_loss_strong = consistency_cost * consistency_criterion_strong(strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss += consistency_loss_strong
            else:
                loss = consistency_loss_strong

            meters.update('Consistency weight', consistency_cost)
            # Take only the consistence with weak and unlabel
            consistency_loss_weak = consistency_cost * consistency_criterion_strong(weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss += consistency_loss_weak
            else:
                loss = consistency_loss_weak

        # LDS loss
        if cfg.vat_enabled:
            lds_loss = cfg.vat_coeff * lds_criterion(model, batch_input, weak_pred)
            LOG.info('loss: {:.3f}, lds loss: {:.3f}'.format(loss, cfg.vat_coeff * lds_loss.detach().cpu().numpy()))
            loss += lds_loss
        else:
            if i % 25 == 0:
                LOG.info('loss: {:.3f}'.format(loss))

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
def download_file(result_dir, filename):
    """ Download a file from YouTube given an AudioSet filename.
    (It takes only a part of the file thanks to the information provided in the filename)

    Parameters
    ----------
    result_dir : str, result directory which will contain the downloaded file
    filename : str, AudioSet filename to download

    Return
    ------
    list : list, Empty list if the file is downloaded, otherwise contains the filename and the error associated
    """
    LOG.debug(filename)
    tmp_filename = ""
    query_id = filename[1:12]
    segment_start = filename[13:-4].split('_')[0]
    segment_end = filename[13:-4].split('_')[1]
    audio_container = AudioContainer()

    # Define download parameters
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': TMP_FOLDER + '%(id)s.%(ext)s',
        'noplaylist': True,
        'quiet': True,
        'prefer_ffmpeg': True,
        'logger': MyLogger(),
        'audioformat': 'wav'
    }

    try:
        # Download file
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            meta = ydl.extract_info(
                'https://www.youtube.com/watch?v={query_id}'.format(query_id=query_id), download=True)
        audio_formats = [f for f in meta["formats"] if f.get('vcodec') == 'none']
        if not audio_formats:
            return [filename, "no audio format available"]
        # get the best audio format
        best_audio_format = audio_formats[-1]

        tmp_filename = TMP_FOLDER + query_id + "." + best_audio_format["ext"]
        audio_container.load(filename=tmp_filename, fs=44100, res_type='kaiser_best',
                             start=float(segment_start), stop=float(segment_end))

        # Save segmented audio
        audio_container.filename = filename
        audio_container.detect_file_format()
        audio_container.save(filename=os.path.join(result_dir, filename))

        # Remove temporary file
        os.remove(tmp_filename)
        return []

    except (KeyboardInterrupt, SystemExit):
        # Remove temporary files and current audio file.
        for fpath in glob.glob(TMP_FOLDER + query_id + "*"):
            os.remove(fpath)
        raise

    # youtube-dl error, file often removed
    except (ExtractorError, DownloadError, OSError) as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        return [filename, str(e)]

    # multiprocessing can give this error
    except IndexError as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        LOG.info(filename)
        LOG.info(str(e))
        return [filename, "Index Error"]
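
# Usage sketch (hedged): the error-list return value is designed for batched downloads, e.g. with
# multiprocessing; the pool size and the csv column name below are illustrative assumptions:
#
#     with multiprocessing.Pool(4) as pool:
#         errors = pool.starmap(download_file, [(result_dir, fn) for fn in df.filename])
#     failed = [e for e in errors if e]  # non-empty lists are [filename, error] pairs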
def get_dfs(dataset, weak_path, test_path, eval_path=None, subpart_data=None, valid_list=None,
            frames_in_sec=None, segment=False, dropna=True, unique_fr=False, fixed_segment=False):
    weak_df_fr = dataset.get_df_feat_dir(weak_path, subpart_data=subpart_data, segment=segment,
                                         frames_in_sec=frames_in_sec, fixed_segment=fixed_segment)

    if unique_fr:
        if segment:
            raise NotImplementedError("cannot use unique fr with segment")

        def take_mid_fr(x):
            if len(x) > 2:
                x = x.iloc[1:-1]
            return x.sample(n=1)

        l_keep = weak_df_fr.groupby("raw_filename").apply(take_mid_fr).filename.tolist()
        weak_df_fr = weak_df_fr[weak_df_fr.filename.isin(l_keep)].reset_index(drop=True)

    if dropna:
        weak_df_fr = weak_df_fr.dropna().reset_index(drop=True)
        print("DROP NANS")

    valid_weak_df_fr = weak_df_fr[weak_df_fr.raw_filename.isin(valid_list)]
    train_weak_df_fr = weak_df_fr.drop(valid_weak_df_fr.index).reset_index(drop=True)
    valid_weak_df_fr = valid_weak_df_fr.reset_index(drop=True)
    valid_weak_df_fr = valid_weak_df_fr.dropna().reset_index(drop=True)
    valid_weak_df_fr = valid_weak_df_fr[~valid_weak_df_fr.event_labels.fillna("").str.contains(",")]
    valid_weak_df_fr = valid_weak_df_fr.reset_index(drop=True)

    LOG.debug("len weak df frames : {}".format(len(weak_df_fr)))
    LOG.debug("len train weak df frames : {}".format(len(train_weak_df_fr)))
    LOG.debug("len valid weak df frames : {}".format(len(valid_weak_df_fr)))

    # Todo, remove hard coded stuff
    test_df_fr = dataset.get_df_feat_dir(test_path, subpart_data=subpart_data, segment=segment,
                                         frames_in_sec=frames_in_sec, fixed_segment=0.2)
    test_df_fr = test_df_fr.dropna().reset_index(drop=True)
    test_df_1 = dataset.get_df_feat_dir(test_path, subpart_data=subpart_data, segment=segment,
                                        frames_in_sec=frames_in_sec, fixed_segment=1)
    test_df_1 = test_df_1.dropna().reset_index(drop=True)
    test_df_10 = dataset.get_df_feat_dir(test_path, subpart_data=subpart_data, segment=segment,
                                         frames_in_sec=frames_in_sec, fixed_segment=10)
    test_df_10 = test_df_10.dropna().reset_index(drop=True)
    print("drop test nans")

    if eval_path is not None:
        eval_df_fr = dataset.get_df_feat_dir(eval_path, subpart_data=subpart_data, segment=segment,
                                             frames_in_sec=frames_in_sec, fixed_segment=fixed_segment)
    else:
        eval_df_fr = None

    dfs = {
        "train": train_weak_df_fr,
        "valid": valid_weak_df_fr,
        "test": test_df_fr,
        "test1": test_df_1,
        "test10": test_df_10,
        "eval": eval_df_fr
    }
    return dfs
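
# Usage sketch, consistent with how the returned dict is consumed elsewhere in this section
# (dfs["train"], dfs["valid"]); the path attributes cfg.weak / cfg.test and `valid_filenames` are assumptions
# about the caller, not names confirmed by this file:
#
#     dfs = get_dfs(dataset, cfg.weak, cfg.test, subpart_data=None, valid_list=valid_filenames,
#                   frames_in_sec=frames_in_sec, segment=False)
#     train_weak_df_fr = dfs["train"]
#     valid_weak_df_fr = dfs["valid"]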