def get_sample(self, index):
    """From an index, get the features and the labels to create a sample

    Args:
        index: int, index of the desired sample

    Returns:
        tuple
        Tuple containing the features and the labels (numpy.array, numpy.array)
    """
    features = self.get_feature_file_func(self.filenames.iloc[index])

    # event_labels means weak labels, event_label means strong labels
    if "label" in self.df.columns:
        label = self.df.iloc[index]["label"]
        if pd.isna(label):
            label = []
    else:
        label = "empty"  # trick to have -1 for unlabeled data and concat them with labeled
        if "filename" not in self.df.columns:
            raise NotImplementedError(
                "Dataframe to be encoded doesn't have specified columns: columns allowed: 'filename' for unlabeled; "
                "'filename', 'event_labels' for weak labels; 'filename', 'onset', 'offset', 'event_label' "
                "for strong labels, yours: {}".format(self.df.columns))

    if index == 0:
        LOG.debug("label to encode: {}".format(label))
    if self.encode_function is not None:
        # labels are a list of strings or a list of lists [[label, onset, offset]]
        y = self.encode_function([label])
    else:
        y = label
    sample = features, y
    return sample
def calculate_embedding(embedding_dl, model, savedir=None, concatenate=None, squeeze=True):
    # If frames, assume the savedir name or the filename is different than when it is not defined
    model.eval()
    df = embedding_dl.df.copy()
    if savedir is not None:
        create_folder(savedir)
        # Redirect filenames to the save directory (joining with a None savedir would fail)
        df.filename = df.filename.apply(lambda x: os.path.join(savedir, os.path.basename(x)))
        df.to_csv(os.path.join(savedir, "df"), sep="\t", index=False)

    if concatenate is not None:
        concat_embed = []
    for cnt, (data_in, y) in enumerate(embedding_dl):
        data_in = to_cuda_if_available(data_in)
        emb = get_embeddings_numpy(data_in, model, flatten=False)
        if cnt == 0:
            LOG.debug(f"shapes: input: {data_in.shape}, embed: {emb.shape}, dir: {savedir}")
        if squeeze:
            emb = np.squeeze(emb)
        if savedir is not None:
            np.save(df.iloc[cnt].filename, emb)

        if concatenate == "append":
            concat_embed.append(emb)
        elif concatenate == "extend":
            concat_embed.extend(emb)
        elif concatenate is not None:
            raise NotImplementedError("Impossible to aggregate with this value")

    model.train()
    if concatenate is not None:
        concat_embed = np.array(concat_embed)
        return df, concat_embed
    return df
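# Usage sketch (hypothetical names, for illustration only): given a DataLoadDf
# `embed_dl` built on a trained `model`, this saves one .npy embedding per clip
# and returns the stacked array alongside the redirected dataframe:
#
# df_emb, embeddings = calculate_embedding(embed_dl, model,
#                                          savedir="stored_data/embeddings",
#                                          concatenate="append")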
def get_subpart_data(df, subpart_data):
    column = "filename"
    if subpart_data <= len(df[column].unique()):
        filenames = df[column].drop_duplicates().sample(subpart_data, random_state=10)
        df = df[df[column].isin(filenames)].reset_index(drop=True)
        LOG.debug("Taking subpart of the data, len : {}, df_len: {}".format(subpart_data, len(df)))
    return df
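# Usage sketch (hypothetical dataframe, for illustration only): keep 100 random files
# (and all of their rows) from a metadata dataframe. The fixed random_state=10 above
# makes the subset reproducible across runs:
#
# df_small = get_subpart_data(df_meta, subpart_data=100)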
def loop_batches(samples, model_triplet, semi_hard_input=None, semi_hard_embed=None, i=0):
    inputs, inputs_pos, inputs_neg, pred_labels = samples
    inputs, inputs_pos = to_cuda_if_available(inputs, inputs_pos)
    if i < 2:
        LOG.debug("input shape: {}".format(inputs.shape))

    if semi_hard_input is not None or semi_hard_embed is not None:
        assert semi_hard_input is not None, "semi_hard_input and semi_hard_embed should be defined"
        assert semi_hard_embed is not None, "semi_hard_input and semi_hard_embed should be defined"
        model_triplet.eval()
        embed = get_embeddings_numpy(inputs, model_triplet)
        embed_pos = get_embeddings_numpy(inputs_pos, model_triplet)

        # Unlabeled samples (all labels == -1) whose negative is NaN get a semi-hard negative
        label_mask = (pred_labels.numpy() == -1).all(-1)
        semi_hard_mask = np.isnan(inputs_neg.detach().numpy()).reshape(inputs_neg.shape[0], -1).all(-1)
        mask = label_mask & semi_hard_mask
        if i < 2:
            LOG.debug("mask: {}".format(mask))
        negative_indexes = compute_semi_hard_indexes(embed[mask], embed_pos[mask], semi_hard_embed)
        inputs_neg[np.where(mask)] = semi_hard_input[negative_indexes]

    inputs_neg = to_cuda_if_available(inputs_neg)
    model_triplet.eval()
    with torch.no_grad():
        outputs_pos = model_triplet(inputs_pos)
        outputs_neg = model_triplet(inputs_neg)
    model_triplet.train()
    # forward + backward + optimize
    outputs = model_triplet(inputs)
    return outputs, outputs_pos, outputs_neg
def means(self, dataset):
    """Compute the running mean and mean of squares of a dataset in one pass.

    :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
    """
    LOG.info('computing mean')
    start = time.time()

    sum_ = 0
    sum_square = 0
    n = 0
    n_sq = 0
    for sample in dataset:
        if type(sample) in [tuple, list] and len(sample) == 2:
            batch_x, _ = sample
        else:
            batch_x = sample
        if type(batch_x) is torch.Tensor:
            batch_x_arr = batch_x.numpy()
        else:
            batch_x_arr = batch_x
        su, nn = self.sum(batch_x_arr, axis=-1)
        sum_ += su
        n += nn
        su_sq, nn_sq = self.sum(batch_x_arr ** 2, axis=-1)
        sum_square += su_sq
        n_sq += nn_sq

    self.mean_ = sum_ / n
    self.mean_of_square_ = sum_square / n_sq
    LOG.debug('time to compute means: ' + str(time.time() - start))
    return self
def means(self, dataset):
    """Compute the mean and mean of squares of a dataset, batch by batch.

    :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
    """
    LOG.info('computing mean')
    start = time.time()

    shape = None
    counter = 0
    for sample in dataset:
        if type(sample) in [tuple, list] and len(sample) == 2:
            batch_X, _ = sample
        else:
            batch_X = sample
        if type(batch_X) is torch.Tensor:
            batch_X_arr = batch_X.numpy()
        else:
            batch_X_arr = batch_X
        data_square = batch_X_arr ** 2
        counter += 1

        if shape is None:
            shape = batch_X_arr.shape
        elif batch_X_arr.shape != shape:
            raise NotImplementedError("Not possible to add data with different shape in mean calculation yet")

        # assume the first item carries the shape information
        if self.mean_ is None:
            self.mean_ = self.mean(batch_X_arr, axis=-1)
        else:
            self.mean_ += self.mean(batch_X_arr, axis=-1)

        if self.mean_of_square_ is None:
            self.mean_of_square_ = self.mean(data_square, axis=-1)
        else:
            self.mean_of_square_ += self.mean(data_square, axis=-1)

    self.mean_ /= counter
    self.mean_of_square_ /= counter

    # To be used if data have different shapes, but need to stop the iteration before.
    # rest = len(dataset) - i
    # if rest != 0:
    #     weight = rest / float(i + rest)
    #     X, y = dataset[-1]
    #     data_square = X ** 2
    #     mean = mean * (1 - weight) + self.mean(X, axis=-1) * weight
    #     mean_of_square = mean_of_square * (1 - weight) + self.mean(data_square, axis=-1) * weight

    LOG.debug('time to compute means: ' + str(time.time() - start))
    return self
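# Why track both the mean and the mean of squares? The scaler can later recover the
# standard deviation in a single pass via Var[X] = E[X^2] - (E[X])^2. A minimal,
# self-contained check of that identity (illustrative only, not part of the scaler):
#
# import numpy as np
# x = np.random.randn(1000, 64)
# mean_ = x.mean(axis=0)
# mean_of_square_ = (x ** 2).mean(axis=0)
# std_ = np.sqrt(mean_of_square_ - mean_ ** 2)
# assert np.allclose(std_, x.std(axis=0))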
def compute_strong_metrics(predictions, valid_df, pooling_time_ratio):
    # Convert network frame indices to seconds
    predictions.onset = predictions.onset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)
    predictions.offset = predictions.offset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)

    metric_event = event_based_evaluation_df(valid_df, predictions, t_collar=0.200, percentage_of_length=0.2)
    metric_segment = segment_based_evaluation_df(valid_df, predictions, time_resolution=1.)
    LOG.info(metric_event)
    LOG.info(metric_segment)
    return metric_event
def get_predictions(model, valid_dataset, decoder, save_predictions=None):
    for i, (input_data, _) in enumerate(valid_dataset):
        [input_data] = to_cuda_if_available([input_data])

        pred_strong, _ = model(input_data.unsqueeze(0))
        pred_strong = pred_strong.cpu()
        pred_strong = pred_strong.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_strong)
        pred_strong = ProbabilityEncoder().binarization(pred_strong, binarization_type="global_threshold",
                                                        threshold=0.5)
        pred_strong = scipy.ndimage.filters.median_filter(pred_strong, (cfg.median_window, 1))
        pred = decoder(pred_strong)
        pred = pd.DataFrame(pred, columns=["event_label", "onset", "offset"])
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
            LOG.debug("predictions strong: \n{}".format(pred_strong))
            prediction_df = pred.copy()
        else:
            prediction_df = prediction_df.append(pred)

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
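# Post-processing sketch: the frame-level probabilities are binarized at 0.5, then
# smoothed with a median filter before being decoded to (event_label, onset, offset).
# Toy illustration (cfg.median_window is assumed odd; the values here are made up):
#
# import numpy as np
# import scipy.ndimage
# probs = np.array([[0.1], [0.9], [0.2], [0.8], [0.9], [0.9], [0.1]])
# binary = (probs > 0.5).astype(float)
# # size=(3, 1) smooths along time only: isolated frames are dropped, 1-frame gaps filled
# smoothed = scipy.ndimage.median_filter(binary, size=(3, 1))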
def get_model(state, args, init_model_name=None):
    if init_model_name is not None and os.path.exists(init_model_name):
        model, optimizer, state = load_model(init_model_name, return_optimizer=True, return_state=True)
    else:
        if "conv_dropout" in args:
            conv_dropout = args.conv_dropout
        else:
            conv_dropout = cfg.conv_dropout
        cnn_args = [1]  # number of input channels

        if args.fixed_segment is not None:
            frames = cfg.frames
        else:
            frames = None

        nb_layers = 4
        cnn_kwargs = {
            "activation": cfg.activation,
            "conv_dropout": conv_dropout,
            "batch_norm": cfg.batch_norm,
            "kernel_size": nb_layers * [3],
            "padding": nb_layers * [1],
            "stride": nb_layers * [1],
            "nb_filters": [16, 16, 32, 65],
            "pooling": [(2, 2), (2, 2), (1, 4), (1, 2)],
            "aggregation": args.agg_time,
            "norm_out": args.norm_embed,
            "frames": frames,
        }
        nb_frames_staying = cfg.frames // (2 ** 2)
        model = CNN(*cnn_args, **cnn_kwargs)
        # model.apply(weights_init)
        state.update({
            'model': {
                "name": model.__class__.__name__,
                'args': cnn_args,
                "kwargs": cnn_kwargs,
                'state_dict': model.state_dict()
            },
            'nb_frames_staying': nb_frames_staying
        })
        if init_model_name is not None:
            save_model(state, init_model_name)

    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    LOG.info("number of parameters in the model: {}".format(pytorch_total_params))
    return model, state
def measure_embeddings(set_embed, model, emb_path, figure_path, set_name=''):
    df, embed = calculate_embedding(set_embed, model, savedir=emb_path, concatenate="append")
    df = df.dropna()
    embed = embed[df.index]
    LOG.debug("embed shape: {}".format(embed.shape))
    LOG.debug("df shape: {}".format(df.shape))

    tsne_emb = TSNE().fit_transform(X=embed.reshape(embed.shape[0], -1))
    tsne_plots(tsne_emb, df, savefig=figure_path)
    scatter = scatter_ratio(embed.reshape(embed.shape[0], -1), df.reset_index())
    silhouette = sklearn.metrics.silhouette_score(
        embed.reshape(embed.shape[0], -1), df.event_labels, metric='euclidean')

    # Just informative
    LOG.info(
        f"{set_name} silhouette for all classes in 2D (tsne): "
        f"{sklearn.metrics.silhouette_score(df[['X', 'Y']], df.event_labels, metric='euclidean')}")

    proto = proto_acc(embed.reshape(embed.shape[0], -1), df.reset_index())
    LOG.info("Proto accuracy {} : {}".format(set_name, proto))

    return {
        "scatter" + set_name: scatter,
        "silhouette" + set_name: silhouette,
        "proto" + set_name: proto
    }
def __init__(self, serie_labels, classes, n_per_class, n_classes=None):
    super(CategoriesSampler, self).__init__(serie_labels)
    self.n_per_class = n_per_class
    self.classes = classes
    # self.n_batch = int(len(serie_labels) // (n_per_class * len(classes)))
    self.n_batch = int(serie_labels.value_counts().min() / n_per_class)
    self.serie_labels = serie_labels
    self.n_classes = n_classes
    LOG.debug(f"sampler has: {self.n_batch} batches of {n_per_class} samples per class, "
              f"len serie: {len(serie_labels)}")

    self.ind_per_class = []
    for label in classes:
        ind = np.argwhere(serie_labels.str.contains(label)).reshape(-1).tolist()
        if len(ind) > 0:
            self.ind_per_class.append(ind)
def train_loop(train_load, model):
    # Accumulator renamed so the per-batch loss tensor does not shadow the list of losses
    losses_bce = []
    if args.segment:
        for cnt, indexes in enumerate(train_load.batch_sampler):
            # zero the parameter gradients once per batch; gradients accumulate over its samples
            optimizer.zero_grad()
            for j, ind in enumerate(indexes):
                inputs, pred_labels = train_set[ind]
                if cnt == 0 and epoch_ == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))
                inputs, pred_labels = to_cuda_if_available(inputs, pred_labels)

                # forward + backward
                weak_out = model(inputs)
                loss_bce = criterion_bce(weak_out, pred_labels.argmax(0, keepdim=True))
                loss_bce.backward()
                losses_bce.append(loss_bce.item())
            optimizer.step()
    else:
        for cnt, samples in enumerate(train_load):
            # zero the parameter gradients
            optimizer.zero_grad()
            inputs, pred_labels = samples
            if cnt == 0 and epoch_ == 0:
                LOG.debug("classif input shape: {}".format(inputs.shape))
            inputs, pred_labels = to_cuda_if_available(inputs, pred_labels)

            # forward + backward + optimize
            weak_out = model(inputs)
            loss_bce = criterion_bce(weak_out, pred_labels)
            loss_bce.backward()
            losses_bce.append(loss_bce.item())
            optimizer.step()

    loss_mean = np.mean(losses_bce)
    print('[%d / %d, %5d] loss: %.3f' % (epoch_ + 1, n_epochs, cnt + 1, loss_mean))
    return loss_mean, model
def test_model(state, reference_tsv_path, reduced_number_of_data=None, store_predictions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path, reduced_number_of_data)

    strong_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)
    predictions = get_predictions(crnn, strong_dataload, many_hot_encoder.decode_strong, pooling_time_ratio,
                                  save_predictions=store_predictions_fname)
    compute_strong_metrics(predictions, df)

    weak_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(weak_dataload, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
def get_weak_predictions(model, valid_dataset, weak_decoder, save_predictions=None):
    for i, (data, _) in enumerate(valid_dataset):
        data = to_cuda_if_available(data)

        pred_weak = model(data.unsqueeze(0))
        pred_weak = pred_weak.cpu()
        pred_weak = pred_weak.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_weak)
        pred_weak = ProbabilityEncoder().binarization(pred_weak, binarization_type="global_threshold",
                                                      threshold=0.5)
        pred = weak_decoder(pred_weak)
        pred = pd.DataFrame(pred, columns=["event_labels"])
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
            prediction_df = pred.copy()
        else:
            prediction_df = prediction_df.append(pred)

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
def proto_acc(embed, df):
    classes = ['Alarm_bell_ringing', 'Blender', 'Cat', 'Dishes', 'Dog',
               'Electric_shaver_toothbrush', 'Frying', 'Running_water',
               'Speech', 'Vacuum_cleaner']
    vector_embed = embed.reshape(embed.shape[0], -1)
    classes_mean = np.zeros((len(classes), vector_embed.shape[-1]))
    for i, c in enumerate(classes):
        class_df = df[df.event_labels.fillna("").str.contains(c)]
        if not class_df.empty:
            class_embed = vector_embed[class_df.index]
            mean_class = np.mean(class_embed, axis=0)
            classes_mean[i] = mean_class

    acc_per_class = np.zeros((len(classes)))
    for i, c in enumerate(classes):
        class_df = df[df.event_labels.fillna("").str.contains(c)]
        if not class_df.empty:
            class_embed = vector_embed[class_df.index]
            distance_to_min = scipy.spatial.distance.cdist(class_embed, classes_mean)
            labels = distance_to_min.argmin(-1)
            acc_per_class[i] = (labels == i).mean()
    LOG.info(pd.DataFrame([classes, acc_per_class.tolist()]).transpose())
    return acc_per_class.mean()
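# proto_acc implements nearest-prototype classification: each class is summarized by
# the mean of its embeddings, and a clip counts as correct when its nearest prototype
# belongs to its own class. A self-contained toy version of the decision rule
# (illustrative only):
#
# import numpy as np
# import scipy.spatial
# embeds = np.random.randn(20, 16)
# labels = np.repeat([0, 1], 10)
# protos = np.stack([embeds[labels == c].mean(0) for c in (0, 1)])
# pred = scipy.spatial.distance.cdist(embeds, protos).argmin(-1)
# acc = (pred == labels).mean()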
def trunc_pad_segment(df, fixed_segment):
    def apply_ps_func(row, length):
        duration = row["offset"] - row["onset"]
        if duration > length:
            # Choose a fixed-length segment inside the event
            ra = np.random.uniform(-1, 1)
            onset_bias = fixed_segment * ra
            row["onset"] = max(0, row["onset"] + onset_bias)
        else:
            # Bias the onset (the offset follows accordingly)
            ra = np.random.rand()
            onset_bias = fixed_segment * ra
            row["onset"] = max(0, row["onset"] - onset_bias)

        row["offset"] = row["onset"] + fixed_segment
        if row["offset"] > cfg.max_len_seconds:
            row["offset"] = cfg.max_len_seconds
            row["onset"] = row["offset"] - fixed_segment
        return row

    assert "onset" in df.columns and "offset" in df.columns, "bias label only available with strong labels"
    LOG.info(f"Fix labels {fixed_segment} seconds")
    df = df.apply(apply_ps_func, axis=1, args=[fixed_segment])
    return df
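# Behaviour sketch (illustrative numbers only): with fixed_segment=2.0 s, an event
# annotated (onset=1.0, offset=5.0) is longer than 2 s, so a random 2 s window inside
# it is kept; an event (onset=1.0, offset=1.5) is shorter, so its onset is shifted
# left by a random amount and the offset set to onset + 2.0, clipped to
# cfg.max_len_seconds. Every returned segment thus lasts exactly fixed_segment seconds.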
def extract_features_from_meta(self, csv_audio, feature_dir, subpart_data=None):
    """Extract log mel spectrogram features.

    Args:
        csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the path to the directory where the features are
        subpart_data: int, number of files to extract features from the csv.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    LOG.info("{} Total file number: {}".format(csv_audio, len(df_meta.filename.unique())))

    for ind, wav_name in enumerate(df_meta.filename.unique()):
        if ind % 500 == 0:
            LOG.debug(ind)
        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        wav_path = os.path.join(wav_dir, wav_name)

        out_filename = os.path.join(feature_dir, name_only(wav_name) + ".npy")
        if not os.path.exists(out_filename):
            if not os.path.isfile(wav_path):
                LOG.error("File %s is in the csv file but not on disk, feature not extracted!" % wav_path)
                df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)
            else:
                (audio, _) = read_audio(wav_path, cfg.sample_rate)
                if audio.shape[0] == 0:
                    print("File %s is corrupted!" % wav_path)
                else:
                    mel_spec = self.calculate_mel_spec(audio, log_feature=self.save_log_feature)
                    np.save(out_filename, mel_spec)

    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_meta.reset_index(drop=True)
def compute_strong_metrics(predictions, valid_df, pooling_time_ratio=None):
    if pooling_time_ratio is not None:
        LOG.warning("pooling_time_ratio is deprecated, use it in get_predictions() instead.")
        # Convert network frame indices to seconds
        predictions.onset = predictions.onset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)
        predictions.offset = predictions.offset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)

    metric_event = event_based_evaluation_df(valid_df, predictions, t_collar=0.200, percentage_of_length=0.2)
    metric_segment = segment_based_evaluation_df(valid_df, predictions, time_resolution=1.)
    LOG.info(metric_event)
    LOG.info(metric_segment)
    return metric_event
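# The rescaling above converts network output frames to seconds: one output frame
# covers pooling_time_ratio * hop_length / sample_rate seconds. Worked example with
# assumed values (not necessarily those in config.py):
#
# sample_rate = 16000   # Hz
# hop_length = 256      # samples between STFT frames
# pooling_time_ratio = 4
# sec_per_frame = pooling_time_ratio / (sample_rate / hop_length)  # = 0.064 s
# # an onset at output frame 125 maps to 125 * 0.064 = 8.0 seconds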
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state={}, dir_model="model", result_path="res", recompute=True):
    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")
    # scheduler = ReduceLROnPlateau(optimizer_classif, 'max', factor=0.1, patience=cfg.reduce_lr, verbose=True)
    print(optimizer_classif)

    save_results = pd.DataFrame()
    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            loss_mean_bce = []
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize
                weak_out = classif_model(inputs)
                weak_out = to_cpu(weak_out)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_mean_bce.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            loss_mean_bce = np.mean(loss_mean_bce)
            classif_model.eval()
            n_class = len(many_hot_encoder.labels)
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class, train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class, valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
            else:
                # keep the results dict below well defined even without a validation set
                macro_f_measure = np.array([-1])
                mean_macro_f_measure = -1
            classif_model.train()
            print("Time to train an epoch: {}".format(time.time() - start))

            # print statistics
            print('[%d / %d, %5d] loss: %.3f' % (epoch_ + 1, cfg.n_epoch_classifier, i + 1, loss_mean_bce))
            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": np.array_str(macro_f_measure, precision=2),
                       }
            for key in results:
                LOG.info("\t\t ----> {} : {}".format(key, results[key]))
            save_results = save_results.append(results, ignore_index=True)
            # scheduler.step(mean_macro_f_measure)

            # ##########
            # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            state["model"]["state_dict"] = classif_model.state_dict()
            state["optimizer"]["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info("best model at epoch : {} with macro {}".format(save_best_call.best_epoch,
                                                                     save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)

    LOG.info("#### End classif")
    save_results.to_csv(result_path, sep="\t", header=True, index=False)
    return classif_model, state
def test_model(state, reduced_number_of_data, store_predictions_fname=None):
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes),
    #                                      DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # # Validation 2019 (original code)
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                     base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                     save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file,
    #                                    many_hot_encoder.encode_strong_df, transform=transforms_valid)
    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                                save_predictions=store_predictions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)
    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                                  transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes),
    #                                      DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir,
                                    local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag,
                                    save_log_feature=False)

    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)
    predictions = get_predictions(crnn, validation_strong, many_hot_encoder.decode_strong,
                                  save_predictions=store_predictions_fname)

    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_time = time.time() - start
    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))


if __name__ == '__main__':
    LOG.info("Simple CRNNs")
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-s", '--subpart_data', type=int, default=None, dest="subpart_data",
                        help="Number of files to be used. Useful when testing on a small number of files.")
    parser.add_argument("-n", '--no_weak', dest='no_weak', action='store_true', default=False,
class MyLogger(object):
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass


if __name__ == "__main__":
    base_missing_files_folder = ".."
    dataset_folder = os.path.join("..", "dataset")

    LOG.info("Download_data")
    LOG.info("\n\nOnce the database is downloaded, do not forget to check your missing_files\n\n")
    LOG.info("You can change N_JOBS and CHUNK_SIZE to speed up the download with more processes.")

    # Modify it with the number of processes you want, but be careful: youtube can block you if you use too many.
    N_JOBS = 3

    # Only useful when multiprocessing; if chunk_size is high, download is faster.
    # Be careful, the progress bar only updates after each chunk.
    CHUNK_SIZE = 10

    LOG.info("Validation data")
    test = os.path.join(dataset_folder, "metadata", "validation", "validation.tsv")
    result_dir = os.path.join(dataset_folder, "audio", "validation")
    # read the metadata file and get each filename only once
    df = pd.read_csv(test, header=0, sep='\t')
from utils.Transforms import ApplyLog, Unsqueeze, ToTensor, View, Normalize, Compose
from utils.Samplers import CategoriesSampler
from pprint import pformat
import config as cfg
from DesedSynthetic import DesedSynthetic
from evaluation_measures import get_f_measure_by_class, measure_classif
from common import get_model, get_optimizer, shared_args, get_dfs, measure_embeddings
from models.FullyConnected import FullyConnected
from models.CombineModel import CombineModel
from utils.Logger import LOG
from utils.Scaler import ScalerSum
from utils.utils import ManyHotEncoder, create_folder, to_cuda_if_available, EarlyStopping, SaveBest, to_cpu, \
    load_model, save_model, ViewModule

if __name__ == '__main__':
    LOG.info(__file__)
    t = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_layers_classif', type=int, default=1)
    parser.add_argument('--conv_dropout', type=float, default=cfg.conv_dropout)
    parser.add_argument('--dropout_classif', type=float, default=cfg.dropout_non_recurrent)
    parser.add_argument('--nb_layers', type=int, default=cfg.nb_layers)
    parser.add_argument('--pool_freq', type=int, default=cfg.pool_freq)
    parser.add_argument('--last_layer', type=int, default=cfg.last_layer)
    parser.add_argument('--epochs', type=int, default=cfg.n_epoch_classifier)
    parser = shared_args(parser)
    args = parser.parse_args()
def train(cfg, train_loader, model, optimizer, epoch, ema_model=None, weak_mask=None, strong_mask=None):
    """One epoch of a Mean Teacher model.

    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
        Should return 3 values: teacher input, student input, labels
    :param model: torch.Module, model to be trained, should return a weak and strong prediction
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, student model, should return a weak and strong prediction
    :param weak_mask: mask the batch to get only the weakly labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strongly labeled data (used to calculate the loss)
    """
    class_criterion = nn.BCELoss()
    consistency_criterion_strong = nn.MSELoss()
    lds_criterion = LDSLoss(xi=cfg.vat_xi, eps=cfg.vat_eps, n_power_iter=cfg.vat_n_power_iter)
    [class_criterion, consistency_criterion_strong, lds_criterion] = to_cuda_if_available(
        [class_criterion, consistency_criterion_strong, lds_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2
    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input, target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug(batch_input.mean())

        # Outputs
        strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
        strong_pred_ema = strong_pred_ema.detach()
        weak_pred_ema = weak_pred_ema.detach()

        strong_pred, weak_pred = model(batch_input)
        loss = None

        # Weak BCE Loss
        # Take the max in axis 2 (assumed to be time)
        if len(target.shape) > 2:
            target_weak = target.max(-2)[0]
        else:
            target_weak = target

        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])
            ema_class_loss = class_criterion(weak_pred_ema[weak_mask], target_weak[weak_mask])

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(target_weak[weak_mask]))
                LOG.debug(weak_class_loss)
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss', weak_class_loss.item())
            meters.update('Weak EMA loss', ema_class_loss.item())

            loss = weak_class_loss

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())

            strong_ema_class_loss = class_criterion(strong_pred_ema[strong_mask], target[strong_mask])
            meters.update('Strong EMA loss', strong_ema_class_loss.item())
            if loss is not None:
                loss += strong_class_loss
            else:
                loss = strong_class_loss

        # Teacher-student consistency cost
        if ema_model is not None:
            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)

            # Take only the consistency with weak and unlabeled
            consistency_loss_strong = consistency_cost * consistency_criterion_strong(strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss += consistency_loss_strong
            else:
                loss = consistency_loss_strong

            consistency_loss_weak = consistency_cost * consistency_criterion_strong(weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss += consistency_loss_weak
            else:
                loss = consistency_loss_weak

        # LDS loss
        if cfg.vat_enabled:
            # lds_loss already includes cfg.vat_coeff, so do not multiply again when logging
            lds_loss = cfg.vat_coeff * lds_criterion(model, batch_input, weak_pred)
            LOG.info('loss: {:.3f}, lds loss: {:.3f}'.format(loss, lds_loss.detach().cpu().numpy()))
            loss += lds_loss
        else:
            if i % 25 == 0:
                LOG.info('loss: {:.3f}'.format(loss))

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start
    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
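# ramps.sigmoid_rampup drives the consistency weight from ~0 to 1 over rampup_length
# steps. A common definition (assumed here, following the original Mean Teacher code;
# not verified against the ramps module of this repo):
#
# import numpy as np
# def sigmoid_rampup(current, rampup_length):
#     if rampup_length == 0:
#         return 1.0
#     t = np.clip(current / rampup_length, 0.0, 1.0)
#     return float(np.exp(-5.0 * (1.0 - t) ** 2))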
        )
        if margin is not None:
            triplet_loss = torch.clamp(margin + dist_pos - dist_neg, min=0.0).mean()
        else:
            triplet_loss = ratio_loss(dist_pos, dist_neg).mean()
        triplet_loss = to_cpu(triplet_loss)
        validation_loss.append(triplet_loss.item())

    validation_loss = np.mean(validation_loss)
    triplet_model.train()
    return validation_loss


if __name__ == '__main__':
    LOG.info(__file__)
    # ###########
    # ## Argument
    # ###########
    t = time.time()
    print("Arguments have been set for a certain group of experiments, feel free to change them.")
    parser = argparse.ArgumentParser(description="")
    parser.add_argument('--margin', type=float, default=None, dest="margin")
    parser.add_argument('--type_positive', type=str, default="nearest", dest="type_positive")
    parser.add_argument('--type_negative', type=str, default="semi_hard", dest="type_negative")

    # Experiments comparing the impact of the number of labeled vs unlabeled triplets
    # Be careful if subpart_data is not None!!!!!!
    parser.add_argument('--nb_labeled_triplets', type=int, default=None, dest="nb_labeled_triplets")
    parser.add_argument('--nb_unlabeled_triplets', type=int, default=None, dest="nb_unlabeled_triplets")
    parser.add_argument('--pit', action="store_true", default=False)
    parser.add_argument('--swap', action="store_true", default=False)
def train(train_loader, model, optimizer, epoch, weak_mask=None, strong_mask=None):
    class_criterion = nn.BCELoss()
    [class_criterion] = to_cuda_if_available([class_criterion])

    meters = AverageMeterSet()
    meters.update('lr', optimizer.param_groups[0]['lr'])

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    for i, (batch_input, target) in enumerate(train_loader):
        [batch_input, target] = to_cuda_if_available([batch_input, target])
        LOG.debug(batch_input.mean())

        strong_pred, weak_pred = model(batch_input)
        loss = 0
        if weak_mask is not None:
            # Weak BCE Loss
            # Trick to not take unlabeled data
            # Todo figure out another way
            target_weak = target.max(-2)[0]
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])
            if i == 1:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug(weak_class_loss)
            meters.update('Weak loss', weak_class_loss.item())
            loss += weak_class_loss

        if strong_mask is not None:
            # Strong BCE loss
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())
            loss += strong_class_loss

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_time = time.time() - start
    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
def train_triplet_epoch(loader, model_triplet, optimizer, semi_hard_input=None, semi_hard_embed=None,
                        pit=False, margin=None, swap=False, acc_grad=False):
    start = time.time()
    loss_mean_triplet = []
    nb_triplets_used = 0
    nb_triplets = 0

    if acc_grad:
        lder = loader.batch_sampler
    else:
        lder = loader

    for i, samples in enumerate(lder):
        optimizer.zero_grad()
        if acc_grad:
            outs = loop_batches_acc_grad(samples, loader.dataset, model_triplet,
                                         semi_hard_input, semi_hard_embed, i=i)
        else:
            outs = loop_batches(samples, model_triplet, semi_hard_input, semi_hard_embed, i=i)
        outputs, outputs_pos, outputs_neg = outs

        if i == 0:
            LOG.debug("output CNN shape: {}".format(outputs.shape))
            LOG.debug(outputs.mean())
            LOG.debug(outputs_pos.mean())
            LOG.debug(outputs_neg.mean())

        dist_pos, dist_neg = get_distances(outputs, outputs_pos, outputs_neg, pit, swap)

        if margin is not None:
            loss_triplet = torch.clamp(margin + dist_pos - dist_neg, min=0.0)
        else:
            loss_triplet = ratio_loss(dist_pos, dist_neg)

        pair_cnt = (loss_triplet.detach() > 0).sum().item()
        nb_triplets_used += pair_cnt
        nb_triplets += len(loss_triplet)

        # Normalize based on the number of pairs.
        if pair_cnt > 0:
            # loss_triplet = loss_triplet.sum() / pair_cnt
            loss_triplet = loss_triplet.mean()
            loss_triplet.backward()
            optimizer.step()
            loss_mean_triplet.append(loss_triplet.item())
        else:
            LOG.debug("batch doesn't have any loss > 0")

    epoch_time = time.time() - start
    LOG.info("Loss: {:.4f}\t"
             "Time: {}\t"
             "\tnb_triplets used: {} / {}\t"
             "".format(np.mean(loss_mean_triplet), epoch_time, nb_triplets_used, nb_triplets))
    ratio_triplet_used = nb_triplets_used / nb_triplets
    return model_triplet, loss_mean_triplet, ratio_triplet_used
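# Both objectives act on anchor-positive / anchor-negative distances: the margin
# variant is max(0, margin + d_pos - d_neg), which becomes zero once the negative is
# pushed margin further away than the positive. Toy sketch on random embeddings
# (illustrative only; the real distances come from get_distances):
#
# import torch
# anchor = torch.randn(8, 32)
# positive = anchor + 0.1 * torch.randn(8, 32)
# negative = torch.randn(8, 32)
# d_pos = (anchor - positive).pow(2).sum(-1)
# d_neg = (anchor - negative).pow(2).sum(-1)
# loss = torch.clamp(1.0 + d_pos - d_neg, min=0.0).mean()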
from utils.Transforms import ApplyLog, Unsqueeze, PadOrTrunc, ToTensor, Normalize, Compose
from utils.utils import load_model, ManyHotEncoder

# ###########
# ## Argument
# ###########
t = time.time()
print("Arguments have been set for a certain group of experiments, feel free to change them.")
parser = argparse.ArgumentParser(description="")
parser.add_argument('--subpart_data', type=int, default=None)
parser.add_argument('--model_path', type=str, default=None)
parser.add_argument('--embed_name', type=str, default=None)
# Experiments comparing the impact of the number of labeled vs unlabeled triplets
# Be careful if subpart_data is not None!!!!!!
f_args = parser.parse_args()
LOG.info(pformat(vars(f_args)))

model_path = f_args.model_path
assert model_path is not None, "model_path has to be defined to compute an embedding"
embed_name = f_args.embed_name
if embed_name is None:
    embed_name = model_path.split("/")[-2]

# ###########
# Experiment parameters
# ###########
subpart_data = f_args.subpart_data
dataset = DesedSynthetic("../dcase2019",
                         base_feature_dir="../dcase2019/features",
                         save_log_feature=False)

emb_model, state = load_model(model_path, return_state=True)
epoch_model = state["epoch"]
LOG.info("model loaded at epoch: {}".format(epoch_model))
def download_file(result_dir, filename):
    """Download a file from youtube given an AudioSet filename.
    (It takes only a part of the file thanks to the information provided in the filename)

    Parameters
    ----------
    result_dir : str, result directory which will contain the downloaded file
    filename : str, AudioSet filename to download

    Return
    ------
    list : list, empty if the file is downloaded, otherwise contains the filename and the associated error
    """
    LOG.debug(filename)
    tmp_filename = ""
    query_id = filename[1:12]
    segment_start = filename[13:-4].split('_')[0]
    segment_end = filename[13:-4].split('_')[1]
    audio_container = AudioContainer()

    # Define download parameters
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': TMP_FOLDER + '%(id)s.%(ext)s',
        'noplaylist': True,
        'quiet': True,
        'prefer_ffmpeg': True,
        'logger': MyLogger(),
        'audioformat': 'wav'
    }

    try:
        # Download file
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            meta = ydl.extract_info(
                'https://www.youtube.com/watch?v={query_id}'.format(query_id=query_id), download=True)
        audio_formats = [f for f in meta["formats"] if f.get('vcodec') == 'none']
        # `audio_formats is []` was always False; test emptiness instead
        if not audio_formats:
            return [filename, "no audio format available"]
        # Get the best audio format
        best_audio_format = audio_formats[-1]

        tmp_filename = TMP_FOLDER + query_id + "." + best_audio_format["ext"]
        audio_container.load(filename=tmp_filename, fs=44100, res_type='kaiser_best',
                             start=float(segment_start), stop=float(segment_end))

        # Save segmented audio
        audio_container.filename = filename
        audio_container.detect_file_format()
        audio_container.save(filename=os.path.join(result_dir, filename))

        # Remove temporary file
        os.remove(tmp_filename)
        return []

    except (KeyboardInterrupt, SystemExit):
        # Remove temporary files and current audio file.
        for fpath in glob.glob(TMP_FOLDER + query_id + "*"):
            os.remove(fpath)
        raise

    # youtube-dl error, file often removed
    except (ExtractorError, DownloadError, OSError) as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        return [filename, str(e)]

    # multiprocessing can give this error
    except IndexError as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        LOG.info(filename)
        LOG.info(str(e))
        return [filename, "Index Error"]
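# Possible way to parallelize the downloads (sketch only; assumes the N_JOBS constant
# and the metadata dataframe `df` from the accompanying __main__ block; download_file
# returns a non-empty list only on failure):
#
# from multiprocessing import Pool
# with Pool(N_JOBS) as pool:
#     results = pool.starmap(download_file,
#                            [(result_dir, f) for f in df.filename.unique()])
# missing_files = [r for r in results if r]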
        if train:
            loss.backward()
            optimizer.step()
        cnt += 1

    if cnt > 0:
        loss_mean = loss_mean / cnt
        acc_mean = acc_mean / cnt
    else:
        warnings.warn("No training has been performed")
    return loss_mean, acc_mean


if __name__ == '__main__':
    LOG.info(__file__)
    t = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('--max-epoch', type=int, default=200)
    parser.add_argument('--save-epoch', type=int, default=20)
    parser.add_argument('--shot', type=int, default=1)  # How many samples per class to build the prototypes
    parser.add_argument('--query', type=int, default=1)  # How many samples per class to evaluate
    parser.add_argument('--train-way', type=int, default=10)
    parser.add_argument('--test-way', type=int, default=10)
    parser.add_argument('--n_layers_RNN', type=int, default=2)
    parser.add_argument('--dim_RNN', type=int, default=64)
    parser.add_argument('--test-only', action="store_true", default=False)
    parser.add_argument('--load',