def _load_state_vars(state, gtruth_df, median_win=None):
    """Rebuild the model, dataloader and decoding parameters from a saved state dict."""
    pred_df = gtruth_df.copy()
    # Define dataloader
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])
    scaler = _load_scaler(state)
    crnn = _load_crnn(state)
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler, add_axis=0)

    strong_dataload = DataLoadDf(pred_df, many_hot_encoder.encode_strong_df,
                                 transforms_valid, return_indexes=True)
    strong_dataloader_ind = DataLoader(strong_dataload, batch_size=cfg.batch_size,
                                       drop_last=False)

    pooling_time_ratio = state["pooling_time_ratio"]
    if median_win is None:
        median_win = state["median_window"]
    return {
        "model": crnn,
        "dataloader": strong_dataloader_ind,
        "pooling_time_ratio": pooling_time_ratio,
        "many_hot_encoder": many_hot_encoder,
        "median_window": median_win,
    }
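# Hedged usage sketch (not part of the original file): how the dict returned by
# _load_state_vars is typically consumed. The checkpoint path, the torch.load
# call on it, and the CRNN returning a (strong, weak) prediction pair are
# assumptions for illustration; only the dict keys come from the function above.
def _demo_predict_from_state(checkpoint_path, gtruth_df):
    import torch
    state = torch.load(checkpoint_path, map_location="cpu")  # assumed checkpoint layout
    params = _load_state_vars(state, gtruth_df)
    crnn = params["model"].eval()
    for batch, indexes in params["dataloader"]:
        # return_indexes=True: each batch carries sample indexes alongside the
        # (features, target) pair; the exact unpacking is dataset-specific.
        batch_x = batch[0]
        with torch.no_grad():
            strong_pred, weak_pred = crnn(batch_x)  # assumed CRNN output pair
        # Decode strong_pred with params["many_hot_encoder"] and smooth the
        # frame-level decisions with a params["median_window"]-sized median filter.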
def audio_tagging_results(reference, estimated):
    """Compute clip-level (audio tagging) F-scores between reference and
    estimated event DataFrames."""
    classes = []
    if "event_label" in reference.columns:
        # Strongly annotated data: one event per row
        classes.extend(reference.event_label.dropna().unique())
        classes.extend(estimated.event_label.dropna().unique())
        classes = list(set(classes))
        mhe = ManyHotEncoder(classes)
        reference = format_df(reference, mhe)
        estimated = format_df(estimated, mhe)
    else:
        # Weakly annotated data: comma-separated clip-level labels
        classes.extend(reference.event_labels.str.split(",", expand=True)
                       .unstack().dropna().unique())
        classes.extend(estimated.event_labels.str.split(",", expand=True)
                       .unstack().dropna().unique())
        classes = list(set(classes))
        mhe = ManyHotEncoder(classes)

        # Encode the weak labels into many-hot vectors under an "event_label"
        # column so the merge below yields event_label_ref / event_label_pred
        def encode_weak_df(df):
            return pd.DataFrame({
                "filename": df.filename,
                "event_label": df.event_labels.str.split(",").apply(
                    lambda lbls: mhe.encode_weak(lbls if isinstance(lbls, list) else [])),
            })
        reference = encode_weak_df(reference)
        estimated = encode_weak_df(estimated)

    matching = reference.merge(estimated, how="outer", on="filename",
                               suffixes=["_ref", "_pred"])

    def na_values(val):
        # Missing side of the outer merge -> all-zero many-hot vector
        if type(val) is np.ndarray:
            return val
        if pd.isna(val):
            return np.zeros(len(classes))
        return val

    if not estimated.empty:
        matching.event_label_pred = matching.event_label_pred.apply(na_values)
        matching.event_label_ref = matching.event_label_ref.apply(na_values)

        tp, fp, fn, tn = intermediate_at_measures(
            np.array(matching.event_label_ref.tolist()),
            np.array(matching.event_label_pred.tolist()))
        macro_res = macro_f_measure(tp, fp, fn)
    else:
        macro_res = np.zeros(len(classes))

    results_serie = pd.DataFrame(macro_res, index=mhe.labels)
    return results_serie[0]
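# Hedged usage sketch (not part of the original file): clip-level tagging scores
# between toy reference and prediction DataFrames. Filenames, labels and the
# strongly annotated column layout (filename/onset/offset/event_label) are made
# up; that layout triggers the "event_label" branch above.
def _demo_audio_tagging_results():
    reference = pd.DataFrame({
        "filename": ["a.wav", "a.wav", "b.wav"],
        "onset": [0.0, 1.0, 0.5],
        "offset": [1.0, 2.0, 3.0],
        "event_label": ["Speech", "Dog", "Speech"],
    })
    estimated = pd.DataFrame({
        "filename": ["a.wav", "b.wav"],
        "onset": [0.2, 0.4],
        "offset": [1.1, 2.9],
        "event_label": ["Speech", "Cat"],
    })
    per_class_f1 = audio_tagging_results(reference, estimated)
    print(per_class_f1)  # pandas Series of per-class F-scores indexed by label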
def _load_state_vars(state, gtruth_df, median_win=None):
    pred_df = gtruth_df.copy()
    # Define the dataset (no DataLoader here, the caller handles batching)
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])
    scaler = _load_scaler(state)
    crnn = _load_crnn(state)
    # Note, need to unsqueeze axis 1
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler, add_axis=1)
    # Note, no dataloader here
    strong_dataload = DataLoadDf(pred_df, many_hot_encoder.encode_strong_df,
                                 transforms_valid, return_indexes=True)

    pooling_time_ratio = state["pooling_time_ratio"]
    if median_win is None:
        median_win = state["median_window"]
    return {
        "model": crnn,
        "dataload": strong_dataload,
        "pooling_time_ratio": pooling_time_ratio,
        "many_hot_encoder": many_hot_encoder,
        "median_window": median_win,
    }
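# Hedged usage sketch (not part of the original file): this variant returns the
# raw DataLoadDf under "dataload" instead of a ready-made DataLoader, so the
# caller picks the batching. The DataLoader settings below are illustrative.
def _demo_wrap_dataload(state, gtruth_df):
    params = _load_state_vars(state, gtruth_df)
    dataloader = DataLoader(params["dataload"], batch_size=cfg.batch_size,
                            shuffle=False, drop_last=False)
    return dataloader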
out_nb_frames_1s = cfg.sample_rate / cfg.hop_size / pooling_time_ratio
median_window = max(int(cfg.median_window_s * out_nb_frames_1s), 1)
logger.debug(f"median_window: {median_window}")

# ##############
# DATA
# ##############
dataset = DESED(base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                compute_log=False)
dfs = get_dfs(dataset, reduced_number_of_data)

# Meta path for psds
durations_synth = get_durations_df(cfg.synthetic)
many_hot_encoder = ManyHotEncoder(cfg.classes,
                                  n_frames=cfg.max_frames // pooling_time_ratio)
encod_func = many_hot_encoder.encode_strong_df

# Normalisation per audio or on the full dataset
if cfg.scaler_type == "dataset":
    transforms = get_transforms(cfg.max_frames, add_axis=add_axis_conv)
    weak_data = DataLoadDf(dfs["weak"], encod_func, transforms)
    unlabel_data = DataLoadDf(dfs["unlabel"], encod_func, transforms)
    train_synth_data = DataLoadDf(dfs["train_synthetic"], encod_func, transforms)
    scaler_args = []
    scaler = Scaler()
    # Scaler statistics computed over the whole training set
    # (weak + unlabeled + synthetic)
    scaler.calculate_scaler(
        ConcatDataset([weak_data, unlabel_data, train_synth_data]))
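# Hedged worked example (the numbers are assumptions, not read from cfg): with
# sample_rate=16000, hop_size=255 and pooling_time_ratio=4, the model emits
# 16000 / 255 / 4 ≈ 15.7 output frames per second, so a 0.45 s median filter
# spans max(int(0.45 * 15.7), 1) = 7 output frames.
def _demo_median_window(sample_rate=16000, hop_size=255, pooling_time_ratio=4,
                        median_window_s=0.45):
    out_nb_frames_1s = sample_rate / hop_size / pooling_time_ratio
    return max(int(median_window_s * out_nb_frames_1s), 1)  # -> 7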