def test_model(state, reference_tsv_path, reduced_number_of_data=None, strore_predicitions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path, reduced_number_of_data)

    # Strong (frame-level) predictions and metrics
    strong_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)
    predictions = get_predictions(crnn, strong_dataload, many_hot_encoder.decode_strong, pooling_time_ratio,
                                  save_predictions=strore_predicitions_fname)
    compute_strong_metrics(predictions, df)

    # Weak (clip-level) metrics
    weak_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(weak_dataload, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
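# Illustrative usage sketch (assumption, not original code): test_model expects a checkpoint
# dictionary with the keys read above ("model", "epoch", "pooling_time_ratio", "scaler",
# "many_hot_encoder"). Assuming such a checkpoint was saved with torch.save, a call could look like:
#
#     import torch
#
#     state = torch.load("stored_data/model/baseline_best", map_location="cpu")  # hypothetical path
#     test_model(state, cfg.validation,
#                strore_predicitions_fname="validation_predictions.tsv")  # hypothetical output file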
def set_df_list(self, train):
    dataset = DatasetDcase2019Task4(cfg.workspace,
                                    base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)
    transforms = get_transforms(cfg.max_frames)

    weak_df = dataset.initialize_and_get_df(cfg.weak)
    load_weak = DataLoadDf(weak_df, dataset.get_feature_file, None, transform=transforms)

    if train:
        self.list_dataset = [load_weak]
    else:
        # Convert onset/offset annotations from seconds to feature-frame indices
        synthetic_df = dataset.initialize_and_get_df(cfg.synthetic, download=False)
        synthetic_df.onset = synthetic_df.onset * cfg.sample_rate // cfg.hop_length
        synthetic_df.offset = synthetic_df.offset * cfg.sample_rate // cfg.hop_length

        validation_df = dataset.initialize_and_get_df(cfg.validation)
        validation_df.onset = validation_df.onset * cfg.sample_rate // cfg.hop_length
        validation_df.offset = validation_df.offset * cfg.sample_rate // cfg.hop_length

        eval_desed_df = dataset.initialize_and_get_df(cfg.eval_desed)
        eval_desed_df.onset = eval_desed_df.onset * cfg.sample_rate // cfg.hop_length
        eval_desed_df.offset = eval_desed_df.offset * cfg.sample_rate // cfg.hop_length

        # many_hot_encoder = ManyHotEncoder(classes, n_frames=cfg.max_frames // pooling_time_ratio)

        load_synthetic = DataLoadDf(synthetic_df, dataset.get_feature_file, None, transform=transforms)
        load_validation = DataLoadDf(validation_df, dataset.get_feature_file, None, transform=transforms)
        load_eval_desed = DataLoadDf(eval_desed_df, dataset.get_feature_file, None, transform=transforms)
        self.list_dataset = [load_weak, load_synthetic, load_validation, load_eval_desed]

    scaler = Scaler()
    scaler.calculate_scaler(ConcatDataset(self.list_dataset))
    transforms = get_transforms(cfg.max_frames, scaler)
    for i in range(len(self.list_dataset)):
        self.list_dataset[i].set_transform(transforms)
    print(self.list_dataset)
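# Worked example of the onset/offset conversion above (hypothetical values, not the project's config):
# annotations given in seconds are mapped to feature-frame indices.
#   sample_rate, hop_length = 16000, 256      # illustrative values only
#   1.5 * sample_rate // hop_length == 93.0   # a 1.5 s onset lands on frame 93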
def test_model(state, reduced_number_of_data, strore_predicitions_fname=None):
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes),
    #                                      DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # Validation 2019
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                     base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                     save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file,
    #                                    many_hot_encoder.encode_strong_df, transform=transforms_valid)
    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                                save_predictions=strore_predicitions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)
    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                                  transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes),
    #                                      DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
    # ============================================================================================

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir, local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag, save_log_feature=False)

    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)
    predictions = get_predictions(crnn, validation_strong, many_hot_encoder.decode_strong,
                                  save_predictions=strore_predicitions_fname)

    # The metrics expect audio filenames, while the dataframes reference feature files (.npy)
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
train_synth_df_frames.offset = train_synth_df_frames.offset * cfg.sample_rate // cfg.hop_length // pooling_time_ratio
LOG.debug(valid_synth_df.event_label.value_counts())
LOG.debug(valid_synth_df)
train_synth_data = DataLoadDf(train_synth_df_frames, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                              transform=transforms)

if not no_weak:
    list_datasets = [train_weak_data, train_synth_data]
    training_data = ConcatDataset(list_datasets)
else:
    list_datasets = [train_synth_data]
    training_data = train_synth_data

scaler = Scaler()
scaler.calculate_scaler(training_data)
LOG.debug(scaler.mean_)

transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
# The validation set is only used to get an idea of what the results could be on the evaluation set
validation_dataset = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                transform=transforms_valid)

transforms = get_transforms(cfg.max_frames, scaler)
train_synth_data.set_transform(transforms)
if not no_weak:
    train_weak_data.set_transform(transforms)
    concat_dataset = ConcatDataset([train_weak_data, train_synth_data])
validation_data = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                             transform=transforms)
test_data = DataLoadDf(test_df, dataset.get_feature_file, many_hot_encoder.encode_weak, transform=transforms)

list_dataset = [train_data]
batch_sizes = [cfg.batch_size]
# batch_sizes = [cfg.batch_size // len(list_dataset)] * len(list_dataset)
weak_mask = slice(cfg.batch_size)
strong_mask = None

scaler = Scaler()
if path.exists(cfg.scaler_fn):
    LOG.info('Loading scaler from {}'.format(cfg.scaler_fn))
    scaler.load(cfg.scaler_fn)
else:
    scaler.calculate_scaler(ConcatDataset(list_dataset))
    LOG.info('Saving scaler to {}'.format(cfg.scaler_fn))
    scaler.save(cfg.scaler_fn)
LOG.debug(scaler.mean_)

transforms = get_transforms(cfg.max_frames, scaler, augment_type="noise")
transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
for i in range(len(list_dataset)):
    list_dataset[i].set_transform(transforms)
validation_data.set_transform(transforms_valid)
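# Note on the masks above (an assumption about how they are consumed later; the training loop is
# not shown here): weak_mask and strong_mask are slice objects that pick, out of each combined
# batch of cfg.batch_size samples, the portion carrying weak (clip-level) labels and the portion
# carrying strong (frame-level) labels, along the lines of
#   weak_pred = predictions[weak_mask]      # clip-level loss on weakly labelled samples
#   strong_pred = predictions[strong_mask]  # frame-level loss on strongly labelled samples
# With a single weakly labelled dataset, weak_mask covers the whole batch and strong_mask is None.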
batch_sizes = [6 * cfg.batch_size // 15,
               2 * cfg.batch_size // 15,
               7 * cfg.batch_size // 15]
strong_mask = slice(6 * cfg.batch_size // 15 + 2 * cfg.batch_size // 15, cfg.batch_size)
# batch_sizes = [cfg.batch_size//3, 2*cfg.batch_size//3]
# strong_mask = slice(cfg.batch_size//3, cfg.batch_size)
weak_mask = slice(batch_sizes[0] + batch_sizes[1])

#############################################################################
scaler = Scaler()
scaler.calculate_scaler(ConcatDataset(list_dataset))
LOG.debug(scaler.mean_)
# print(train_weak_data.filenames)
# exit()

#############################################################################
# transforms = get_transforms(cfg.max_frames, scaler, augment_type="noise")
LOG.info("Change Normalize(Zero-padding)")
transforms = get_transforms_AANPT(cfg.max_frames, scaler, augment_type="noise")

#############################################################################
for i in range(len(list_dataset)):