def update_plot2D(samples_per_class, x_select, y_select, active_tab,
                  fold_ix, model_path, dataset_ix, sr):
    """Refresh the 2D embedding scatter for the visualization tab.

    When the visualization tab is active: loads the scaler and model
    weights saved for the selected fold, takes one central instance per
    audio sequence, computes intermediate-layer embeddings, projects them
    with a 4-component PCA and returns the updated figure (wrapped in a
    single-element list for the Dash output).
    """
    global X
    global X_pca
    global Y
    global file_names
    global feature_extractor

    print('start visualization')
    if (active_tab == 'tab_visualization'):
        fold_name = dataset.fold_list[fold_ix]
        exp_folder_fold = conv_path(os.path.join(model_path, fold_name))

        # Scaler fitted at training time for this fold.
        scaler = load_pickle(os.path.join(exp_folder_fold, 'scaler.pickle'))

        dataset_name = options_datasets[dataset_ix]['label']
        params_dataset = params['datasets'][dataset_name]
        folds_train, folds_val, _ = evaluation_setup(
            fold_name, dataset.fold_list,
            params_dataset['evaluation_mode'])
        print(feature_extractor)

        data_gen_train = DataGenerator(
            dataset, feature_extractor, folds=folds_train,
            batch_size=params['train']['batch_size'],
            shuffle=True, train=False, scaler=scaler)
        X_list, Y_list = data_gen_train.get_data()
        file_names = data_gen_train.audio_file_list

        # Keep a single representative (central) instance per sequence.
        centers_x = []
        centers_y = []
        for seq_x, seq_y in zip(X_list, Y_list):
            mid = int(len(seq_x) / 2) if len(seq_x) > 1 else 0
            centers_x.append(np.expand_dims(seq_x[mid], 0))
            centers_y.append(np.expand_dims(seq_y[mid], 0))
        X = np.concatenate(centers_x, axis=0)
        Yt = np.concatenate(centers_y, axis=0)

        with graph.as_default():
            model_container.load_model_weights(exp_folder_fold)
            # Embeddings from the second-to-last layer.
            X_emb = model_container.get_intermediate_output(-2, X)

        projector = PCA(n_components=4)
        projector.fit(X_emb)
        X_pca = projector.transform(X_emb)
        print('pca', X_pca.shape, Yt.shape)

        figure2D = generate_figure2D(
            X_pca, Yt, dataset.label_list,
            pca_components=[x_select, y_select],
            samples_per_class=samples_per_class)
        return [figure2D]
def test_evaluation_setup():
    """Check the fold splits produced by evaluation_setup for every mode."""
    folds = ["fold1", "fold2", "fold3", "fold4"]
    held_out = "fold1"

    # cross-validation with a validation fold
    train, val, test = evaluation_setup(
        held_out, folds, "cross-validation", use_validate_set=True)
    assert test == ["fold1"]
    assert val == ["fold2"]
    assert train == ["fold3", "fold4"]

    # cross-validation without a validation fold: validate on the train folds
    train, val, test = evaluation_setup(
        held_out, folds, "cross-validation", use_validate_set=False)
    assert test == ["fold1"]
    assert val == train
    assert train == ["fold2", "fold3", "fold4"]

    # fixed train/validate/test split
    train, val, test = evaluation_setup(
        held_out, folds, "train-validate-test", use_validate_set=True)
    assert test == ["test"]
    assert val == ["validate"]
    assert train == ["train"]

    # fixed train/test split: validate on the train fold
    train, val, test = evaluation_setup(
        held_out, folds, "train-test", use_validate_set=True)
    assert test == ["test"]
    assert val == ["train"]
    assert train == ["train"]

    # cross-validation over all folds with a dedicated test fold
    train, val, test = evaluation_setup(
        held_out, folds, "cross-validation-with-test", use_validate_set=True)
    assert test == ["test"]
    assert val == ["fold2"]
    assert train == ["fold1", "fold3", "fold4"]

    # unknown evaluation mode is rejected
    with pytest.raises(AttributeError):
        evaluation_setup(
            held_out, folds, "blablabla", use_validate_set=True)
def main():
    """Command-line entry point to train a model on a dataset fold.

    Parses CLI arguments, loads parameters.json, optionally applies data
    augmentation, extracts features if not already extracted, fits a
    normalization scaler on the training folds, builds the model and
    trains it. The model JSON, the fitted scaler and the weights are saved
    under ``<models_path>/<model>/<dataset>/<fold_name>``.
    """
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-f', '--features', type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram')
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../')
    parser.add_argument(
        '-m', '--model', type=str,
        help='model name (e.g. MLP, SB_CNN, SB_CNN_SED, A_CRNN, VGGish)',
        default='SB_CNN')
    parser.add_argument(
        '-fold', '--fold_name', type=str,
        help='fold name',
        default='fold1')
    parser.add_argument(
        '-s', '--models_path', type=str,
        help='path to save the trained model',
        default='../trained_models')
    # --aug / --no-aug write the same boolean destination; off by default.
    parser.add_argument('--aug', dest='augmentation', action='store_true')
    parser.add_argument('--no-aug', dest='augmentation', action='store_false')
    parser.set_defaults(augmentation=False)
    args = parser.parse_args()

    print(__doc__)

    # Validate CLI choices against the registries of available components.
    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')
    if args.features not in get_available_features():
        raise AttributeError('Features not available')
    if args.model not in get_available_models():
        raise AttributeError('Model not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]
    params_features = params['features']
    params_model = params['models'][args.model]

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path)

    if args.fold_name not in dataset.fold_list:
        raise AttributeError('Fold not available')

    # Data augmentation
    if args.augmentation:
        # Define the augmentations
        augmentations = params['data_augmentations']
        # Initialize AugmentedDataset: wraps the original dataset so the
        # augmented files are processed and used alongside the originals.
        dataset = AugmentedDataset(
            dataset, params['features']['sr'], augmentations)
        # Process all files
        print('Doing data augmentation ...')
        dataset.process()
        print('Done!')

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'],
        **params_features[args.features])
    print('Features shape: ', features.get_shape())

    # Check if features were extracted (extraction runs only once).
    if not features.check_if_extracted(dataset):
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')

    use_validate_set = True
    if args.dataset in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
        # When have less data, don't use validation set.
        use_validate_set = False

    folds_train, folds_val, _ = evaluation_setup(
        args.fold_name, dataset.fold_list,
        params_dataset['evaluation_mode'],
        use_validate_set=use_validate_set)

    # Train generator starts without a scaler; the scaler is fitted on it
    # below and then attached.
    data_gen_train = DataGenerator(
        dataset, features, folds=folds_train,
        batch_size=params['train']['batch_size'],
        shuffle=True, train=True, scaler=None)

    scaler = Scaler(normalizer=params_model['normalizer'])
    print('Fitting scaler ...')
    scaler.fit(data_gen_train)
    print('Done!')

    # Pass scaler to data_gen_train to be used when data
    # loading
    data_gen_train.set_scaler(scaler)

    data_gen_val = DataGenerator(
        dataset, features, folds=folds_val,
        batch_size=params['train']['batch_size'],
        shuffle=False, train=False, scaler=scaler)

    # Define model
    features_shape = features.get_shape()
    n_frames_cnn = features_shape[1]
    n_freq_cnn = features_shape[2]
    n_classes = len(dataset.label_list)

    model_class = get_available_models()[args.model]

    # Metric family depends on the dataset's task type.
    metrics = ['classification']
    if args.dataset in sed_datasets:
        metrics = ['sed']
    if args.dataset in tagging_datasets:
        metrics = ['tagging']

    model_container = model_class(
        model=None, model_path=None, n_classes=n_classes,
        n_frames_cnn=n_frames_cnn, n_freq_cnn=n_freq_cnn,
        metrics=metrics,
        **params_model['model_arguments'])
    model_container.model.summary()

    # Set paths
    model_folder = os.path.join(args.models_path, args.model, args.dataset)
    exp_folder = os.path.join(model_folder, args.fold_name)
    mkdir_if_not_exists(exp_folder, parents=True)

    # Save model json and scaler
    model_container.save_model_json(model_folder)
    save_pickle(scaler, os.path.join(exp_folder, 'scaler.pickle'))

    # data_train = data_gen_train.get_data()
    # data_val = data_gen_val.get_data()

    # Train model
    model_container.train(
        data_gen_train, data_gen_val,
        # data_train, data_val,
        label_list=dataset.label_list,
        weights_path=exp_folder,
        **params['train'],
        sequence_time_sec=params_features['sequence_hop_time'])
def main():
    """Command-line entry point to fine-tune a pre-trained model.

    Loads the weights of a model trained on ``--origin_dataset`` /
    ``--origin_fold_name``, cuts the network before its last two layers
    and adds a new output layer sized for ``--dataset`` (source layers
    frozen), then fits a new scaler and trains the adapted model. Results
    are saved under
    ``<models_path>/<model>/<origin_dataset>_ft_<dataset>/<fold_name>``.
    """
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '-od', '--origin_dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k'
    )
    parser.add_argument(
        '-ofold', '--origin_fold_name', type=str, help='origin fold name',
        default='fold1')
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='ESC50'
    )
    parser.add_argument(
        '-fold', '--fold_name', type=str, help='destination fold name',
        default='fold1')
    parser.add_argument(
        '-f', '--features', type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram'
    )
    parser.add_argument(
        '-p', '--path', type=str, help='path to the parameters.json file',
        default='../'
    )
    parser.add_argument(
        '-m', '--model', type=str,
        help='model name (e.g. MLP, SB_CNN, SB_CNN_SED, A_CRNN, VGGish)',
        default='SB_CNN')
    parser.add_argument(
        '-s', '--models_path', type=str,
        help='path to save the trained model',
        default='../trained_models'
    )
    args = parser.parse_args()

    print(__doc__)

    # Validate CLI choices against the registries of available components.
    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')
    if args.features not in get_available_features():
        raise AttributeError('Features not available')
    if args.model not in get_available_models():
        raise AttributeError('Model not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]
    params_features = params['features']
    params_model = params['models'][args.model]

    # Load origin model
    model_path_origin = os.path.join(
        args.models_path, args.model, args.origin_dataset)
    model_class = get_available_models()[args.model]
    # FIX: use the same metric names as the training script
    # ('classification' / 'sed'); the previous 'accuracy' is not one of
    # the metric families used elsewhere in this project.
    metrics = ['classification']
    if args.dataset in sed_datasets:
        metrics = ['sed']
    model_container = model_class(
        model=None,
        model_path=model_path_origin,
        metrics=metrics
    )
    model_container.load_model_weights(
        os.path.join(model_path_origin, args.origin_fold_name))

    # SED datasets additionally receive the sequence hop time.
    kwargs = {}
    if args.dataset in sed_datasets:
        kwargs = {'sequence_hop_time': params_features['sequence_hop_time']}

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path, **kwargs)

    if args.fold_name not in dataset.fold_list:
        raise AttributeError('Fold not available')

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'],
        **params_features[args.features]
    )
    print('Features shape: ', features.get_shape())

    # Check if features were extracted (extraction runs only once).
    if not features.check_if_extracted(dataset):
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')

    use_validate_set = True
    if args.dataset in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
        # When have less data, don't use validation set.
        use_validate_set = False

    folds_train, folds_val, _ = evaluation_setup(
        args.fold_name, dataset.fold_list,
        params_dataset['evaluation_mode'],
        use_validate_set=use_validate_set
    )

    # Train generator starts without a scaler; the scaler is fitted on it
    # below and then attached.
    data_gen_train = DataGenerator(
        dataset, features, folds=folds_train,
        batch_size=params['train']['batch_size'],
        shuffle=True, train=True, scaler=None
    )

    scaler = Scaler(normalizer=params_model['normalizer'])
    # FIX: the message said 'Fitting features ...' but a scaler is fitted
    # here; use the same message as the sibling training script.
    print('Fitting scaler ...')
    scaler.fit(data_gen_train)
    print('Done!')

    data_gen_train.set_scaler(scaler)

    data_gen_val = DataGenerator(
        dataset, features, folds=folds_val,
        batch_size=params['train']['batch_size'],
        shuffle=False, train=False, scaler=scaler
    )

    # Fine-tune model: replace the output head and freeze source layers.
    n_classes = len(dataset.label_list)
    layer_where_to_cut = -2
    model_container.fine_tuning(
        layer_where_to_cut,
        new_number_of_classes=n_classes,
        new_activation='sigmoid',
        freeze_source_model=True)
    model_container.model.summary()

    # Set paths
    model_folder = os.path.join(
        args.models_path, args.model,
        args.origin_dataset + '_ft_' + args.dataset)
    exp_folder = os.path.join(model_folder, args.fold_name)
    mkdir_if_not_exists(exp_folder, parents=True)

    # Save model json and scaler
    model_container.save_model_json(model_folder)
    save_pickle(scaler, os.path.join(exp_folder, 'scaler.pickle'))

    # Train model
    model_container.train(
        data_gen_train, data_gen_val,
        label_list=dataset.label_list,
        weights_path=exp_folder,
        sequence_time_sec=params_features['sequence_hop_time'],
        **params['train'])
def start_training(status, fold_ix, normalizer, model_path, epochs,
                   early_stopping, optimizer_ix, learning_rate, batch_size,
                   considered_improvement, n_clicks_train, dataset_ix):
    """Dash callback that trains the current model on the selected fold.

    Validates the UI selections, builds train/validation data generators,
    fits and saves a scaler, trains the model inside the stored TF graph
    and reloads the saved weights.

    Returns a 4-element list for the Dash outputs — presumably
    [alert_open, alert_message, alert_color, done_flag]; confirm against
    the callback's Output declarations.
    """
    global data_generator_train
    global data_generator_val
    if status == 'TRAINING':
        # Guard clauses: report missing selections to the UI instead of
        # failing later with an index error.
        if fold_ix is None:
            return [True, 'Please select a Fold', 'danger', ""]
        if optimizer_ix is None:
            return [True, 'Please select an Optimizer', 'danger', ""]

        dataset_name = options_datasets[dataset_ix]['label']
        fold_name = dataset.fold_list[fold_ix]
        params_dataset = params['datasets'][dataset_name]
        optimizer = options_optimizers[optimizer_ix]['label']

        use_validate_set = True
        if dataset_name in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
            # When have less data, don't use validation set.
            use_validate_set = False

        folds_train, folds_val, _ = evaluation_setup(
            fold_name, dataset.fold_list,
            params_dataset['evaluation_mode'],
            use_validate_set=use_validate_set)

        # NOTE(review): the training generator takes its batch size from
        # the global params, while the validation generator and
        # train_arguments use this callback's batch_size argument —
        # confirm that mismatch is intended.
        data_generator_train = DataGenerator(
            dataset, feature_extractor, folds=folds_train,
            batch_size=params['train']['batch_size'],
            shuffle=True, train=True, scaler=None)

        scaler = Scaler(normalizer=normalizer)
        print('Fitting scaler ...')
        scaler.fit(data_generator_train)
        print('Done!')

        # Pass scaler to data_gen_train to be used when data
        # loading
        data_generator_train.set_scaler(scaler)

        data_generator_val = DataGenerator(
            dataset, feature_extractor, folds=folds_val,
            batch_size=batch_size,
            shuffle=False, train=False, scaler=scaler)

        # Persist the fitted scaler next to the fold's weights.
        exp_folder_fold = conv_path(os.path.join(model_path, fold_name))
        mkdir_if_not_exists(exp_folder_fold, parents=True)
        scaler_path = os.path.join(exp_folder_fold, 'scaler.pickle')
        save_pickle(scaler, scaler_path)

        # Training hyper-parameters forwarded verbatim from the UI.
        train_arguments = {
            'epochs': epochs,
            'early_stopping': early_stopping,
            'optimizer': optimizer,
            'learning_rate': learning_rate,
            'batch_size': batch_size,
            'considered_improvement': considered_improvement
        }
        # Run inside the module-level default graph (graph is a global).
        with graph.as_default():
            model_container.train(
                data_generator_train, data_generator_val,
                weights_path=exp_folder_fold,
                label_list=dataset.label_list,
                **train_arguments)
            # Reload the weights saved during training.
            model_container.load_model_weights(exp_folder_fold)
        return [True, "Model trained", 'success', 'True']
    else:
        # Any other status: leave the outputs untouched.
        raise dash.exceptions.PreventUpdate