def main():
    """Sweep the mixup alpha of the baseline model.

    The sweep is repeated 7 times so `eval_hyperparams` can average out
    training stochasticity; results are written under the 'baseline_' prefix.
    (A previously commented-out model-size sweep was removed as dead code.)
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'baseline_'
    for repetition in range(7):
        hyper_params = {'mixup_alpha': [0.0, 0.25, 0.5, 0.75]}

        config = Config()
        config.training.model_type = ModelType.BASELINE
        config.training.cnn_repetition = 0
        config.training.lstm_repetition = 1
        config.training.dense_repetition = 0
        config.training.dropout = 0
        config.training.initial_learning_rate = 0.001
        config.training.model_size = 384
        config.training.batch_size = 128
        config.training.label_smoothing = 0
        config.training.l2_regularization = 0
        config.training.x_groups = [['prev_word_id'], ]
        config.training.y_groups = [['word_id'], ]

        eval_hyperparams(base_folder, timer, hyper_params, return_list,
                         train, val, test, config, prefix)
def main():
    """Sweep the model size of the DDC-style model.

    The sweep is repeated 7 times so `eval_hyperparams` can average out
    training stochasticity; results are written under the 'ddc_' prefix.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'ddc_'
    for repetition in range(7):
        hyper_params = {'model_size': [1024, 768, 512, 384, 256, 128, 64, ]}

        config = Config()
        # BUG FIX: was the typo 'dcc', which names no model type; the 'ddc_'
        # prefix and the DDC hyperparameters below confirm 'ddc' was intended.
        config.training.model_type = 'ddc'
        config.training.cnn_repetition = 0
        config.training.lstm_repetition = 2
        config.training.dense_repetition = 0
        config.training.dropout = 0.5
        config.training.initial_learning_rate = 0.001
        config.training.batch_size = 64
        config.training.label_smoothing = 0
        config.training.mixup_alpha = 0
        config.training.l2_regularization = 0
        config.training.x_groups = [['prev_word_id'], ['prev', 'next', ]]
        config.training.y_groups = [['word_id'], ]

        eval_hyperparams(base_folder, timer, hyper_params, return_list,
                         train, val, test, config, prefix)
def main():
    """Compare the strongest configuration of every architecture family.

    Each of the 7 repetitions evaluates the baseline, DDC, two custom models,
    and the tuned multi-LSTM on the same split, appending metrics to one CSV.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = ''
    test_name = 'best_model_comparison2'
    print('Running best model comparison')
    csv_file = base_folder / 'temp' / f'{prefix}{test_name}.csv'

    for _ in range(7):
        # --- Baseline: previous id -> id, single LSTM ---
        cfg = Config()
        run_name = ModelType.BASELINE
        cfg.training.model_type = 'baseline'
        cfg.training.cnn_repetition = 0
        cfg.training.lstm_repetition = 1
        cfg.training.dense_repetition = 0
        cfg.training.model_size = 384
        cfg.training.dropout = 0
        cfg.training.initial_learning_rate = 0.001
        cfg.training.batch_size = 128
        cfg.training.label_smoothing = 0
        cfg.training.mixup_alpha = 0
        cfg.training.l2_regularization = 0
        cfg.training.x_groups = [['prev_word_id'], ]
        cfg.training.y_groups = [['word_id'], ]
        eval_config(csv_file, timer, return_list, train, val, test, cfg,
                    test_name, run_name, None)

        # --- DDC-style model ---
        cfg = Config()
        run_name = ModelType.DDC
        cfg.training.model_type = 'ddc'
        cfg.training.cnn_repetition = 0
        cfg.training.lstm_repetition = 2
        cfg.training.dense_repetition = 0
        cfg.training.model_size = 512
        cfg.training.dropout = 0.5
        cfg.training.initial_learning_rate = 0.001
        cfg.training.batch_size = 64
        cfg.training.label_smoothing = 0
        cfg.training.mixup_alpha = 0
        cfg.training.l2_regularization = 0
        cfg.training.x_groups = [['prev_word_id'], ['prev', 'next', ]]
        cfg.training.y_groups = [['word_id'], ]
        eval_config(csv_file, timer, return_list, train, val, test, cfg,
                    test_name, run_name, None)

        # --- Custom model: vec+id inputs, id output ---
        cfg = Config()
        run_name = 'Custom vec+id:id'
        cfg.training.model_type = ModelType.CUSTOM
        cfg.training.cnn_repetition = 2
        cfg.training.lstm_repetition = 2
        cfg.training.dense_repetition = 2
        cfg.training.model_size = 512
        cfg.training.dropout = 0.4
        cfg.training.initial_learning_rate = 1e-2
        cfg.training.batch_size = 128
        cfg.training.label_smoothing = 0.5
        cfg.training.mixup_alpha = 0.5
        cfg.training.l2_regularization = 0
        cfg.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                 DatasetConfig().categorical,
                                 DatasetConfig().audio,
                                 DatasetConfig().regression]
        cfg.training.y_groups = [['word_id'], ]
        eval_config(csv_file, timer, return_list, train, val, test, cfg,
                    test_name, run_name, None)

        # --- Custom model: vec+id inputs, vec output (no dense stack) ---
        cfg = Config()
        run_name = 'Custom vec+id:vec'
        cfg.training.model_type = ModelType.CUSTOM
        cfg.training.cnn_repetition = 2
        cfg.training.lstm_repetition = 2
        cfg.training.dense_repetition = 0
        cfg.training.model_size = 512
        cfg.training.dropout = 0.4
        cfg.training.initial_learning_rate = 1e-2
        cfg.training.batch_size = 128
        cfg.training.label_smoothing = 0.5
        cfg.training.mixup_alpha = 0.5
        cfg.training.l2_regularization = 0
        cfg.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                 DatasetConfig().categorical,
                                 DatasetConfig().audio,
                                 DatasetConfig().regression]
        cfg.training.y_groups = [['word_vec'], ]
        eval_config(csv_file, timer, return_list, train, val, test, cfg,
                    test_name, run_name, None)

        # --- Tuned multi-LSTM with fixed, previously-searched hyperparameters ---
        cfg = Config()
        run_name = 'MLSTM'
        cfg.training.model_type = ModelType.TUNE_MLSTM
        cfg.training.batch_size = 128
        cfg.training.label_smoothing = 0.5
        cfg.training.mixup_alpha = 0.5
        cfg.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                 DatasetConfig().categorical,
                                 DatasetConfig().audio,
                                 DatasetConfig().regression]
        cfg.training.y_groups = [['word_id'], ]
        hyper = kt.HyperParameters()
        fixed_params = {
            'connections_0': 2,
            'connections_1': 2,
            'connections_2': 2,
            'connections_3': 3,
            'connections_4': 1,
            'connections_5': 3,
            'connections_6': 2,
            'depth_0': 18,
            'depth_1': 23,
            'depth_2': 43,
            'depth_3': 13,
            'depth_4': 52,
            'depth_5': 5,
            'depth_6': 11,
            'dropout_0': 0.25612932926324405,
            'dropout_1': 0.1620424523625309,
            'dropout_2': 0.4720468723284278,
            'dropout_3': 0.43881829788147036,
            'dropout_4': 0.44741780640383355,
            'dropout_5': 0.3327191857714107,
            'dropout_6': 0.1367707920005909,
            'initial_learning_rate': 0.008,
            'label_smoothing': 0.13716631669361445,
            'lstm_layers': 3,
            'width_0': 16,
            'width_1': 9,
            'width_2': 15,
            'width_3': 16,
            'width_4': 5,
            'width_5': 11,
            'width_6': 4,
        }
        for name, value in fixed_params.items():
            hyper.Fixed(name, value=value)
        eval_config(csv_file, timer, return_list, train, val, test, cfg,
                    test_name, run_name, hyper)
def main():
    """Run the hyperparameter sweeps for the custom model.

    Every sweep is repeated 7 times; `eval_hyperparams` appends the
    results under the 'custom_' prefix.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'custom_'

    for _ in range(7):
        # Mixup strength, without label smoothing.
        sweep = {'mixup_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 1.25]}
        cfg = Config()
        cfg.training.label_smoothing = 0
        eval_hyperparams(base_folder, timer, sweep, return_list, train, val,
                         test, cfg, prefix)

        # Label smoothing, without mixup.
        sweep = {'label_smoothing': [0, 0.1, 0.2, 0.3, 0.4,
                                     0.5, 0.6, 0.7, 0.8, 0.9]}
        cfg = Config()
        cfg.training.mixup_alpha = 0
        eval_hyperparams(base_folder, timer, sweep, return_list, train, val,
                         test, cfg, prefix)

        # Joint grid over both regularizers.
        sweep = {
            'label_smoothing': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, ],
            'mixup_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, ],
        }
        cfg = Config()
        eval_hyperparams(base_folder, timer, sweep, return_list, train, val,
                         test, cfg, prefix)

        # Batch size.
        sweep = {'batch_size': [1024, 512, 256, 128, 64, 32]}
        cfg = Config()
        cfg.training.mixup_alpha = 0.75
        cfg.training.label_smoothing = 0
        eval_hyperparams(base_folder, timer, sweep, return_list, train, val,
                         test, cfg, prefix)

        # Input ablation: which input groups matter?
        sweep = {
            'x_groups': [
                # Without previous beat
                [DatasetConfig.categorical, DatasetConfig.audio,
                 DatasetConfig.regression],
                # Without ActionVec information
                [['prev_word_id'], DatasetConfig.categorical,
                 DatasetConfig.audio, DatasetConfig.regression],
                [['prev_word_id'], DatasetConfig.categorical,
                 DatasetConfig.audio, ],
                [['prev_word_id'], DatasetConfig.categorical,
                 DatasetConfig.regression],
                [['prev_word_id'], DatasetConfig.audio,
                 DatasetConfig.regression],
                [['prev_word_id'], ],
                # Without one data stream
                [['prev_word_vec'], ],
                [['prev_word_vec'], DatasetConfig.categorical,
                 DatasetConfig.audio, DatasetConfig.regression],
                [['prev_word_vec'], DatasetConfig.categorical,
                 DatasetConfig.audio, ],
                [['prev_word_vec'], DatasetConfig.categorical,
                 DatasetConfig.regression],
                [['prev_word_vec'], DatasetConfig.audio,
                 DatasetConfig.regression],
                # Give it redundant inputs
                [['prev_word_vec', 'prev_word_id'],
                 DatasetConfig.categorical, DatasetConfig.audio,
                 DatasetConfig.regression],
                [['prev_word_vec', 'prev_word_id'],
                 DatasetConfig.beat_elements_previous_prediction,
                 DatasetConfig.categorical, DatasetConfig.audio,
                 DatasetConfig.regression],
                [['prev_word_vec', ],
                 DatasetConfig.beat_elements_previous_prediction,
                 DatasetConfig.categorical, DatasetConfig.audio,
                 DatasetConfig.regression],
            ]
        }
        cfg = Config()
        cfg.training.mixup_alpha = 0.5
        cfg.training.label_smoothing = 0.5
        eval_hyperparams(base_folder, timer, sweep, return_list, train, val,
                         test, cfg, prefix)

        # Model width.
        sweep = {'model_size': [1024, 768, 512, 384, 256, 128, 64, ]}
        cfg = Config()
        cfg.training.mixup_alpha = 0.5
        cfg.training.label_smoothing = 0.5
        eval_hyperparams(base_folder, timer, sweep, return_list, train, val,
                         test, cfg, prefix)

        # Dropout rate.
        sweep = {'dropout': [0.0, 0.1, 0.2, 0.3, 0.4,
                             0.5, 0.6, 0.7, 0.8, 0.9, ]}
        cfg = Config()
        cfg.training.mixup_alpha = 0.5
        cfg.training.label_smoothing = 0.5
        eval_hyperparams(base_folder, timer, sweep, return_list, train, val,
                         test, cfg, prefix)

        # Depth of each block type, one sweep per block kind (same call
        # sequence as three separate hand-written sweeps).
        for depth_param in ('cnn_repetition', 'lstm_repetition',
                            'dense_repetition'):
            sweep = {depth_param: range(5)}
            cfg = Config()
            eval_hyperparams(base_folder, timer, sweep, return_list, train,
                             val, test, cfg, prefix)
def main():
    """Hyperparameter search, training and evaluation of the tunable models.

    Three independently togglable stages:
      1. ``find_model``  - keras-tuner Hyperband search over the tunable models.
      2. ``train_model`` - train one model with the best known hyperparameters.
      3. ``eval_model``  - load the saved stateful model and generate beatmaps.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    config = Config()
    model_path = base_folder / 'temp'
    find_model = True
    train_model = True
    eval_model = True

    # Shared by the search and training stages.  Hoisted out of the
    # `if find_model:` loop so the training stage no longer raises NameError
    # when the search stage is switched off.  (The sequences only depend on
    # the dataframes and `config`, which the loop below does not alter apart
    # from `model_type` — assumed irrelevant to BeatmapSequence; TODO confirm.)
    train_seq = BeatmapSequence(df=train, is_train=True, config=config)
    val_seq = BeatmapSequence(df=val, is_train=False, config=config)
    test_seq = BeatmapSequence(df=test, is_train=False, config=config)

    if find_model:
        for model_type in [ModelType.TUNE_CLSTM, ModelType.TUNE_MLSTM]:
            # To search for a specific input:output combination, change `config`
            config.training.model_type = model_type
            tuner = kt.Hyperband(
                get_architecture_fn(config)(train_seq, False, config),
                objective=kt.Objective('val_avs_dist', direction='min'),
                hyperband_iterations=1,
                max_epochs=100,
                factor=4,
                directory=base_folder / 'temp' / 'hyper_search',
                project_name=f'{get_architecture_fn(config).__qualname__}',
                overwrite=False,  # CAUTION! False resumes a previous search.
            )
            tuner.search_space_summary()
            callbacks = create_callbacks(train_seq, config)
            tuner.search(
                x=train_seq,
                validation_data=val_seq,
                callbacks=callbacks,
                epochs=60,
                verbose=2,
                workers=10,
                max_queue_size=16,
                use_multiprocessing=False,
            )
            print(tuner.results_summary())
            print(tuner.get_best_models(2)[0].summary())
            print(tuner.get_best_models(2)[0].evaluate(test_seq))

    if train_model:
        # Train with specific, previously-found hyperparameters.  The model
        # type is set explicitly so this stage does not silently depend on
        # whatever the search loop set last.
        config.training.model_type = ModelType.TUNE_MLSTM
        hp = kt.HyperParameters()
        fixed_params = {
            'connections_0': 2,
            'connections_1': 2,
            'connections_2': 2,
            'connections_3': 3,
            'connections_4': 1,
            'connections_5': 3,
            'connections_6': 2,
            'depth_0': 18,
            'depth_1': 23,
            'depth_2': 43,
            'depth_3': 13,
            'depth_4': 52,
            'depth_5': 5,
            'depth_6': 11,
            'dropout_0': 0.25612932926324405,
            'dropout_1': 0.1620424523625309,
            'dropout_2': 0.4720468723284278,
            'dropout_3': 0.43881829788147036,
            'dropout_4': 0.44741780640383355,
            'dropout_5': 0.3327191857714107,
            'dropout_6': 0.1367707920005909,
            'initial_learning_rate': 0.008,
            'label_smoothing': 0.13716631669361445,
            'lstm_layers': 3,
            'width_0': 16,
            'width_1': 9,
            'width_2': 15,
            'width_3': 16,
            'width_4': 5,
            'width_5': 11,
            'width_6': 4,
        }
        # BUG FIX: the loop variable used to be named `val`, clobbering the
        # validation dataframe returned by `init_test`.
        for param, value in fixed_params.items():
            hp.Fixed(param, value=value)
        callbacks = create_callbacks(train_seq, config)
        model = get_architecture_fn(config)(train_seq, False, config)(hp)
        model.summary()
        tf.keras.utils.plot_model(
            model,
            to_file=base_folder / 'temp' / 'model_architecture.png',
            show_shapes=True)
        model.fit(
            x=train_seq,
            validation_data=val_seq,
            callbacks=callbacks,
            epochs=81,
            verbose=2,
            workers=10,
            max_queue_size=16,
            use_multiprocessing=False,
        )
        model_path.mkdir(parents=True, exist_ok=True)
        save_model(model, model_path, train_seq, config, hp=hp)
        timer('Saved model', 5)

    if eval_model:
        stateful_model = tf.keras.models.load_model(
            model_path / 'stateful_model.keras',
            custom_objects={'Perplexity': Perplexity})
        timer('Loaded stateful model', 5)
        input_folder = base_folder / 'human_beatmaps' / 'new_dataformat'
        output_folder = base_folder / 'testing' / 'generated_songs'
        # Hard-coded song; the removed dead assignment used to take the first
        # five test songs: test.index.to_frame()["name"].unique()[:5]
        song_codes_to_gen = ['133b', ]
        print(song_codes_to_gen)
        for song_code in song_codes_to_gen:
            beatmap_folder = input_folder / song_code
            print(beatmap_folder)
            generate_complete_beatmaps(beatmap_folder, output_folder,
                                       stateful_model, config)
        timer('Generated beatmaps', 5)
def main():
    """Find sampling temperatures and generate beatmaps for the final models.

    The three configurations (tuned MLSTM, custom vec:id, custom vec:vec) are
    built by mutating one shared `config`; every training field a later
    configuration needs is re-assigned, and each call receives a `deepcopy`.
    """
    base_folder, return_list, test, timer, train, val = init_test()

    seed = 43  # random, non-fine tuned seed
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    config = Config()
    config.dataset.storage_folder = base_folder / 'new_datasets'
    config.audio_processing.use_cache = True

    model_path = base_folder / 'temp'
    model_path.mkdir(parents=True, exist_ok=True)

    # The best found temperature is ~0.91
    run_name = 'MLSTM'
    config.training.model_type = ModelType.TUNE_MLSTM
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                DatasetConfig().categorical,
                                DatasetConfig().audio,
                                DatasetConfig().regression]
    config.training.y_groups = [['word_id'], ]
    hyper = kt.HyperParameters()
    tuned = {
        'connections_0': 2,
        'connections_1': 2,
        'connections_2': 2,
        'connections_3': 3,
        'connections_4': 1,
        'connections_5': 3,
        'connections_6': 2,
        'depth_0': 18,
        'depth_1': 23,
        'depth_2': 43,
        'depth_3': 13,
        'depth_4': 52,
        'depth_5': 5,
        'depth_6': 11,
        'dropout_0': 0.25612932926324405,
        'dropout_1': 0.1620424523625309,
        'dropout_2': 0.4720468723284278,
        'dropout_3': 0.43881829788147036,
        'dropout_4': 0.44741780640383355,
        'dropout_5': 0.3327191857714107,
        'dropout_6': 0.1367707920005909,
        'initial_learning_rate': 0.008,
        'label_smoothing': 0.13716631669361445,
        'lstm_layers': 3,
        'width_0': 16,
        'width_1': 9,
        'width_2': 15,
        'width_3': 16,
        'width_4': 5,
        'width_5': 11,
        'width_6': 4,
    }
    for name, value in tuned.items():
        hyper.Fixed(name, value=value)
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  run_name, deepcopy(config), hyper)

    # The best found temperature is ~0.71
    run_name = 'vec:id'
    config.training.model_type = ModelType.CUSTOM
    config.training.cnn_repetition = 2
    config.training.lstm_repetition = 2
    config.training.dense_repetition = 0
    config.training.model_size = 512
    config.training.dropout = 0.4
    config.training.initial_learning_rate = 1e-2
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.l2_regularization = 0
    config.training.x_groups = [['prev_word_id', ],
                                DatasetConfig().categorical,
                                DatasetConfig().audio,
                                DatasetConfig().regression]
    config.training.y_groups = [['word_id'], ]
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  run_name, deepcopy(config))

    # The best found temperature is ~0.147
    run_name = 'vec:vec'
    config.training.model_type = ModelType.CUSTOM
    config.training.cnn_repetition = 2
    config.training.lstm_repetition = 2
    config.training.dense_repetition = 0
    config.training.model_size = 512
    config.training.dropout = 0.4
    config.training.initial_learning_rate = 1e-2
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.l2_regularization = 0
    config.training.x_groups = [['prev_word_vec', ],
                                DatasetConfig().categorical,
                                DatasetConfig().audio,
                                DatasetConfig().regression]
    config.training.y_groups = [['word_vec'], ]
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  run_name, deepcopy(config))
def mainly_vec():
    """Compare how much information each input group contributes.

    A fixed custom architecture is retrained (7 repetitions) with different
    x/y group combinations; each run's metrics are appended to a shared CSV,
    labelled with a human-readable configuration name.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'vec'
    test_name = 'information_comparison'
    print('Running information comparison')
    csv_file = base_folder / 'temp' / f'{prefix}{test_name}.csv'

    dataset = DatasetConfig()
    ALL = [dataset.categorical + dataset.audio + dataset.regression, ]

    # Common configuration
    config = Config()
    config.training.model_type = ModelType.CUSTOM
    config.training.cnn_repetition = 0
    config.training.lstm_repetition = 2
    config.training.dense_repetition = 0
    config.training.model_size = 512
    config.training.dropout = 0.4
    config.training.initial_learning_rate = 1e-2
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.l2_regularization = 0
    hp = None

    config.dataset.beat_maps_folder = \
        config.dataset.beat_maps_folder.parent / 'test_new_dataformat'
    config.dataset.storage_folder = base_folder / 'test_new_datasets'
    # Result was unused; kept for any side effects of the call — TODO confirm
    # whether create_song_list can be dropped entirely.
    create_song_list(config.dataset.beat_maps_folder)

    # First generate all data using all of the audio features
    config.audio_processing.use_temp_derrivatives = True
    config.audio_processing.time_shift = -0.4
    dataset_stats(train)

    for repetition in range(7):
        config.training.x_groups = ALL
        config.training.y_groups = [['word_vec'], ]
        configuration_name = (f'X: {config.training.x_groups}'
                              f'\nY: {config.training.y_groups}')
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config.training.x_groups = [['prev_word_vec'], ]
        config.training.y_groups = [['word_id'], ]
        configuration_name = (f'X: {config.training.x_groups}'
                              f'\nY: {config.training.y_groups}')
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config.training.x_groups = [['prev_word_vec'], ['mfcc'], ]
        config.training.y_groups = [['word_id'], ]
        # BUG FIX: the hand-written label said 'prev_word_id' although the
        # x_groups above use 'prev_word_vec'; the pretty 'MFCC'/'dMFCC'
        # naming for the audio stream is kept.
        configuration_name = (f"X: {[['prev_word_vec'], ['MFCC', 'dMFCC']]}"
                              f"\nY: {config.training.y_groups}")
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config.training.x_groups = [['prev_word_vec'], ['prev', 'next'], ]
        config.training.y_groups = [['word_id'], ]
        configuration_name = (f"X: {config.training.x_groups}"
                              f"\nY: {config.training.y_groups}")
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config.training.x_groups = [['prev_word_vec'], ['part'], ]
        config.training.y_groups = [['word_id'], ]
        configuration_name = (f"X: {config.training.x_groups}"
                              f"\nY: {config.training.y_groups}")
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config.training.x_groups = [['prev_word_vec'], ] + ALL
        config.training.y_groups = [['word_id'], ]
        configuration_name = (f'X: {config.training.x_groups}'
                              f'\nY: {config.training.y_groups}')
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config.training.x_groups = [['prev_word_vec', 'prev_word_id'], ] + ALL
        config.training.y_groups = [['word_id'], ]
        configuration_name = (f'X: {config.training.x_groups}'
                              f'\nY: {config.training.y_groups}')
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config.training.x_groups = [['prev_word_vec', 'prev_word_id'], ] + ALL
        config.training.y_groups = [['word_vec', 'word_id'], ]
        configuration_name = (f'X: {config.training.x_groups}'
                              f'\nY: {config.training.y_groups}')
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)