def init_test():
    """Seed all RNGs, load the train/val/test datasets, and build shared
    multiprocessing state for the experiment runners.

    Returns a tuple ``(base_folder, return_list, test, timer, train, val)``
    consumed positionally by the ``mainly_*`` experiment entry points.
    """
    timer = Timer()

    # Fixed (non-tuned) seed so experiment runs are reproducible.
    seed = 43
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    config = Config()
    base_folder = config.base_data_folder

    song_folders = create_song_list(config.dataset.beat_maps_folder)
    num_songs = len(song_folders)
    print(f'Found {num_songs} folders')

    config.dataset.storage_folder = base_folder / 'new_datasets'
    # Alternative, smaller dataset for quick iteration:
    # config.dataset.storage_folder = base_folder / 'test_datasets'  # only 100 songs, for testing
    # config.audio_processing.use_cache = False
    # generate_datasets(song_folders, config, prefix)

    train, val, test = load_datasets(config)
    timer('Loaded datasets', 0)

    # Keep these songs out of the training data so they can be used for
    # hand testing; errors='ignore' makes the drop a no-op if absent.
    for excluded in ('133b', 'Daddy - PSY'):
        train.drop(index=excluded, inplace=True, errors='ignore')
    # dataset_stats(train)

    # Shared list so worker processes can report results back.
    manager = multiprocessing.Manager()
    return_list = manager.list()

    return base_folder, return_list, test, timer, train, val
def velocities_from_config(config: Config):
    """Build the beatmap dataset described by *config* and compute the
    per-song velocity statistics over its vector representation.
    """
    folders = create_song_list(config.dataset.beat_maps_folder)
    df = songs2dataset(folders, config)
    print(df.head(2))
    # Convert to the vector form expected by the velocity computation.
    return compute_multiple_velocities(get_vec_df(df))
def main():
    """Generate the (audio-feature) datasets from the raw beatmap folders,
    then sanity-check that they load back correctly.
    """
    timer = Timer()

    # Fixed (non-tuned) seed for reproducibility.  TF is deliberately not
    # seeded here because it is not imported in this entry point.
    seed = 43
    np.random.seed(seed)
    random.seed(seed)

    config = Config()
    base_folder = config.base_data_folder

    # --- Full dataset ---
    song_folders = create_song_list(config.dataset.beat_maps_folder)
    # config.dataset.storage_folder = base_folder / 'old_datasets'
    config.dataset.storage_folder = base_folder / 'new_datasets'

    # --- Test dataset (first 100 songs) ---
    # song_folders = create_song_list(config.dataset.beat_maps_folder)[:100]
    # config.dataset.storage_folder = base_folder / 'test_datasets'

    # Audio features need to be computed from scratch the first time.
    config.audio_processing.use_cache = False
    # Safe to fork workers since TF is not imported in this entry point.
    config.use_multiprocessing = True

    print(f'Found {len(song_folders)} folders')
    generate_datasets(song_folders, config)

    # Smoke test: make sure the freshly generated datasets load.
    train, val, test = load_datasets(config)
    timer('Loaded datasets', 5)
def main():
    """End-to-end training entry point: load datasets, train/evaluate the
    model, save it, reload the stateful variant and generate beatmaps.
    """
    timer = Timer()

    # Fixed (non-tuned) seed for reproducibility.
    seed = 43
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    config = Config()
    base_folder = config.base_data_folder

    # Use full dataset
    song_folders = create_song_list(config.dataset.beat_maps_folder)
    # config.dataset.storage_folder = base_folder / 'old_datasets'
    config.dataset.storage_folder = base_folder / 'new_datasets'

    # Use test set
    # song_folders = create_song_list(config.dataset.beat_maps_folder)[:100]
    # config.dataset.storage_folder = base_folder / 'test_datasets'

    config.audio_processing.use_cache = True

    total = len(song_folders)
    print(f'Found {total} folders')

    generate_datasets(song_folders, config)
    train, val, test = load_datasets(config)
    timer('Loaded datasets', 5)

    # Ensure these songs are excluded from the training data for hand testing.
    train.drop(index='133b', inplace=True, errors='ignore')
    train.drop(index='Daddy - PSY', inplace=True, errors='ignore')
    dataset_stats(train)

    train_seq = BeatmapSequence(df=train, is_train=True, config=config)
    val_seq = BeatmapSequence(df=val, is_train=False, config=config)
    test_seq = BeatmapSequence(df=test, is_train=False, config=config)
    timer('Generated sequences', 5)

    # del train, val, test  # delete the data if experiencing RAM problems
    gc.collect()
    # keras.mixed_precision.experimental.set_policy('mixed_float16')  # Undefined behavior with advanced models

    model_path = base_folder / 'temp'
    model_path.mkdir(parents=True, exist_ok=True)

    # BUGFIX: this flag used to be named `train`, which rebound the name of
    # the training DataFrame to a bool and silently discarded the data that
    # the `del train, ...` comment above shows should remain available.
    should_train = True
    if should_train:
        model = get_architecture_fn(config)(train_seq, False, config)
        model.summary()
        callbacks = create_callbacks(train_seq, config)
        model.fit(
            train_seq,
            validation_data=val_seq,
            callbacks=callbacks,
            epochs=400,
            verbose=2,
            workers=10,
            max_queue_size=16,
            use_multiprocessing=False,
        )
        timer('Trained model', 5)

        model.evaluate(test_seq)
        timer('Evaluated model', 5)

        save_model(model, model_path, train_seq, config)
        timer('Saved model', 5)

    # Reload the saved model in its stateful (step-by-step inference) form.
    stateful_model = keras.models.load_model(
        model_path / 'stateful_model.keras',
        custom_objects={'Perplexity': Perplexity, 'mish': tfa.activations.mish})
    stateful_model.summary()
    timer('Loaded stateful model', 5)

    # Use generated action placement.
    # NOTE(review): the 'beat_sage' assignment below was dead code (it was
    # immediately overwritten); kept as a comment so it can be toggled.
    # input_folder = base_folder / 'evaluation_dataset' / 'beat_sage'
    input_folder = base_folder / 'evaluation_dataset' / 'oxai_deepsaber_expert'
    output_folder = base_folder / 'testing' / 'generated_songs'
    dirs = [x for x in input_folder.glob('*/') if x.is_dir()]

    # Use human action placement from test set
    # input_folder = base_folder / 'human_beatmaps' / 'new_dataformat'
    # dirs = list(x for x in test.index.to_frame()["name"].unique()[:13])

    for song_code in dirs:
        beatmap_folder = input_folder / song_code
        print(f'Working on {beatmap_folder.name}')
        generate_complete_beatmaps(beatmap_folder, output_folder, stateful_model, config)
    timer('Generated beatmaps', 5)
def mainly_vec():
    """Run the 'information comparison' experiment: evaluate several
    input-feature (x_groups) / target (y_groups) combinations around the
    word-vector representation and append results to a CSV.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'vec'
    test_name = 'information_comparison'
    print('Running information comparison')
    # Results for every configuration are accumulated into this one CSV.
    csv_file = base_folder / 'temp' / f'{prefix}{test_name}.csv'

    dataset = DatasetConfig()
    # All feature groups combined: categorical + audio + regression columns.
    ALL = [
        dataset.categorical + dataset.audio + dataset.regression,
    ]

    # Common configuration shared by every evaluated variant.
    config = Config()
    config.training.model_type = ModelType.CUSTOM
    config.training.cnn_repetition = 0
    config.training.lstm_repetition = 2
    config.training.dense_repetition = 0
    config.training.model_size = 512
    config.training.dropout = 0.4
    config.training.initial_learning_rate = 1e-2
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.l2_regularization = 0
    hp = None  # no hyper-parameter search object for this experiment

    # Point at the small test dataset variant for this comparison run.
    config.dataset.beat_maps_folder = config.dataset.beat_maps_folder.parent / 'test_new_dataformat'
    config.dataset.storage_folder = base_folder / 'test_new_datasets'
    song_folders = create_song_list(config.dataset.beat_maps_folder)

    # First generate all data using all of the audio features
    config.audio_processing.use_temp_derrivatives = True
    config.audio_processing.time_shift = -0.4

    dataset_stats(train)

    # Repeat the whole comparison 7 times for statistical robustness.
    # NOTE(review): loop extent reconstructed from a whitespace-mangled
    # source — all configurations below are assumed to run per repetition.
    for repetition in range(7):
        # Baseline: all features in, predict the word vector.
        config.training.x_groups = ALL
        config.training.y_groups = [
            ['word_vec'],
        ]
        configuration_name = f'X: {config.training.x_groups}\nY: {config.training.y_groups}'
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

        # Previous word vector only -> word id.
        config.training.x_groups = [
            ['prev_word_vec'],
        ]
        config.training.y_groups = [
            ['word_id'],
        ]
        configuration_name = f'X: {config.training.x_groups}\nY: {config.training.y_groups}'
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

        # Previous word vector + MFCC audio features -> word id.
        config.training.x_groups = [
            ['prev_word_vec'],
            ['mfcc'],
        ]
        config.training.y_groups = [
            ['word_id'],
        ]
        # NOTE(review): the label deliberately differs from x_groups here
        # (it names the MFCC features explicitly for the results table).
        configuration_name = f"X: {[['prev_word_id'], ['MFCC', 'dMFCC']]}\nY: {config.training.y_groups}"
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

        # Previous word vector + prev/next timing features -> word id.
        config.training.x_groups = [
            ['prev_word_vec'],
            ['prev', 'next'],
        ]
        config.training.y_groups = [
            ['word_id'],
        ]
        configuration_name = f"X: {config.training.x_groups}\nY: {config.training.y_groups}"
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

        # Previous word vector + song-part feature -> word id.
        config.training.x_groups = [
            ['prev_word_vec'],
            ['part'],
        ]
        config.training.y_groups = [
            ['word_id'],
        ]
        configuration_name = f"X: {config.training.x_groups}\nY: {config.training.y_groups}"
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

        # Previous word vector + all features -> word id.
        config.training.x_groups = [
            ['prev_word_vec'],
        ] + ALL
        config.training.y_groups = [
            ['word_id'],
        ]
        configuration_name = f'X: {config.training.x_groups}\nY: {config.training.y_groups}'
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

        # Previous word vector + previous word id + all features -> word id.
        config.training.x_groups = [
            ['prev_word_vec', 'prev_word_id'],
        ] + ALL
        config.training.y_groups = [
            ['word_id'],
        ]
        configuration_name = f'X: {config.training.x_groups}\nY: {config.training.y_groups}'
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

        # Same inputs, but predict both word vector and word id.
        config.training.x_groups = [
            ['prev_word_vec', 'prev_word_id'],
        ] + ALL
        config.training.y_groups = [
            ['word_vec', 'word_id'],
        ]
        configuration_name = f'X: {config.training.x_groups}\nY: {config.training.y_groups}'
        eval_config(csv_file, timer, return_list, train, val, test, config, test_name,
                    configuration_name, hp)

    pass