예제 #1
0
def main():
    """Sweep `mixup_alpha` for the baseline model, repeating the sweep 7 times.

    Every repetition builds a fresh `Config`, applies the fixed baseline
    training settings listed below and hands the configuration together with
    the `mixup_alpha` grid to `eval_hyperparams` under the 'baseline_' prefix.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'baseline_'

    for _ in range(7):
        # NOTE: a disabled variant of this experiment swept 'model_size' over
        # [1024, 768, 512, 384, 256, 128, 64] with mixup_alpha fixed to 0 and
        # otherwise the same settings.
        hyper_params = {'mixup_alpha': [0.0, 0.25, 0.5, 0.75]}
        config = Config()

        # Built inside the loop so every repetition gets fresh list objects.
        baseline_settings = {
            'model_type': ModelType.BASELINE,
            'cnn_repetition': 0,
            'lstm_repetition': 1,
            'dense_repetition': 0,
            'dropout': 0,
            'initial_learning_rate': 0.001,
            'model_size': 384,
            'batch_size': 128,
            'label_smoothing': 0,
            'l2_regularization': 0,
            'x_groups': [['prev_word_id']],
            'y_groups': [['word_id']],
        }
        for name, setting in baseline_settings.items():
            setattr(config.training, name, setting)

        eval_hyperparams(base_folder, timer, hyper_params, return_list, train,
                         val, test, config, prefix)
예제 #2
0
def main():
    """Sweep `model_size` for the DDC model, repeating the sweep 7 times.

    Every repetition builds a fresh `Config` with the fixed DDC training
    settings below and passes it, together with the `model_size` grid, to
    `eval_hyperparams` under the 'ddc_' prefix.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'ddc_'

    for _ in range(7):
        hyper_params = {
            'model_size': [
                1024,
                768,
                512,
                384,
                256,
                128,
                64,
            ]
        }
        config = Config()
        # Was misspelled 'dcc'; the result prefix above is 'ddc_', so the
        # model type is spelled to match it.
        config.training.model_type = 'ddc'
        config.training.cnn_repetition = 0
        config.training.lstm_repetition = 2
        config.training.dense_repetition = 0
        config.training.dropout = 0.5
        config.training.initial_learning_rate = 0.001
        config.training.batch_size = 64
        config.training.label_smoothing = 0
        config.training.mixup_alpha = 0
        config.training.l2_regularization = 0
        config.training.x_groups = [
            ['prev_word_id'],
            ['prev', 'next'],
        ]
        config.training.y_groups = [
            ['word_id'],
        ]
        eval_hyperparams(base_folder, timer, hyper_params, return_list, train,
                         val, test, config, prefix)
예제 #3
0
def main():
    """Evaluate the best configuration of each model family, 7 times over.

    Per repetition five configurations are evaluated in this order: baseline,
    DDC, custom (id target), custom (vec target) and the tuned MLSTM with a
    fixed set of hyper-parameters. Each one gets a fresh `Config` and is passed
    to `eval_config`, which receives the shared CSV path
    `base_folder / 'temp' / 'best_model_comparison2.csv'`.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = ''
    test_name = 'best_model_comparison2'
    print('Running best model comparison')

    csv_file = base_folder / 'temp' / f'{prefix}{test_name}.csv'

    def evaluate(configuration_name, hp, **training_settings):
        """Apply `training_settings` to a new Config and evaluate it."""
        config = Config()
        for name, setting in training_settings.items():
            setattr(config.training, name, setting)
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

    def full_context_groups():
        """Previous word (id and vec) plus every DatasetConfig feature group."""
        return [['prev_word_id', 'prev_word_vec'],
                DatasetConfig().categorical,
                DatasetConfig().audio,
                DatasetConfig().regression]

    for _ in range(7):
        evaluate(
            ModelType.BASELINE,
            None,
            model_type='baseline',
            cnn_repetition=0,
            lstm_repetition=1,
            dense_repetition=0,
            model_size=384,
            dropout=0,
            initial_learning_rate=0.001,
            batch_size=128,
            label_smoothing=0,
            mixup_alpha=0,
            l2_regularization=0,
            x_groups=[['prev_word_id']],
            y_groups=[['word_id']],
        )

        evaluate(
            ModelType.DDC,
            None,
            model_type='ddc',
            cnn_repetition=0,
            lstm_repetition=2,
            dense_repetition=0,
            model_size=512,
            dropout=0.5,
            initial_learning_rate=0.001,
            batch_size=64,
            label_smoothing=0,
            mixup_alpha=0,
            l2_regularization=0,
            x_groups=[['prev_word_id'], ['prev', 'next']],
            y_groups=[['word_id']],
        )

        evaluate(
            'Custom vec+id:id',
            None,
            model_type=ModelType.CUSTOM,
            cnn_repetition=2,
            lstm_repetition=2,
            dense_repetition=2,
            model_size=512,
            dropout=0.4,
            initial_learning_rate=1e-2,
            batch_size=128,
            label_smoothing=0.5,
            mixup_alpha=0.5,
            l2_regularization=0,
            x_groups=full_context_groups(),
            y_groups=[['word_id']],
        )

        evaluate(
            'Custom vec+id:vec',
            None,
            model_type=ModelType.CUSTOM,
            cnn_repetition=2,
            lstm_repetition=2,
            dense_repetition=0,
            model_size=512,
            dropout=0.4,
            initial_learning_rate=1e-2,
            batch_size=128,
            label_smoothing=0.5,
            mixup_alpha=0.5,
            l2_regularization=0,
            x_groups=full_context_groups(),
            y_groups=[['word_vec']],
        )

        # Hyper-parameters of the tuned MLSTM, pinned as fixed values.
        mlstm_hp = kt.HyperParameters()
        pinned = {
            'connections_0': 2,
            'connections_1': 2,
            'connections_2': 2,
            'connections_3': 3,
            'connections_4': 1,
            'connections_5': 3,
            'connections_6': 2,
            'depth_0': 18,
            'depth_1': 23,
            'depth_2': 43,
            'depth_3': 13,
            'depth_4': 52,
            'depth_5': 5,
            'depth_6': 11,
            'dropout_0': 0.25612932926324405,
            'dropout_1': 0.1620424523625309,
            'dropout_2': 0.4720468723284278,
            'dropout_3': 0.43881829788147036,
            'dropout_4': 0.44741780640383355,
            'dropout_5': 0.3327191857714107,
            'dropout_6': 0.1367707920005909,
            'initial_learning_rate': 0.008,
            'label_smoothing': 0.13716631669361445,
            'lstm_layers': 3,
            'width_0': 16,
            'width_1': 9,
            'width_2': 15,
            'width_3': 16,
            'width_4': 5,
            'width_5': 11,
            'width_6': 4,
        }
        for hp_name in pinned:
            mlstm_hp.Fixed(hp_name, value=pinned[hp_name])

        evaluate(
            'MLSTM',
            mlstm_hp,
            model_type=ModelType.TUNE_MLSTM,
            batch_size=128,
            label_smoothing=0.5,
            mixup_alpha=0.5,
            x_groups=full_context_groups(),
            y_groups=[['word_id']],
        )
예제 #4
0
def main():
    """Run the hyper-parameter sweeps of the custom model, 7 repetitions.

    Each repetition runs ten sweeps in sequence (mixup, label smoothing, their
    combination, batch size, input groups, model size, dropout and the three
    layer-repetition counts). Every sweep starts from a fresh default `Config`
    with a few training overrides and is evaluated by `eval_hyperparams` under
    the 'custom_' prefix.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'custom_'

    # Feature groups are read from an instance rather than from the class, so
    # they resolve even if DatasetConfig defines them as instance attributes.
    dataset = DatasetConfig()

    def sweep(hyper_params, **training_overrides):
        """Evaluate `hyper_params` on a fresh Config with the given overrides."""
        config = Config()
        for name, setting in training_overrides.items():
            setattr(config.training, name, setting)
        eval_hyperparams(base_folder, timer, hyper_params, return_list, train,
                         val, test, config, prefix)

    for _ in range(7):
        sweep(
            {'mixup_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 1.25]},
            label_smoothing=0,
        )

        sweep(
            {'label_smoothing':
             [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]},
            mixup_alpha=0,
        )

        # Joint grid over both regularizers, default Config otherwise.
        sweep({
            'label_smoothing': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75],
            'mixup_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75],
        })

        sweep(
            {'batch_size': [1024, 512, 256, 128, 64, 32]},
            mixup_alpha=0.75,
            label_smoothing=0,
        )

        sweep(
            {
                'x_groups': [
                    # Without previous beat
                    [dataset.categorical, dataset.audio, dataset.regression],
                    # Previous beat as word id only (no ActionVec information)
                    [['prev_word_id'], dataset.categorical, dataset.audio,
                     dataset.regression],
                    [['prev_word_id'], dataset.categorical, dataset.audio],
                    [['prev_word_id'], dataset.categorical,
                     dataset.regression],
                    [['prev_word_id'], dataset.audio, dataset.regression],
                    [['prev_word_id']],
                    # Previous beat as word vector, dropping one data stream
                    # at a time
                    [['prev_word_vec']],
                    [['prev_word_vec'], dataset.categorical, dataset.audio,
                     dataset.regression],
                    [['prev_word_vec'], dataset.categorical, dataset.audio],
                    [['prev_word_vec'], dataset.categorical,
                     dataset.regression],
                    [['prev_word_vec'], dataset.audio, dataset.regression],
                    # Give it redundant inputs
                    [['prev_word_vec', 'prev_word_id'], dataset.categorical,
                     dataset.audio, dataset.regression],
                    [['prev_word_vec', 'prev_word_id'],
                     dataset.beat_elements_previous_prediction,
                     dataset.categorical, dataset.audio, dataset.regression],
                    [['prev_word_vec'],
                     dataset.beat_elements_previous_prediction,
                     dataset.categorical, dataset.audio, dataset.regression],
                ]
            },
            mixup_alpha=0.5,
            label_smoothing=0.5,
        )

        sweep(
            {'model_size': [1024, 768, 512, 384, 256, 128, 64]},
            mixup_alpha=0.5,
            label_smoothing=0.5,
        )

        sweep(
            {'dropout': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]},
            mixup_alpha=0.5,
            label_smoothing=0.5,
        )

        # Layer-count sweeps run on the unmodified default Config.
        sweep({'cnn_repetition': range(5)})
        sweep({'lstm_repetition': range(5)})
        sweep({'dense_repetition': range(5)})
예제 #5
0
def main():
    """Search for, train and evaluate a hyper-parameter-tuned model.

    Three stages, each switched by a local flag:

    * ``find_model``: run a Keras Tuner Hyperband search for each tunable
      model type and print the best result.
    * ``train_model``: build a model from a fixed set of hyper-parameters,
      fit it and hand it to ``save_model`` with ``base_folder / 'temp'``.
    * ``eval_model``: load ``stateful_model.keras`` from that folder and
      generate beat maps with it.

    The training stage builds its own sequences and callbacks, so it no longer
    depends on the search stage having run in the same call.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    config = Config()
    model_path = base_folder / 'temp'

    find_model = True
    train_model = True
    eval_model = True
    if find_model:
        for model_type in [ModelType.TUNE_CLSTM, ModelType.TUNE_MLSTM]:
            train_seq = BeatmapSequence(df=train, is_train=True, config=config)
            val_seq = BeatmapSequence(df=val, is_train=False, config=config)
            test_seq = BeatmapSequence(df=test, is_train=False, config=config)

            # To search for a specific input:output combination, change `config`
            config.training.model_type = model_type

            architecture_fn = get_architecture_fn(config)
            tuner = kt.Hyperband(
                architecture_fn(train_seq, False, config),
                objective=kt.Objective('val_avs_dist', direction='min'),
                hyperband_iterations=1,
                max_epochs=100,
                factor=4,
                directory=base_folder / 'temp' / 'hyper_search',
                project_name=f'{architecture_fn.__qualname__}',
                overwrite=False,  # CAUTION!
            )
            tuner.search_space_summary()

            callbacks = create_callbacks(train_seq, config)

            tuner.search(
                x=train_seq,
                validation_data=val_seq,
                callbacks=callbacks,
                epochs=60,
                verbose=2,
                workers=10,
                max_queue_size=16,
                use_multiprocessing=False,
            )

            # Both summaries print themselves and return None, so they are not
            # wrapped in print(). The best model is loaded once and reused.
            tuner.results_summary()
            best_model = tuner.get_best_models(2)[0]
            best_model.summary()
            print(best_model.evaluate(test_seq))

    if train_model:
        # Train specific hyperparameters
        # The same fixed values are paired with ModelType.TUNE_MLSTM; set it
        # explicitly instead of relying on what the search loop left behind.
        config.training.model_type = ModelType.TUNE_MLSTM
        train_seq = BeatmapSequence(df=train, is_train=True, config=config)
        val_seq = BeatmapSequence(df=val, is_train=False, config=config)
        callbacks = create_callbacks(train_seq, config)

        hp = kt.HyperParameters()
        fixed_params = {
            'connections_0': 2,
            'connections_1': 2,
            'connections_2': 2,
            'connections_3': 3,
            'connections_4': 1,
            'connections_5': 3,
            'connections_6': 2,
            'depth_0': 18,
            'depth_1': 23,
            'depth_2': 43,
            'depth_3': 13,
            'depth_4': 52,
            'depth_5': 5,
            'depth_6': 11,
            'dropout_0': 0.25612932926324405,
            'dropout_1': 0.1620424523625309,
            'dropout_2': 0.4720468723284278,
            'dropout_3': 0.43881829788147036,
            'dropout_4': 0.44741780640383355,
            'dropout_5': 0.3327191857714107,
            'dropout_6': 0.1367707920005909,
            'initial_learning_rate': 0.008,
            'label_smoothing': 0.13716631669361445,
            'lstm_layers': 3,
            'width_0': 16,
            'width_1': 9,
            'width_2': 15,
            'width_3': 16,
            'width_4': 5,
            'width_5': 11,
            'width_6': 4,
        }
        # `value`, not `val`: the latter is the validation set from init_test.
        for param, value in fixed_params.items():
            hp.Fixed(param, value=value)

        model = get_architecture_fn(config)(train_seq, False, config)(hp)
        model.summary()
        tf.keras.utils.plot_model(model,
                                  to_file=base_folder / 'temp' /
                                  'model_architecture.png',
                                  show_shapes=True)
        model.fit(
            x=train_seq,
            validation_data=val_seq,
            callbacks=callbacks,
            epochs=81,
            verbose=2,
            workers=10,
            max_queue_size=16,
            use_multiprocessing=False,
        )

        model_path.mkdir(parents=True, exist_ok=True)

        save_model(model, model_path, train_seq, config, hp=hp)
        timer('Saved model', 5)

    if eval_model:
        # NOTE(review): presumably written by save_model above; confirm.
        stateful_model = tf.keras.models.load_model(
            model_path / 'stateful_model.keras',
            custom_objects={'Perplexity': Perplexity})

        timer('Loaded stateful model', 5)

        input_folder = base_folder / 'human_beatmaps' / 'new_dataformat'
        output_folder = base_folder / 'testing' / 'generated_songs'
        # Hard-coded song selection. To use the first five test songs instead:
        # list(test.index.to_frame()["name"].unique()[:5])
        song_codes_to_gen = [
            '133b',
        ]
        print(song_codes_to_gen)
        for song_code in song_codes_to_gen:
            beatmap_folder = input_folder / song_code
            print(beatmap_folder)
            generate_complete_beatmaps(beatmap_folder, output_folder,
                                       stateful_model, config)
            timer('Generated beatmaps', 5)
예제 #6
0
def main():
    """Run `find_temperature_and_generate` for three model configurations.

    Seeds TensorFlow, NumPy and `random` with the same value, then reuses one
    `Config` object: each run overwrites the training settings it needs and
    passes a deep copy on, so settings not overwritten carry over from the
    previous run. The runs are, in order, the tuned MLSTM (with fixed
    hyper-parameters), 'vec:id' and 'vec:vec'.
    """
    base_folder, return_list, test, timer, train, val = init_test()

    seed = 43  # random, non-fine tuned seed
    for apply_seed in (tf.random.set_seed, np.random.seed, random.seed):
        apply_seed(seed)
    config = Config()

    config.dataset.storage_folder = base_folder / 'new_datasets'
    config.audio_processing.use_cache = True

    model_path = base_folder / 'temp'
    model_path.mkdir(parents=True, exist_ok=True)

    def configure(**training_settings):
        """Overwrite the given training settings on the shared config."""
        for name, setting in training_settings.items():
            setattr(config.training, name, setting)

    def with_context(previous_word_group):
        """Prepend the previous-word group to the DatasetConfig groups."""
        return [previous_word_group,
                DatasetConfig().categorical,
                DatasetConfig().audio,
                DatasetConfig().regression]

    def per_layer(stem, values):
        """Name per-layer values `<stem>_0`, `<stem>_1`, ... in order."""
        return {f'{stem}_{layer}': v for layer, v in enumerate(values)}

    # The best found temperature is ~0.91
    configure(
        model_type=ModelType.TUNE_MLSTM,
        batch_size=128,
        label_smoothing=0.5,
        mixup_alpha=0.5,
        x_groups=with_context(['prev_word_id', 'prev_word_vec']),
        y_groups=[['word_id']],
    )
    hp = kt.HyperParameters()
    fixed_params = {
        **per_layer('connections', (2, 2, 2, 3, 1, 3, 2)),
        **per_layer('depth', (18, 23, 43, 13, 52, 5, 11)),
        **per_layer('dropout', (
            0.25612932926324405,
            0.1620424523625309,
            0.4720468723284278,
            0.43881829788147036,
            0.44741780640383355,
            0.3327191857714107,
            0.1367707920005909,
        )),
        'initial_learning_rate': 0.008,
        'label_smoothing': 0.13716631669361445,
        'lstm_layers': 3,
        **per_layer('width', (16, 9, 15, 16, 5, 11, 4)),
    }
    for hp_name, fixed_value in fixed_params.items():
        hp.Fixed(hp_name, value=fixed_value)
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  'MLSTM', deepcopy(config), hp)

    # The best found temperature is ~0.71
    # NOTE(review): labelled 'vec:id' but the input group is 'prev_word_id';
    # confirm which of the two is intended.
    configure(
        model_type=ModelType.CUSTOM,
        cnn_repetition=2,
        lstm_repetition=2,
        dense_repetition=0,
        model_size=512,
        dropout=0.4,
        initial_learning_rate=1e-2,
        batch_size=128,
        label_smoothing=0.5,
        mixup_alpha=0.5,
        l2_regularization=0,
        x_groups=with_context(['prev_word_id']),
        y_groups=[['word_id']],
    )
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  'vec:id', deepcopy(config))

    # The best found temperature is ~0.147
    configure(
        model_type=ModelType.CUSTOM,
        cnn_repetition=2,
        lstm_repetition=2,
        dense_repetition=0,
        model_size=512,
        dropout=0.4,
        initial_learning_rate=1e-2,
        batch_size=128,
        label_smoothing=0.5,
        mixup_alpha=0.5,
        l2_regularization=0,
        x_groups=with_context(['prev_word_vec']),
        y_groups=[['word_vec']],
    )
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  'vec:vec', deepcopy(config))
예제 #7
0
def mainly_vec():
    """Compare input/output feature combinations built around `prev_word_vec`.

    One shared `Config` carries the common custom-model settings; for each of
    the 7 repetitions eight x/y group combinations are set on it in turn and
    evaluated with `eval_config`. Results go to
    `base_folder / 'temp' / 'vecinformation_comparison.csv'`, each run
    labelled with a string describing its x and y groups.
    """
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'vec'
    test_name = 'information_comparison'
    print('Running information comparison')

    csv_file = base_folder / 'temp' / f'{prefix}{test_name}.csv'

    dataset = DatasetConfig()
    # A single group that concatenates every non-word feature column.
    ALL = [
        dataset.categorical + dataset.audio + dataset.regression,
    ]

    # Common configuration
    config = Config()
    config.training.model_type = ModelType.CUSTOM
    config.training.cnn_repetition = 0
    config.training.lstm_repetition = 2
    config.training.dense_repetition = 0
    config.training.model_size = 512
    config.training.dropout = 0.4
    config.training.initial_learning_rate = 1e-2
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.l2_regularization = 0
    hp = None

    config.dataset.beat_maps_folder = config.dataset.beat_maps_folder.parent / 'test_new_dataformat'
    config.dataset.storage_folder = base_folder / 'test_new_datasets'
    # NOTE(review): the result is not used below; kept in case the call is
    # relied on for a side effect -- confirm and drop if not.
    song_folders = create_song_list(config.dataset.beat_maps_folder)

    # First generate all data using all of the audio features
    config.audio_processing.use_temp_derrivatives = True
    config.audio_processing.time_shift = -0.4
    dataset_stats(train)

    def evaluate(x_groups, y_groups, x_label=None):
        """Set the groups on the shared config and evaluate it.

        `x_label` replaces the x groups in the run's label when the raw
        column names are not the desired description.
        """
        config.training.x_groups = x_groups
        config.training.y_groups = y_groups
        shown_x = config.training.x_groups if x_label is None else x_label
        configuration_name = f'X: {shown_x}\nY: {config.training.y_groups}'
        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

    for _ in range(7):
        evaluate(ALL, [['word_vec']])

        evaluate([['prev_word_vec']], [['word_id']])

        # The label previously said 'prev_word_id' although this run feeds
        # 'prev_word_vec'; the label now names the input that is used.
        evaluate([['prev_word_vec'], ['mfcc']], [['word_id']],
                 x_label=[['prev_word_vec'], ['MFCC', 'dMFCC']])

        evaluate([['prev_word_vec'], ['prev', 'next']], [['word_id']])

        evaluate([['prev_word_vec'], ['part']], [['word_id']])

        evaluate([['prev_word_vec']] + ALL, [['word_id']])

        evaluate([['prev_word_vec', 'prev_word_id']] + ALL, [['word_id']])

        evaluate([['prev_word_vec', 'prev_word_id']] + ALL,
                 [['word_vec', 'word_id']])