Example #1
0
        # PREFIX_INDEX_FOLDER + "hand_picked_Spotify_test.txt",
        # PREFIX_INDEX_FOLDER + "liszt_classical_archives_test.txt"
    ]

    build_data(index_files_dict=index_files_dict,
               meta_info_path=data_folder + '/temp.p',
               quantization=script_param['quantization'],
               temporal_granularity=script_param['temporal_granularity'],
               store_folder=data_folder,
               logging=logging)

############################################################
# Hyper-parameter search space
# Built from the spaces exposed by the model class and the
# optimization method, plus the training and script parameters.
############################################################
model_space = Model_class.get_hp_space()
optim_space = Optimization_method.get_hp_space()
space = {
    'model': model_space,
    'optim': optim_space,
    'train': train_param,
    'script': script_param,
}

############################################################
# MongoDB
# The database name is a dash-separated tag made of: first
# element of unit_type, first element of the temporal
# granularity, the quantization, the optimizer name and the
# model name.
############################################################
host = "localhost"
port = 27017
db_name_parts = [
    unit_type[0],
    script_param['temporal_granularity'][0],
    str(script_param['quantization']),
    Optimization_method.name(),
    Model_class.name(),
]
db_name = '-'.join(db_name_parts)
mongo_adress = ''.join(['mongo://', host, ':', str(port), '/', db_name])
Example #2
0
def train_hopt(max_evals, csv_file_path):
    """Search model and optimizer hyper-parameters with ``fmin`` / ``tpe.suggest``.

    For every configuration drawn from the search space the nested
    ``run_wrapper`` loads the data, trains a model, generates sequences from
    the test set, pickles the trained model and appends one result row to the
    CSV file.

    The function relies on module-level names defined outside this block
    (loggers, ``Model_class``, ``Optimization_method``, ``result_folder``,
    ``LOCAL_SCRATCH``, ``max_iter``, ``binary_unit``, ``quantization``, ...)
    and resets the module-level ``run_counter`` to 0 before searching.

    Args:
        max_evals: forwarded to ``fmin`` as ``max_evals``.
        csv_file_path: path of the result CSV. The file is truncated, a header
            row is written, then one row per evaluated configuration is
            appended.

    Returns:
        The value returned by ``fmin``.
    """
    # Create/reinit csv file (truncate any previous content)
    with open(csv_file_path, 'w'):
        pass

    logger_hopt.info((u'WITH HYPERPARAMETER OPTIMIZATION').encode('utf8'))
    logger_hopt.info((u'**** Model : ' + Model_class.name()).encode('utf8'))
    logger_hopt.info((u'**** Optimization technic : ' + Optimization_method.name()).encode('utf8'))
    logger_hopt.info((u'**** Temporal granularity : ' + temporal_granularity).encode('utf8'))
    if binary_unit:
        logger_hopt.info((u'**** Binary unit (intensity discarded)').encode('utf8'))
    else:
        logger_hopt.info((u'**** Real valued unit (intensity taken into consideration)').encode('utf8'))
    logger_hopt.info((u'**** Quantization : ' + str(quantization)).encode('utf8'))

    # Define hyper-parameter search space for the model
    # Those are given by the static method get_hp_space
    model_space = Model_class.get_hp_space()
    optim_space = Optimization_method.get_hp_space()
    space = {'model': model_space, 'optim': optim_space}

    # Get the headers (i.e. list of hyperparameters tuned for printing and
    # save purposes). list() keeps the concatenation valid whether keys()
    # returns a list or a view.
    header = list(model_space.keys()) + list(optim_space.keys()) + ['accuracy']

    # The counter is a module-level global because run_wrapper must rebind it
    # on every call.
    global run_counter
    run_counter = 0

    def run_wrapper(params):
        """Evaluate one configuration and return the value to minimize."""
        global run_counter
        run_counter += 1
        logger_hopt.info(('\n').encode('utf8'))
        logger_hopt.info((u'#'*40).encode('utf8'))
        logger_hopt.info((u'# Config :  {}'.format(run_counter)).encode('utf8'))

        # Build model and optim dico ################
        model_param = params['model']
        optim_param = params['optim']
        #############################################

        # Weights plotted and stored in a folder ####
        # Same for generated midi sequences #########
        # Each run gets its own sub-folder named after the run counter.
        run_folder = result_folder + '/' + str(run_counter)
        weights_folder = run_folder + '/weights'
        generated_folder = run_folder + '/generated_sequences'
        model_folder = run_folder + '/model'
        for folder in (weights_folder, generated_folder, model_folder):
            if not os.path.isdir(folder):
                os.makedirs(folder)
        #############################################

        # Load data #################################
        time_load_0 = time.time()
        piano_train, orchestra_train, train_index, \
            piano_valid, orchestra_valid, valid_index, \
            piano_test, orchestra_test, test_index, generation_index \
            = load_data(LOCAL_SCRATCH + '/Data',
                        model_param['temporal_order'],
                        model_param['batch_size'],
                        binary_unit=binary_unit,
                        skip_sample=1,
                        logger_load=logger_load)
        time_load_1 = time.time()
        logger_load.info('TTT : Loading data took {} seconds'.format(time_load_1-time_load_0))
        ##############
        # visualize_mat(piano_train.get_value(), 'DEBUG', 'piano_train')
        # visualize_mat(orchestra_train.get_value(), 'DEBUG', 'orchestra_train')
        # visualize_mat(piano_test.get_value(), 'DEBUG', 'piano_test')
        # visualize_mat(orchestra_test.get_value(), 'DEBUG', 'orchestra_test')
        # visualize_mat(piano_valid.get_value(), 'DEBUG', 'piano_valid')
        # visualize_mat(orchestra_valid.get_value(), 'DEBUG', 'orchestra_valid')
        ##############
        # For large datasets
        #   http://deeplearning.net/software/theano/tutorial/aliasing.html
        #   use borrow=True (avoid copying the whole matrix) ?
        #   Load as much as the GPU can handle, train then load other
        #       part of the dataset using shared_variable.set_value(new_value)
        #############################################

        # Train #####################################
        time_train_0 = time.time()
        model, dico_res = train(piano_train, orchestra_train, train_index,
                                piano_valid, orchestra_valid, valid_index,
                                model_param, optim_param, max_iter, weights_folder)
        time_train_1 = time.time()
        logger_train.info('TTT : Training data took {} seconds'.format(time_train_1-time_train_0))
        # fmin minimizes its objective, so return the negated accuracy.
        error = -dico_res['accuracy']
        #############################################

        # Generate ##################################
        time_generate_0 = time.time()
        generate(model,
                 piano_test, orchestra_test, generation_index,
                 generation_length, seed_size, quantization_write,
                 generated_folder, logger_generate)
        time_generate_1 = time.time()
        logger_generate.info('TTT : Generating data took {} seconds'.format(time_generate_1-time_generate_0))
        #############################################

        # Save ######################################
        # Use a context manager so the pickle is flushed and the handle is
        # released after every evaluation instead of leaking one per run.
        with open(model_folder + '/model.pkl', 'wb') as save_model_file:
            pickle.dump(model, save_model_file, protocol=pickle.HIGHEST_PROTOCOL)
        #############################################

        # log
        logger_hopt.info((u'# Accuracy :  {}'.format(dico_res['accuracy'])).encode('utf8'))
        logger_hopt.info((u'###################\n').encode('utf8'))

        # Write the result in result.csv
        # NOTE(review): DictWriter raises ValueError if dico_res holds keys
        # missing from header - confirm train() only returns header keys.
        with open(csv_file_path, 'ab') as csvfile:
            writer = csv.DictWriter(csvfile, delimiter=',', fieldnames=header)
            writer.writerow(dico_res)

        return error

    # Write the header row once, before any result row is appended
    # (the file was emptied at the top of this function).
    with open(csv_file_path, 'ab') as csvfile:
        writerHead = csv.writer(csvfile, delimiter=',')
        writerHead.writerow(header)

    best = fmin(run_wrapper, space, algo=tpe.suggest, max_evals=max_evals)

    return best