import os

from joblib import Parallel, delayed

import config  # project-local configuration (paths)
import utils   # project-local helpers


def main(args):

    # load dataset information from the setup json file
    metad = utils.load_json_data(args.metadata_file)

    # generate data splits and keep them fixed for the whole project
    # MAKE SURE THIS IS ONLY CALLED ONCE
    splits_path = os.path.join(config.data_save_folder, 'data_splits.json')
    utils.create_data_split(metad, splits_path)

    # attach each metadata key to its entry as the filename
    mtracks = []
    for ky, mtrack in metad.items():
        mtrack['filename'] = ky
        mtracks.append(mtrack)

    nmixes = len(mtracks)
    print("{} mixes to be processed".format(nmixes))
    idx = 0

    # compute features for every mix in parallel
    Parallel(n_jobs=4, verbose=5)(
        delayed(utils.compute_features_mtrack)(
            mtrack, args.save_dir, args.wavmixes_path, idx
        ) for mtrack in mtracks)
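A minimal sketch of how this entry point could be invoked; the argument names and the `__main__` guard are assumptions inferred from the attributes `main` reads off `args` (`metadata_file`, `save_dir`, `wavmixes_path`), not part of the original script.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Compute features for every mix listed in a metadata file")
    parser.add_argument("metadata_file", help="path to the setup json file")
    parser.add_argument("save_dir", help="directory where computed features are saved")
    parser.add_argument("wavmixes_path", help="directory containing the wav mixes")
    main(parser.parse_args())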
Example #2
w2v_path = config['w2v_file']

print('[INFO] Tokenizing input and output sequences')
x, input_word_index = utils.tokenize_sequence(input_sentences, 
                                                filters, 
                                                config['encoder_num_tokens'], 
                                                config['encoder_vocab'])

y, output_word_index = utils.tokenize_sequence(output_sentences, 
                                                filters, 
                                                config['decoder_num_tokens'], 
                                                config['decoder_vocab'])

print('[INFO] Splitting data into train, validation and test sets')
# train_data, val_data and test_data are assumed to be defined earlier in the full script
dataset_sizes = [train_data.shape[0], val_data.shape[0], test_data.shape[0]]
x_train, y_train, x_val, y_val, x_test, y_test = utils.create_data_split(x, y, dataset_sizes)

encoder_embeddings_matrix = utils.create_embedding_matrix(input_word_index, 
                                                               config['embedding_size'], 
                                                               w2v_path)

decoder_embeddings_matrix = utils.create_embedding_matrix(output_word_index, 
                                                               config['embedding_size'], 
                                                               w2v_path)

# Re-calculate the vocab sizes based on the tokenizers' word index dictionaries
config['encoder_vocab'] = len(input_word_index)
config['decoder_vocab'] = len(output_word_index)

#----------------------------------------------------------------#
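For reference, a minimal sketch of what `utils.create_data_split(x, y, dataset_sizes)` might do in Example #2, assuming it simply slices the two parallel token arrays into consecutive train/validation/test chunks; only the call signature and the return order are taken from the snippet above, the slicing body itself is an assumption.

def create_data_split(x, y, dataset_sizes):
    # Assumed behavior: dataset_sizes = [n_train, n_val, n_test];
    # cut the parallel sequences x and y into three consecutive chunks.
    n_train, n_val, n_test = dataset_sizes
    x_train, y_train = x[:n_train], y[:n_train]
    x_val = x[n_train:n_train + n_val]
    y_val = y[n_train:n_train + n_val]
    x_test = x[n_train + n_val:n_train + n_val + n_test]
    y_test = y[n_train + n_val:n_train + n_val + n_test]
    return x_train, y_train, x_val, y_val, x_test, y_test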
Example #3
def create_data_splits(path_to_metadata_file, exper_dir):
    # write fixed train/val/test splits to <exper_dir>/data_splits.json

    metadata = load_data(path_to_metadata_file)

    utils.create_data_split(metadata,
                            os.path.join(exper_dir, 'data_splits.json'))