def gen(folder, actF, fft, inputSongs):
    """Generate one audio sequence per input song from a saved LSTM model.

    Loads the preprocessed training tensors and normalisation statistics
    from ``folder``, rebuilds the LSTM network, restores its weights when the
    weights file exists, and then, for every entry of ``inputSongs``, writes
    two files through ``save_generated_example``: the seed that was fed to
    the model and the sequence the model generated from it.

    Args:
        folder: Path prefix for every file read or written.  It is joined by
            plain string concatenation, so it must already end with a path
            separator.
        actF: Activation name.  It is used verbatim in the weights file name;
            the short form ``"sig"`` is expanded to ``"sigmoid"`` before it
            is handed to the network builder and used in output file names.
        fft: Truthy when the data is in the frequency domain; outputs are
            saved with ``useTimeDomain=not fft``.
        inputSongs: Iterable of song paths.  Only the base name of each path
            is used (to name the output files); the seed itself is copied
            from the training data.

    Returns:
        None.  Results are written to disk and progress is printed.
    """
    config = nn_config.get_neural_net_configuration()
    # NOTE(review): the weights prefix ("TOYPAJ") and the data prefix
    # ("TOYPAYJ") differ by one letter - confirm both match the files on disk.
    model_filename = folder + "TOYPAJ-NPWeights50-" + actF
    # The weights file is keyed by the short name, so expand it only afterwards.
    if actF == "sig":
        actF = "sigmoid"
    sample_frequency = config['sampling_frequency']
    inputFile = folder + "TOYPAYJ-Processed"
    output_filename = folder + actF + 'generated_'

    # Load up the training data.
    # Per the original author's notes, X_train is a tensor of size
    # (num_train_examples, num_timesteps, num_frequency_dims), and X_mean /
    # X_var hold the mean / variance of each frequency dimension.
    print('Loading training data')
    X_train = np.load(inputFile + '_x.npy')
    print(X_train.shape)
    print(type(X_train))
    X_mean = np.load(inputFile + '_mean.npy')
    X_var = np.load(inputFile + '_var.npy')
    print('Finished loading training data')

    # Per-example shape: everything except the leading "examples" axis.
    freq_space_dims = X_train.shape[1:]
    hidden_dims = config['hidden_dimension_size']

    # Creates a lstm network
    model = network_utils.create_lstm_network(
        num_frequency_dimensions=freq_space_dims,
        num_hidden_dimensions=hidden_dims,
        actF=actF)
    # You could also substitute this with a RNN or GRU
    # model = network_utils.create_gru_network()

    print(model_filename)
    # Load existing weights if available; otherwise generation continues with
    # whatever weights the freshly built network has.
    if os.path.isfile(model_filename):
        model.load_weights(model_filename)
    else:
        print('Model filename ' + model_filename + ' could not be found!')

    print('Starting generation!')
    # Here's the interesting part.
    # We need to create some seed sequence for the algorithm to start with.
    # Currently, we just grab an existing seed sequence from our training data
    # and use that.  However, this will generally produce verbatim copies of
    # the original songs.  In a sense, choosing good seed sequences = how you
    # get interesting compositions.  There are many, many ways we can pick
    # these seed sequences, such as taking linear combinations of certain
    # songs.  We could even provide a uniformly random sequence, but that is
    # highly unlikely to produce good results.
    #
    # A currently disabled alternative seeds from the song file itself:
    #   block_size = X_train.shape[2] // 2 if fft else X_train.shape[2]
    #   seed_seq = seed_generator.generate_from_file(
    #       filename=song, seed_length=seed_len, block_size=block_size,
    #       seq_len=40, std=X_var, mean=X_mean, fft=fft, offsetSec=53)
    seed_len = 1
    # Defines how long the final song is (original note: total song length in
    # samples = max_seq_len * example_len).
    max_seq_len = 6

    for song in inputSongs:
        name = os.path.basename(song)
        print(name)

        seed_seq = seed_generator.generate_copy_seed_sequence(seed_len, X_train)
        print(seed_seq.shape)

        # Save the seed itself (a copy of every frame of the first seed
        # example) so it can be compared with the generated audio.
        seed_frames = [frame.copy() for frame in seed_seq[0]]
        save_generated_example(folder + "input_3" + name, seed_frames,
                               sample_frequency=sample_frequency,
                               useTimeDomain=not fft)

        output = sequence_generator.generate_from_seed(
            model=model, seed=seed_seq, sequence_length=max_seq_len,
            data_variance=X_var, data_mean=X_mean)
        print(len(output))
        print('Finished generation!')

        # Save the generated sequence to a WAV file
        save_generated_example(output_filename + "3" + name, output,
                               sample_frequency=sample_frequency,
                               useTimeDomain=not fft)
#Load up the training data print ('Loading training data') #X_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) #y_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) X_train = np.load(inputFile + '_x.npy') y_train = np.load(inputFile + '_y.npy') print ('Finished loading training data') #Figure out how many frequencies we have in the data freq_space_dims = X_train.shape[2] hidden_dims = config['hidden_dimension_size'] print (X_train.shape) print (hidden_dims) #Creates a lstm network model = network_utils.create_lstm_network(num_frequency_dimensions=freq_space_dims, num_hidden_dimensions=hidden_dims, sz=( X_train.shape[1], X_train.shape[2]) ) #You could also substitute this with a RNN or GRU #model = network_utils.create_gru_network() #Load existing weights if available if os.path.isfile(model_filename): model.load_weights(model_filename) num_iters = 2001 #Number of iterations for training epochs_per_iter = 60 #Number of iterations before we save our model batch_size = 90 #Number of training examples pushed to the GPU per batch. #Larger batch sizes require more memory, but training will be faster print ('Starting training!') while cur_iter < num_iters: print('Iteration: ' + str(cur_iter)) #We set cross-validation to 0,
# X_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) # y_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) # X_mean is a matrix of size (num_frequency_dims,) containing the mean for each frequency dimension # X_var is a matrix of size (num_frequency_dims,) containing the variance for each frequency dimension X_train = np.load(inputFile + '_x.npy') y_train = np.load(inputFile + '_y.npy') X_mean = np.load(inputFile + '_mean.npy') X_var = np.load(inputFile + '_var.npy') print ('Finished loading training data') # Figure out how many frequencies we have in the data freq_space_dims = X_train.shape[2] hidden_dims = config['hidden_dimension_size'] # Creates a LSTM network model = network_utils.create_lstm_network(num_frequency_dimensions=freq_space_dims, num_hidden_dimensions=hidden_dims) # You could also substitute this with a RNN or GRU # model = network_utils.create_gru_network() # # Load existing weights if available if os.path.isfile(model_filename): model.load_weights(model_filename) else: print('Model filename ' + model_filename + ' could not be found!') print ('Starting generation!') # Here's the interesting part # We need to create some seed sequence for the algorithm to start with # Currently, we just grab an existing seed sequence from our training data and use that # However, this will generally produce verbatum copies of the original songs
def __main__():
    """Command-line entry point: generate audio from saved model weights.

    Parses the command line, loads the dataset tensors and normalisation
    statistics for the chosen dataset, builds the generator network for the
    chosen model type ('rgan', 'dgan' or 'gruv'), restores the weights saved
    for the requested run/iteration when that file exists, generates
    ``--batch`` sequences and saves each one as
    ``./generated_song_<run>_<index>.wav``.

    Raises:
        Exception: if ``--model`` is not one of 'rgan', 'dgan' or 'gruv'.
    """
    parser = argparse.ArgumentParser(description="Generate song from current saved training data.")
    # nargs='?' makes the positional optional; without it argparse always
    # requires the argument and the documented default can never apply.
    parser.add_argument("dataset", nargs='?', default='train', type=str,
                        help='The dataset to draw from. Defaults to "train".')
    parser.add_argument("-m", "--model", default='rgan', type=str,
                        help="Model type to use. Defaults to 'rgan' (regression GAN). Can also be dgan (deconvolutional GAN) or 'gruv' for vanilla GRUV model.")
    parser.add_argument("--batch", default=1, type=int,
                        help="Number of generations to run.")
    parser.add_argument("--iteration", default=0, type=int,
                        help="Current training iteration load weights for.")
    parser.add_argument("--seqlen", default=10, type=int,
                        help="Generated sequence length.")
    parser.add_argument("--seedlen", default=1, type=int,
                        help="Length of the seed selected for the generation process.")
    parser.add_argument("--output", default='new', type=str,
                        help="Either 'new' (default) for only new generated output, 'gen' to also include the model's reproduction of the seed, or 'all' to also include the raw seed sequence.")
    parser.add_argument("-r", "--run", default=0, type=int,
                        help="Integer id for this run (used for weight files). Defaults to zero.")
    args = parser.parse_args()

    # Pick the configuration for the requested model; this is also the single
    # place where an unknown model type is rejected.
    if args.model == 'rgan':
        config = nn_config.get_regression_gan_configuration()
    elif args.model == 'dgan':
        config = nn_config.get_deconv_gan_configuration()
    elif args.model == 'gruv':
        config = nn_config.get_default_configuration()
    else:
        raise Exception('invalid model type')

    sample_frequency = config['sampling_frequency']
    # Dataset files live at <dataset_directory><dataset>/<dataset>_*.npy
    input_file = config['dataset_directory'] + args.dataset + '/' + args.dataset
    gen_count = args.batch
    # Weights file name: <model_basename><run>_<iteration>
    model_filename = config['model_basename'] + str(args.run) + '_' + str(args.iteration)
    output_filename = './generated_song'
    include_model_seed = args.output in ('gen', 'all')
    include_raw_seed = args.output == 'all'

    # Load up the data
    if args.dataset == 'train':
        print('Loading training data')
    else:
        print('Loading generation data')
    # Per the original author's notes, X_train is a tensor of size
    # (num_train_examples, num_timesteps, num_frequency_dims), and X_mean /
    # X_var hold the mean / variance of each frequency dimension.
    X_train = np.load(input_file + '_x.npy')
    X_mean = np.load(input_file + '_mean.npy')
    X_var = np.load(input_file + '_var.npy')
    print('Finished loading data')

    # Figure out how many timesteps and frequencies we have in the data
    num_timesteps = X_train.shape[1]
    freq_space_dims = X_train.shape[2]

    # Build the generator network for the selected model type
    print('Initializing network...')
    if args.model == 'rgan':
        model = network_utils.create_regression_generator_network(
            num_frequency_dimensions=freq_space_dims, config=config)
    elif args.model == 'dgan':
        # NOTE(review): the leading 256 presumably matches the width of the
        # random seed vectors drawn below - confirm against network_utils.
        model = network_utils.create_deconvolutional_generator_network(
            256, 1, freq_space_dims, num_timesteps, config, stateful=True)
    else:
        # Only 'gruv' can reach here; other values were rejected above.
        model = network_utils.create_lstm_network(
            freq_space_dims, config['generator_hidden_dims'])

    print('Model summary:')
    model.summary()

    # Load existing weights if available; otherwise generation continues with
    # whatever weights the freshly built network has.
    if os.path.isfile(model_filename):
        print('Loading weights from file {0}'.format(model_filename))
        model.load_weights(model_filename)
    else:
        print('Model filename ' + model_filename + ' could not be found!')

    # Defines how long the final generated song is (original note: total song
    # length in samples = seq_len * example_len).
    seq_len = args.seqlen

    if args.model == 'dgan':
        # The deconvolutional generator is driven by uniform random seed
        # vectors instead of seeds taken from the dataset.
        x_seeds = np.random.uniform(low=-1, high=1, size=(gen_count, 256))
        outputs = generate_from_seeds(model, x_seeds, max_seq_len=seq_len,
                                      batch_size=1, uncenter_data=True,
                                      X_mean=X_mean, X_var=X_var)
    else:
        outputs = generate_from_data(model, X_train, seq_len,
                                     seed_len=args.seedlen,
                                     gen_count=gen_count,
                                     include_raw_seed=include_raw_seed,
                                     include_model_seed=include_model_seed,
                                     uncenter_data=True,
                                     X_var=X_var, X_mean=X_mean)

    for i in range(gen_count):
        # Save the generated sequence to a WAV file
        save_generated_example('{0}_{1}_{2}.wav'.format(output_filename, args.run, i),
                               outputs[i], sample_frequency=sample_frequency)
# X_mean is a matrix of size (num_frequency_dims,) containing the mean for each frequency dimension # X_var is a matrix of size (num_frequency_dims,) containing the variance for each frequency dimension X_train = np.load(inputFile + '_x.npy') y_train = np.load(inputFile + '_y.npy') X_mean = np.load(inputFile + '_mean.npy') X_var = np.load(inputFile + '_var.npy') print('Finished loading training data') # Figure out how many frequencies we have in the data freq_space_dims = X_train.shape[2] data_size = X_train.shape[1] hidden_dims = config['hidden_dimension_size'] # Creates a lstm network model = network_utils.create_lstm_network( num_frequency_dimensions=freq_space_dims, num_hidden_dimensions=hidden_dims, data_size=data_size) # You could also substitute this with a RNN or GRU # model = network_utils.create_gru_network() # Load existing weights if available if os.path.isfile(model_filename): model.load_weights(model_filename) else: print('Model filename ' + model_filename + ' could not be found!') print('Starting generation!') # Here's the interesting part # We need to create some seed sequence for the algorithm to start with # Currently, we just grab an existing seed sequence from our training data and use that # However, this will generally produce verbatum copies of the original songs
# Load up the training data print('Loading training data') # X_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) # y_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) X_train = np.load(inputFile + '_x.npy') y_train = np.load(inputFile + '_y.npy') print('Finished loading training data') # Figure out how many frequencies we have in the data freq_space_dims = X_train.shape[2] #88200 print('freq_space_dims=',freq_space_dims) hidden_dims = config['hidden_dimension_size'] print('Using Mean Absolute Error') # Creates a lstm network model = network_utils.create_lstm_network(num_frequency_dimensions=freq_space_dims, num_hidden_dimensions=hidden_dims) #hidden_dims=1024 # Load existing weights if available if os.path.isfile(model_filename): model.load_weights(model_filename) num_iters = 1 # Number of iterations for training epochs_per_iter = 1 # Number of iterations before we save our model batch_size = 1 # Number of training examples pushed to the GPU per batch. # Larger batch sizes require more memory, but training will be faster print('Starting training!') while cur_iter < num_iters: print('Iteration: ' + str(cur_iter)) # We set cross-validation to 0, # as cross-validation will be on different datasets
#Load up the training data print ('Loading training data') #X_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) #y_train is a tensor of size (num_train_examples, num_timesteps, num_frequency_dims) X_train = np.load(inputFile + '_x.npy') y_train = np.load(inputFile + '_y.npy') print ('Finished loading training data') #Figure out how many frequencies we have in the data print("Xtrain freq: %s" % (str(X_train.shape))) freq_space_dims = X_train.shape[1:] hidden_dims = config['hidden_dimension_size'] #Creates a lstm network model = network_utils.create_lstm_network(num_frequency_dimensions=freq_space_dims, num_hidden_dimensions=hidden_dims, actF="tanh") #You could also substitute this with a RNN or GRU #model = network_utils.create_gru_network() #Load existing weights if available if os.path.isfile(model_filename): model.load_weights(model_filename) num_iters = 50 #Number of iterations for training epochs_per_iter = 25 #Number of iterations before we save our model batch_size = 5 #Number of training examples pushed to the GPU per batch. #Larger batch sizes require more memory, but training will be faster print ('Starting training!') while cur_iter < num_iters: print('Iteration: ' + str(cur_iter)) #We set cross-validation to 0,
BATCH_SIZE = args.n_batch # Load up the training data print('Loading training data') # X_TRAIN Numpy tensor (num_train_examples, num_timesteps, num_frequency_dims) X_TRAIN = np.load(INPUTFILE + '_x.npy') # Y_TRAIN Numpy tensor (num_train_examples, num_timesteps, num_frequency_dims) Y_TRAIN = np.load(INPUTFILE + '_y.npy') print('Finished loading training data') # Creates a lstm network MODEL = network_utils.create_lstm_network(FREQ_SPACE_DIMS=FREQ_SPACE_DIMS, NUM_HIDDEN_DIMENSIONS=HIDDEN_DIMS, NUM_RECURRENT_UNITS=NUM_RECURR) MODEL_WEIGTHS = [ inquirer.List('size', message="Please choose saved weights file for generating the song", choices=glob.glob('weights/LSTM*.h5') ), ] CHOOSE_MODEL = inquirer.prompt(MODEL_WEIGTHS) MODEL_FILENAME = CHOOSE_MODEL["size"] # Load existing weights if available if os.path.isfile(MODEL_FILENAME):