def make_plots(self, waveform, w, M, N, H, sr, filepath=None):
    if filepath:
        if not os.path.exists(filepath):
            os.makedirs(filepath)
    self.mX, self.pX = STFT.stftAnal(waveform, w, N, H)
    plotting.spectogram_plot(self.mX, self.pX, M, N, H, sr, show=False,
                             filepath=filepath + 'model_generation_spectogram')
def main(args):
    # Verify model folder
    if not os.path.exists(args.model_folder):
        raise Exception("Model folder does not exist!")

    # Get network settings
    settings_json_string = '/network_settings.json'
    if args.model_folder[-1] == '/':
        args.model_folder = args.model_folder[:-1]
    if os.path.isfile(args.model_folder):
        model_folder = os.path.join(*args.model_folder.split(sep='/')[:-1])
    else:
        model_folder = args.model_folder
    model_name = model_folder.split(sep='/')[-1]

    with open(model_folder + settings_json_string, 'r') as f:
        network_settings = json.load(f)

    analysis_type = network_settings['analysis_type']
    analysis_settings = network_settings[analysis_type + '_settings']
    M = analysis_settings['M']
    H = analysis_settings['H']
    w = window_dictionary.get(analysis_settings['w'])(M)
    N = analysis_settings['N']
    sr = analysis_settings['sample_rate']

    input_data_shape = (1, 1, network_settings['n_inputs'])  # (num_steps, batch_size, n_inputs)

    with tf.Session() as sess:
        n_hidden = network_settings['n_hidden']  # List of hidden unit sizes

        input_placeholder = tf.placeholder(tf.float32, shape=input_data_shape)
        input = np.zeros(input_data_shape)
        feed_dict = {input_placeholder: input}

        state_placeholders = []
        states = []
        for lstm_layer in range(len(n_hidden)):
            state_placeholders.append(
                (tf.placeholder(tf.float32, shape=(1, n_hidden[lstm_layer]), name='cell'),      # batch_size = 1
                 tf.placeholder(tf.float32, shape=(1, n_hidden[lstm_layer]), name='hidden')))   # batch_size = 1
            states.append([np.zeros((1, n_hidden[lstm_layer])),
                           np.zeros((1, n_hidden[lstm_layer]))])
            feed_dict[state_placeholders[lstm_layer]] = states[lstm_layer]

        print('n_steps:', network_settings['n_steps'])

        n_outputs = network_settings['n_outputs']
        if len(n_hidden) == 1:
            n_outputs = [n_outputs]
        else:
            n_outputs = n_hidden[1:] + [n_outputs]

        x = input_placeholder
        lstm_states = []  # Keep track of LSTM states
        for i in range(len(n_hidden)):
            lstm = SimpleLSTM(n_hidden[i], scope='LSTM_model/layer_{}'.format(i + 1))
            lstm_output, state = lstm(x, state_placeholders[i])  # lstm_output is a Tensor of shape (n_steps, batch_size, n_hidden[i])
            lstm_states.append(state)
            lstm_output = tf.unpack(lstm_output)  # Make it into a list of length n_steps, each entry (batch_size, n_hidden[i])
            dense = Dense(scope="LSTM_model/layer_{}".format(i + 1), size=n_outputs[i],
                          nonlinearity=tf.sigmoid, initialiser=wbVars_Xavier)
            final_output = dense(lstm_output[0])

        saver = tf.train.Saver()
        # saver.restore(sess, './training/saved_models/2016-11-25T08-54-35/model-20')
        load_saved_model_to_resume_training(saver, sess, model_folder)

        outputs_list = []
        for step in range(network_settings['n_steps']):
            output, *states = sess.run([final_output] + [state for state in lstm_states], feed_dict=feed_dict)
            # states is a list of LSTMStateTuple (length num_layers)
            # output is shape (batch_size, n_outputs), but it needs to be (n_steps=1, batch_size, n_outputs)
            output = np.expand_dims(output, axis=0)
            outputs_list.append(output)
            lstm_layer_states = states
            input = output

            # Update feed_dict by giving the new input and states for all layers
            feed_dict[input_placeholder] = input
            for lstm_layer in range(len(n_hidden)):
                feed_dict[state_placeholders[lstm_layer]] = lstm_layer_states[lstm_layer]

        final_outputs = [tf.squeeze(output, [0]) for output in outputs_list]
        final_outputs = tf.pack(final_outputs)  # Stack the rank-2 frames into one (n_steps, batch_size, n_outputs) tensor
        print(final_outputs.get_shape())
        final_outputs = tf.transpose(final_outputs, [1, 0, 2])
        # final_outputs has shape (batch_size, n_frames, n_outputs)
        print('after packing and transposing, final_outputs shape: ', final_outputs.get_shape())

        ####################
        # Compare to ground truth (debugging)
        if args.vector_folder is None:
            print("No vector folder was provided, cannot calculate cost for debugging.")
        else:
            loaded, json_vector_settings, analysis_type_check = load_from_dir_root(args.vector_folder)
            assert analysis_type == analysis_type_check
            _, data_dict = setup_training_data(loaded, 1)  # batch_size = 1
            ground_truth = data_dict['output_data']  # (n_frames, batch_size, n_outputs)
            ground_truth = np.transpose(ground_truth, [1, 0, 2])

            print('Network generation: {}'.format(final_outputs.eval()))
            network_mag = final_outputs.eval()[0, :, :257]
            # plt.figure()
            # plt.subplot(3, 1, 1)
            # plt.plot(network_mag[0, :])
            # plt.subplot(3, 1, 2)
            # plt.plot(network_mag[1, :])
            # plt.subplot(3, 1, 3)
            # plt.plot(network_mag[3, :])
            # plt.show()
            print(network_mag[1, 7], network_mag[1, 17], network_mag[1, 32])

            ground_truth_mag = ground_truth[0, :, :257]
            # plt.figure()
            # plt.subplot(3, 1, 1)
            # plt.plot(ground_truth_mag[0, :])
            # plt.subplot(3, 1, 2)
            # plt.plot(ground_truth_mag[1, :])
            # plt.subplot(3, 1, 3)
            # plt.plot(ground_truth_mag[2, :])
            # plt.show()

            print('Data: {}'.format(ground_truth))
            print('Network output min/max: {}, {}'.format(np.min(final_outputs.eval()), np.max(final_outputs.eval())))
            print('Data min/max: {}, {}'.format(np.min(ground_truth), np.max(ground_truth)))
            print(ground_truth.shape)
            print(final_outputs.eval().shape)
            assert ground_truth.shape == final_outputs.eval().shape
            print('Squared error achieved by network: {}'.format(
                np.sum((ground_truth - final_outputs.eval()) ** 2) / ground_truth.size))

            # mX = final_outputs.eval()[0, :, :257]
            # pX = final_outputs.eval()[0, :, 257:]
            #
            # np.save('./mX_model', mX)
            # np.save('./pX_model', pX)
            #
            # asdfasdf
            # phase_range = network_settings['stft_settings']['phase_range']
            # mag_range = network_settings['stft_settings']['mag_range']
            #
            # # Unnormalise
            # mX = mag_range[0] + (mX * (mag_range[1] - mag_range[0]))
            # mX *= sr / 2  # Undo weird rescaling
            # pX = phase_range[0] + (pX * (phase_range[1] - phase_range[0]))
            #
            # print(mX.shape)  # (num_frames, num_freq_bins)
            # # mX = np.transpose(mX)
            # # pX = np.transpose(pX)
            # spectogram_plot(mX, pX, M, N, H, sr, fig_number=None, filepath=None, show=True)
            #
            # reconst = stftSynth(mX, pX, M, H)
            # print(np.max(reconst), np.min(reconst))
            #
            # soundfile.write('./test_reconst.wav', reconst, sr, format='wav')
            # mX2, pX2 = stftAnal(reconst, w, N, H)
            # print(mX2.shape, pX2.shape)
            # spectogram_plot(mX2, pX2, M, N, H, sr, fig_number=None, filepath=None, show=True)
            # asdfasdfasd
        ####################

        # For testing - just batch size = 1
        result = tf.squeeze(final_outputs, [0]).eval()

        network_output_folder = './generation/network_output/{}/'.format(model_name)
        if not os.path.exists(network_output_folder):
            os.makedirs(network_output_folder)

        if analysis_type == 'sine_model':
            process_output = SineModelOutputProcessingWithActiveTracking(result, network_settings)
            xtfreq, xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
            reconstruction = sineModelSynth(xtfreq, xtmag, xtphase,
                                            nextbiggestpower2(analysis_settings['M']), H, sr)
        elif analysis_type == 'sine_model_without_active_tracking':  # deprecated
            process_output = SineModelOutputProcessing(result, network_settings)
            xtfreq, xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
            reconstruction = sineModelSynth(xtfreq, xtmag, xtphase,
                                            nextbiggestpower2(analysis_settings['M']), H, sr)
        elif analysis_type == 'stft':
            process_output = STFTModelOutputProcessing(result, network_settings)
            xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
            np.save(network_output_folder + 'xtmag_model', xtmag)
            np.save(network_output_folder + 'xtphase_model', xtphase)
            plot_filepath = './generation/plots/{}-(generated_{})'.format(model_name, STARTED_DATESTRING)
            if not os.path.exists(plot_filepath):
                os.makedirs(plot_filepath)
            spectogram_plot(xtmag, xtphase, M, N, H, sr, show=False,
                            filepath=plot_filepath + '/network_output_spectogram')
            reconstruction = stftSynth(xtmag, xtphase, M, H)
        else:
            raise Exception('analysis_type not recognised!')

        print('model_name:', model_name)
        # TODO: extract more of these arguments in the class methods
        process_output.make_plots(reconstruction, w, M, N, H, sr,
                                  filepath='./generation/plots/{}-(generated_{})/'.format(model_name, STARTED_DATESTRING))
        soundfile.write('./generation/wav_output/{}-(generated_{}).wav'.format(model_name, STARTED_DATESTRING),
                        reconstruction, sr, format='wav')
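# ---------------------------------------------------------------------------
# Hypothetical command-line entry point (a minimal sketch, not confirmed by the
# original source): the argument names are assumed from the attributes accessed
# in main() above (args.model_folder, args.vector_folder).
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Generate audio from a trained LSTM model.')
    parser.add_argument('model_folder',
                        help='Folder containing the saved checkpoint and network_settings.json')
    parser.add_argument('--vector_folder', default=None,
                        help='Optional folder of training vectors, used to compare the '
                             'generation against ground truth for debugging')
    main(parser.parse_args())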
def main(args):
    meta_graph = args.model_folder
    model_folder = '/'.join(meta_graph.split('/')[:-1]) + '/'
    model_name = os.path.basename(meta_graph)

    plot_folder = args.plot_folder + model_name
    if not os.path.exists(plot_folder):
        os.makedirs(plot_folder)

    with open(model_folder + 'network_settings.json', 'r') as f:
        network_settings = json.load(f)

    loaded, json_vector_settings, analysis_type = load_from_dir_root(args.vector_folder)

    dataset = DatasetFeed(loaded, 9)
    # for datablock in dataset.data:
    #     print(datablock.shape)
    dataset.set_all_data_blocks_to_max_shape(json_vector_settings['mag_normalised_range'][0])

    # data_shape = dataset.max_data_shape
    # n_steps = data_shape[0]
    # n_outputs = data_shape[1]

    # TODO: Retrieve these from a json
    # batch_size = 96
    # latent_dim = 2
    # n_input = n_outputs

    vae = VAE(model_to_restore=meta_graph)
    global_step = (meta_graph.split("/")[-1]).split('-')[-1]

    # HANDLES
    # vae.x_in, vae.z_mean, vae.z_log_sigma,
    # vae.x_reconstructed, vae.z_, vae.x_reconstructed_,
    # vae.cost, vae.global_step, vae.train_op

    ##################### PLOT IN LATENT SPACE #####################
    next_batch = dataset.next_batch()
    next_batch = np.concatenate(tuple([next_batch] * 5), axis=0)  ## Total hack to get this to work
    mus, _ = vae.encode(np.transpose(next_batch, [1, 0, 2]))  # (n_steps, batch_size, n_inputs)
    mus = mus[:9, :]
    ys, xs = mus.T
    print('Means of z variable:', mus)

    plt.figure()
    plt.title("round {}: {} in latent space".format(global_step, 'Toms'))
    kwargs = {'alpha': 0.8}

    # Total hack to label different classes of audio files and mark them on the plot
    # TODO: Store classes in the dataset (maybe with filenames)
    labels = [0] * 3 + [1] * 3 + [2] * 3
    classes = set(labels)
    if classes:
        colormap = plt.cm.rainbow(np.linspace(0, 1, len(classes)))
        kwargs['c'] = [colormap[i] for i in labels]

        # make room for legend
        ax = plt.subplot(111)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        handles = [mpatches.Circle((0, 0), label=class_, color=colormap[i])
                   for i, class_ in enumerate(classes)]
        ax.legend(handles=handles, shadow=True, bbox_to_anchor=(1.05, 0.45),
                  fancybox=True, loc='center left')

    plt.scatter(xs, ys, **kwargs)

    # if range_:
    #     plt.xlim(*range_)
    #     plt.ylim(*range_)
    # plt.show()

    title = "latent_space.png"
    plt.savefig(os.path.join(plot_folder, title), bbox_inches="tight")

    ##################### EXPLORE LATENT SPACE #####################
    min_, max_, nx, ny = -4, 4, 5, 5

    # complex number steps act like np.linspace
    # row, col indices (i, j) correspond to graph coords (y, x)
    # rollaxis enables iteration over latent space 2-tuples
    zs = np.rollaxis(np.mgrid[max_:min_:ny * 1j, min_:max_:nx * 1j], 0, 3)

    M = json_vector_settings['M']
    N = json_vector_settings['N']
    H = json_vector_settings['H']
    sr = json_vector_settings['sample_rate']
    analysis_settings = network_settings['stft_settings']

    for zrow in zs:  # zrow is a matrix, which will be interpreted as (batch_size, 2)
        for z in zrow:
            z = np.expand_dims(z, axis=0)  # TODO: Allow batches for z decoder
            generation = vae.decode(z)  # shape (n_steps, batch_size, n_outputs) -> batch_size = 1
            generation = np.squeeze(generation, axis=1)  # shape (n_steps, n_outputs)

            # Plot and generate audio
            if analysis_type == 'sine_model':
                process_output = SineModelOutputProcessingWithActiveTracking(generation, network_settings)
                xtfreq, xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
                reconstruction = sineModelSynth(xtfreq, xtmag, xtphase,
                                                nextbiggestpower2(analysis_settings['M']), H, sr)
            elif analysis_type == 'stft':
                process_output = STFTModelOutputProcessing(generation, network_settings)
                xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
                np.save(plot_folder + 'xtmag_model', xtmag)
                np.save(plot_folder + 'xtphase_model', xtphase)
                plot_filepath = './generation/plots/VAE/{}-(generated_{})'.format(model_name, STARTED_DATESTRING)
                if not os.path.exists(plot_filepath):
                    os.makedirs(plot_filepath)
                spectogram_plot(xtmag, xtphase, M, N, H, sr, show=False,
                                filepath=plot_filepath + '/network_output_spectogram_{}'.format(z))
                reconstruction = stftSynth(xtmag, xtphase, M, H)
            else:
                raise Exception('analysis_type not recognised!')

            soundfile.write('./generation/wav_output/VAE/{}-{}-(generated_{}).wav'.format(
                model_name, z, STARTED_DATESTRING), reconstruction, sr, format='wav')
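# ---------------------------------------------------------------------------
# Hypothetical entry point for the VAE exploration script (a minimal sketch;
# argument names are assumed from the attributes used in main() above:
# args.model_folder, args.plot_folder, args.vector_folder, and the default for
# --plot_folder is an assumption).
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Plot and explore the latent space of a trained VAE.')
    parser.add_argument('model_folder',
                        help='Path to the saved VAE meta graph / checkpoint prefix')
    parser.add_argument('vector_folder',
                        help='Folder of analysis vectors used to build the dataset')
    parser.add_argument('--plot_folder', default='./generation/plots/VAE/',
                        help='Folder (ending in "/") under which latent-space plots are written')
    main(parser.parse_args())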
def main():
    if not os.path.exists(OUTPUT_FOLDER):
        os.makedirs(OUTPUT_FOLDER)

    file_count = 0
    datalist = []
    json_settings_file = OUTPUT_FOLDER + '/sine_model_settings.json'
    json_dict = {'N': N, 'M': M, 'H': H, 'w': window,
                 'phase_range': [0, 2 * np.pi], 'max_sines': MAX_N_SINES}

    print("Loading files in {}".format(FOLDER_LIST))
    for folder in FOLDER_LIST:
        if not os.path.exists(folder):
            raise InvalidPathError("{} not found".format(folder))
        file_list = os.listdir(folder)
        for audio_file in file_list:
            if audio_file[-3:] not in ['wav', 'aif']:
                print('Skipping {}'.format(audio_file))
                continue
            print("Processing {}...".format(audio_file), end='')

            file, sr = soundfile.read(folder + '/' + audio_file)
            freq_range = [0, sr / 2]
            if sr != SAMPLE_RATE:
                warnings.warn('Sample rate is not 44100Hz')
            json_dict['sample_rate'] = sr
            json_dict['freq_range'] = freq_range

            # All outputs of sineModelAnal are of the shape (numFrames, maxSines)
            xtfreq, xtmag, xtphase = sineModel.sineModelAnal(file, sr, w, N, H, THRESHOLD,
                                                             maxnSines=MAX_N_SINES)
            active_tracks = (xtfreq != 0.0).astype(int)

            # For plotting the spectrogram of the signal
            mX, pX = STFT.stftAnal(file, w, N, H)
            plotting.plot_sineTracks(mX, pX, M, N, H, sr, xtfreq, show=False,
                                     filepath=PLOT_FOLDER + '/{}_sinetracks'.format(audio_file[:-4]))
            plotting.spectogram_plot(mX, pX, M, N, H, sr, show=False,
                                     filepath=PLOT_FOLDER + '/{}'.format(audio_file[:-4]))

            # Process the frequencies, magnitudes and phases to be normalised in the range [0, 1]
            xtfreq = xtfreq / freq_range[1]
            # TODO: we might want to calculate this across all of the training data instead of file by file
            # TODO: This will need modifying in the json file as well
            min_xtmag = np.min(xtmag)
            max_xtmag = np.max(xtmag[xtmag != 0])  # Recall tracks are separated by zeros - we want to ignore them
            xtmag[xtmag == 0] = min_xtmag  # Could change this later to have the dB floor lower for the zeros
            xtmag = (xtmag - min_xtmag) / (max_xtmag - min_xtmag)
            xtphase = np.mod(xtphase, 2 * np.pi) / (2 * np.pi)  # Between 0 and 1
            json_dict['mag_range'] = [min_xtmag, max_xtmag]

            assert (xtfreq <= 1).all() and (xtfreq >= 0).all()
            assert (xtmag <= 1).all() and (xtmag >= 0).all()
            assert (xtphase <= 1).all() and (xtphase >= 0).all()

            output_path = OUTPUT_FOLDER + '/{}/'.format(file_count)
            if not os.path.exists(output_path):
                os.makedirs(output_path)

            ## Save the numpy arrays separately - couldn't work out how to save and load multiple arrays
            np.save(output_path + 'freq', xtfreq)
            np.save(output_path + 'mag', xtmag)
            np.save(output_path + 'phase', xtphase)
            np.save(output_path + 'active_tracks', active_tracks)
            datalist.append([xtfreq, xtmag, xtphase, active_tracks])
            print('Saved as {}'.format(output_path))
            file_count += 1

    create_json(json_settings_file, json_dict)
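# ---------------------------------------------------------------------------
# Illustration only (not part of the original script): how one saved example
# written by main() above could be read back. The array names match the
# np.save() calls ('freq', 'mag', 'phase', 'active_tracks'); the function name
# is hypothetical, and np / os are the module-level imports already used above.
# ---------------------------------------------------------------------------
def load_sine_model_example(example_dir):
    """Load the per-file arrays saved by main() from a single numbered folder."""
    arrays = {}
    for name in ('freq', 'mag', 'phase', 'active_tracks'):
        arrays[name] = np.load(os.path.join(example_dir, name + '.npy'))
    return arrays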
def main():
    if not os.path.exists(OUTPUT_FOLDER):
        os.makedirs(OUTPUT_FOLDER)
    if not os.path.exists(PLOT_FOLDER):
        os.makedirs(PLOT_FOLDER)

    file_count = 0
    datalist = []
    json_settings_file = OUTPUT_FOLDER + '/stft_settings.json'
    json_dict = {'N': N, 'M': M, 'H': H, 'w': window, 'phase_range': [0, 2 * np.pi]}

    print("Loading files in {}".format(FOLDER_LIST))
    for folder in FOLDER_LIST:
        if not os.path.exists(folder):
            raise InvalidPathError("{} not found".format(folder))
        file_list = os.listdir(folder)
        for audio_file in file_list:
            if audio_file[-3:] not in ['wav', 'aif']:
                print('Skipping {}'.format(audio_file))
                continue
            print("Processing {}...".format(audio_file), end='')

            file, sr = soundfile.read(folder + '/' + audio_file)
            freq_range = [0, sr / 2]
            if sr != SAMPLE_RATE:
                warnings.warn('Sample rate is not 44100Hz')
            print('File length: ', file.size)
            json_dict['sample_rate'] = sr
            json_dict['freq_range'] = freq_range

            mX, pX = STFT.stftAnal(file, w, N, H)
            mX[mX <= AMPLITUDE_THRESHOLD] = AMPLITUDE_THRESHOLD

            # # For SHORT_TEST:
            # mX = mX[:20, :]
            # pX = pX[:20, :]

            # For plotting the spectrogram of the signal
            plotting.spectogram_plot(mX, pX, M, N, H, sr, show=False,
                                     filepath=PLOT_FOLDER + '/{}'.format(audio_file[:-4]))

            # Process the magnitudes and phases to be normalised into the chosen range
            # TODO: we might want to calculate this across all of the training data instead of file by file
            # TODO: This will need modifying in the json file as well
            min_mX = np.min(mX)
            max_mX = np.max(mX)
            json_dict['mag_range'] = [min_mX, max_mX]
            json_dict['mag_normalised_range'] = mX_norm_range
            json_dict['phase_normalised_range'] = pX_norm_range

            mX = (mX - min_mX) / (max_mX - min_mX)  # Between 0 and 1
            mX = (mX * (mX_norm_range[1] - mX_norm_range[0])) + mX_norm_range[0]
            pX = np.mod(pX, 2 * np.pi) / (2 * np.pi)  # Between 0 and 1
            pX = (pX * (pX_norm_range[1] - pX_norm_range[0])) + pX_norm_range[0]

            # Check the data has been normalised correctly
            assert (mX <= mX_norm_range[1]).all() and (mX >= mX_norm_range[0]).all()
            assert (pX <= pX_norm_range[1]).all() and (pX >= pX_norm_range[0]).all()

            output_path = OUTPUT_FOLDER + '/{}/'.format(audio_file[:-4])  # Previously used file_count
            if not os.path.exists(output_path):
                os.makedirs(output_path)

            ## Save the numpy arrays separately - couldn't work out how to save and load multiple arrays
            np.save(output_path + 'mag', mX)
            np.save(output_path + 'phase', pX)
            datalist.append([mX, pX])
            print('Saved as {}'.format(output_path))
            file_count += 1

    create_json(json_settings_file, json_dict)
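# ---------------------------------------------------------------------------
# Illustration only (a sketch, not part of the original script): undoing the
# two normalisation steps applied to mX above, given the 'mag_range' and
# 'mag_normalised_range' values that main() stores in stft_settings.json.
# The function name is hypothetical.
# ---------------------------------------------------------------------------
def unnormalise_magnitudes(mX_norm, settings):
    """Map network-range magnitudes back to dB using the ranges saved by main()."""
    lo, hi = settings['mag_normalised_range']
    min_mX, max_mX = settings['mag_range']
    mX01 = (mX_norm - lo) / (hi - lo)            # back to [0, 1]
    return min_mX + mX01 * (max_mX - min_mX)     # back to the original dB range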