# Assumes the module-level imports and globals defined earlier in this file:
# tensorflow as tf, numpy as np, librosa, progressbar, matplotlib.pyplot as plt,
# the project modules model, ops, loader, visul, the option objects g/d/o and
# the checkpoint directories logdir, generatedir, ablatelogs.


def generate(length, conditionOn=None):
    sess = tf.Session()
    sr = g.options["sample_rate"]
    with tf.variable_scope("GEN/"):
        Generator = model.WaveNetModel(1,
                                       dilations=g.options["dilations"],
                                       filter_width=g.options["filter_width"],
                                       residual_channels=g.options["residual_channels"],
                                       dilation_channels=g.options["dilation_channels"],
                                       skip_channels=g.options["skip_channels"],
                                       quantization_channels=g.options["quantization_channels"],
                                       use_biases=g.options["use_biases"],
                                       scalar_input=g.options["scalar_input"],
                                       initial_filter_width=g.options["initial_filter_width"],
                                       #global_condition_channels=g.options["noise_dimensions"],
                                       global_condition_cardinality=None,
                                       histograms=True,
                                       add_noise=True)

    # Restore the generator weights; the fast-generation caches
    # (state_buffer/pointer) are excluded since they are not trained.
    variables_to_restore = {
        var.name[:-2]: var for var in tf.global_variables()
        if not ('state_buffer' in var.name or 'pointer' in var.name)
        and "GEN/" in var.name}
    saver = tf.train.Saver(variables_to_restore)
    print("Restoring model")
    ckpt = tf.train.get_checkpoint_state(generatedir)
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Model {} restored".format(ckpt.model_checkpoint_path))

    sampleph = tf.placeholder(tf.float32, [1, Generator.receptive_field, 1])
    noiseph = tf.placeholder(tf.float32, [1, 1, g.options["noise_dimensions"]])
    encoded = ops.mu_law_encode(sampleph, g.options["quantization_channels"])
    one_hot = Generator._one_hot(encoded)
    next_sample = Generator._create_network(one_hot, None, noise=noiseph)
    # Softmax over the quantization channels gives the distribution the next
    # sample is drawn from.
    next_sample_probs = tf.nn.softmax(next_sample, axis=2)

    if conditionOn is not None:
        # Seed with a random receptive-field-sized window of real audio.
        audio, sr = librosa.load(conditionOn, g.options["sample_rate"], mono=True)
        start = np.random.randint(0, len(audio) - Generator.receptive_field)
        fakey = audio[start:start + Generator.receptive_field]
    else:
        # Seed with silence followed by a single random sample.
        fakey = [0.0] * (Generator.receptive_field - 1)
        fakey.append(np.random.uniform())

    noise = np.random.normal(g.options["noise_mean"],
                             g.options["noise_variance"],
                             size=g.options["noise_dimensions"]).reshape(1, 1, -1)

    fakey = np.reshape(fakey, [1, -1, 1])
    generated = sess.run(encoded, feed_dict={sampleph: fakey})
    fakey = sess.run(one_hot, feed_dict={sampleph: fakey})
    print(np.shape(generated))

    bar = progressbar.ProgressBar(maxval=length,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    for i in range(length):
        prediction = sess.run(next_sample_probs,
                              feed_dict={one_hot: fakey, noiseph: noise})
        newest_sample = prediction[-1, -1, :]
        # Sample from the predicted distribution. (Temperature scaling of the
        # logits was tried here and is left out, i.e. temperature = 1.0.)
        sample = np.random.choice(
            np.arange(g.options["quantization_channels"]), p=newest_sample)
        #sample = np.argmax(newest_sample)
        generated = np.append(generated, np.reshape(sample, [1, 1, 1]), 1)
        fakey = sess.run(one_hot, feed_dict={
            encoded: generated[:, -Generator.receptive_field:, :]})
        bar.update(i + 1)
    bar.finish()

    generated = np.reshape(generated, [-1])
    decoded = sess.run(ops.mu_law_decode(generated,
                                         g.options["quantization_channels"]))
    generated = np.array(decoded)
    librosa.output.write_wav("Generated/gangen.wav", generated, sr, norm=True)
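
# A minimal usage sketch (hedged): assumes a trained checkpoint exists under
# `generatedir`; the conditioning path below is a hypothetical example file,
# not one shipped with the code.
#
#     generate(2 * g.options["sample_rate"])  # ~2 s of audio from the silence seed
#     generate(16000, conditionOn="maestro-v1.0.0/2017/some_piece.wav")
#
# `length` counts mu-law samples, so the generated duration is
# length / g.options["sample_rate"] seconds.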
def create_histograms(layerNames, layerIndexes):
    activations = {}
    coord = tf.train.Coordinator()
    sess = tf.Session()
    writer = tf.summary.FileWriter("histograms")
    with tf.variable_scope("GEN/"):
        Generator = model.WaveNetModel(g.options["batch_size"],
                                       dilations=g.options["dilations"],
                                       filter_width=g.options["filter_width"],
                                       residual_channels=g.options["residual_channels"],
                                       dilation_channels=g.options["dilation_channels"],
                                       skip_channels=g.options["skip_channels"],
                                       quantization_channels=g.options["quantization_channels"],
                                       use_biases=g.options["use_biases"],
                                       scalar_input=g.options["scalar_input"],
                                       initial_filter_width=g.options["initial_filter_width"],
                                       global_condition_cardinality=None,
                                       histograms=False,
                                       add_noise=True)

    # Restore only the generator weights (the original predicate here was
    # parenthesised differently from generate() above; this matches the intent).
    variables_to_restore = {
        var.name[:-2]: var for var in tf.global_variables()
        if not ('state_buffer' in var.name or 'pointer' in var.name)
        and "GEN/" in var.name}

    # Data reading
    l = loader.AudioReader("maestro-v1.0.0/2017", g.options["sample_rate"],
                           Generator.receptive_field, coord, stepSize=1,
                           sampleSize=g.options["sample_size"],
                           silenceThreshold=0.1)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    l.startThreads(sess)

    saver = tf.train.Saver(variables_to_restore)
    print("Restoring model")
    ckpt = tf.train.get_checkpoint_state(logdir)
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Model {} restored".format(ckpt.model_checkpoint_path))

    deque = l.deque(g.options["batch_size"])
    zeros = np.zeros((1, 1, g.options["noise_dimensions"]))
    encoded = ops.mu_law_encode(deque, g.options["quantization_channels"])
    one_hot = Generator._one_hot(encoded)
    for name in layerNames:
        activations[name] = {}
        for i in layerIndexes:
            activations[name][i] = Generator._get_layer_activation(
                name, i, one_hot, None, noise=zeros)
            # (Per-unit tf.summary.histogram writing was tried here and is
            # left disabled in favour of the matplotlib histogram below.)

    acts = []
    # Discard the first batches so the histogram is not dominated by file starts.
    for _ in range(500):
        sess.run(deque)
    # Collect the activations of one hand-picked channel (13) of the first
    # requested layer over 1000 batches.
    for i in range(1000):
        act = sess.run(activations['dilated_stack'][layerIndexes[0]])[0, :, 13]
        print(i / 1000)
        acts = np.concatenate((acts, act))
    plt.hist(acts, 32)
    plt.xlabel("Activation")
    plt.ylabel("Frequency")
    plt.show()
    coord.request_stop()
    coord.join(threads)
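
# Example invocation (hedged): the layer name and index must exist in the
# trained model; 'dilated_stack' with index 45 is what the mode dispatch
# further down uses. Note the channel being histogrammed (13) is hard-coded
# in the loop above.
#
#     create_histograms(['dilated_stack'], [45])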
def ablate(layerNames, layerIndexes):
    sm = {}
    activations = {}
    means = {}
    variations = {}
    counters = {}
    sum2 = {}
    batch_size = {}
    sum2save = {}
    sum2saveop = {}
    meanssaveop = {}
    counterssaveop = {}
    variationssaveop = {}
    coord = tf.train.Coordinator()
    sess = tf.Session()
    with tf.variable_scope("GEN/"):
        Generator = model.WaveNetModel(g.options["batch_size"],
                                       dilations=g.options["dilations"],
                                       filter_width=g.options["filter_width"],
                                       residual_channels=g.options["residual_channels"],
                                       dilation_channels=g.options["dilation_channels"],
                                       skip_channels=g.options["skip_channels"],
                                       quantization_channels=g.options["quantization_channels"],
                                       use_biases=g.options["use_biases"],
                                       scalar_input=g.options["scalar_input"],
                                       initial_filter_width=g.options["initial_filter_width"],
                                       global_condition_cardinality=None,
                                       histograms=False,
                                       add_noise=True)

    variables_to_restore = {
        var.name[:-2]: var for var in tf.global_variables()
        if not ('state_buffer' in var.name or 'pointer' in var.name)
        and "GEN/" in var.name}

    # Data reading
    l = loader.AudioReader("maestro-v1.0.0/2017", g.options["sample_rate"],
                           Generator.receptive_field, coord, stepSize=1,
                           sampleSize=g.options["sample_size"],
                           silenceThreshold=0.1)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    l.startThreads(sess)

    saver = tf.train.Saver(variables_to_restore)
    print("Restoring model")
    ckpt = tf.train.get_checkpoint_state(logdir)
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Model {} restored".format(ckpt.model_checkpoint_path))

    deque = l.deque(g.options["batch_size"])
    zeros = np.zeros((1, 1, g.options["noise_dimensions"]))
    encoded = ops.mu_law_encode(deque, g.options["quantization_channels"])
    one_hot = Generator._one_hot(encoded)
    to_save = {}

    # Create dicts of streaming-statistics ops, one per (layer name, index).
    for name in layerNames:
        sm[name] = {}
        activations[name] = {}
        means[name] = {}
        variations[name] = {}
        counters[name] = {}
        sum2[name] = {}
        batch_size[name] = {}
        sum2save[name] = {}
        sum2saveop[name] = {}
        meanssaveop[name] = {}
        counterssaveop[name] = {}
        variationssaveop[name] = {}
        for i in layerIndexes:
            activations[name][i] = Generator._get_layer_activation(
                name, i, one_hot, None, noise=zeros)
            # Per-channel sums over the batch and time axes.
            sm[name][i] = tf.reduce_sum(activations[name][i], axis=[0, 1])
            sum2[name][i] = tf.reduce_sum(tf.square(activations[name][i]),
                                          axis=[0, 1])
            # Number of reduced elements per channel: batch * time
            # (the original summed the two dimensions, which undercounts).
            batch_size[name][i] = tf.to_float(
                tf.shape(activations[name][i])[0] *
                tf.shape(activations[name][i])[1])
            # Persistent accumulators, checkpointed separately under ABL/.
            sum2save[name][i] = tf.Variable(tf.zeros_like(sm[name][i]),
                                            name="ABL/sum2_" + name + str(i))
            to_save["ABL/sum2_" + name + str(i)] = sum2save[name][i]
            counters[name][i] = tf.Variable(0, name="ABL/counter_" + name + str(i),
                                            dtype=tf.float32)
            to_save["ABL/counter_" + name + str(i)] = counters[name][i]
            means[name][i] = tf.Variable(tf.zeros_like(sm[name][i]),
                                         name="ABL/mean_" + name + str(i))
            to_save["ABL/mean_" + name + str(i)] = means[name][i]
            variations[name][i] = tf.Variable(tf.zeros_like(sm[name][i]),
                                              name="ABL/var_" + name + str(i))
            to_save["ABL/var_" + name + str(i)] = variations[name][i]
            # Accumulate the sum of squares (the original accumulated the
            # plain sum here, which breaks the variance estimate below).
            sum2saveop[name][i] = tf.assign(sum2save[name][i],
                                            sum2save[name][i] + sum2[name][i])
            # Streaming mean: (old_mean * n + batch_sum) / (n + batch_elems).
            meanssaveop[name][i] = tf.assign(
                means[name][i],
                ((means[name][i] * counters[name][i]) + sm[name][i]) /
                (counters[name][i] + batch_size[name][i]))
            counterssaveop[name][i] = tf.assign(
                counters[name][i], counters[name][i] + batch_size[name][i])
            # Standard deviation via sqrt(E[x^2] - E[x]^2); abs() guards
            # against small negative values from rounding.
            variationssaveop[name][i] = tf.assign(
                variations[name][i],
                tf.sqrt(tf.abs((sum2save[name][i] / counters[name][i]) -
                               tf.square(means[name][i]))))

    # Initialise only the variables that were not just restored; running the
    # global initializer here would clobber the restored generator weights.
    sess.run(tf.variables_initializer(
        [v for v in tf.global_variables()
         if v.name[:-2] not in variables_to_restore]))
    print("Dict created")

    print("Restoring previous statistics")
    ablatesaver = tf.train.Saver(to_save)
    ablateckpt = tf.train.get_checkpoint_state(ablatelogs)
    if ablateckpt is not None:
        optimistic_restore(sess, ablateckpt.model_checkpoint_path,
                           tf.get_default_graph())
        print("Statistics restored")

    # Eat up some batches so that statistics aren't gathered at the beginning.
    for _ in range(1000):
        sess.run(deque)

    # Gather statistics. How much statistics do we need? Preferably a lot :)
    length = 10000
    bar = progressbar.ProgressBar(maxval=length,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    for k in range(length):
        for name in layerNames:
            for i in layerIndexes:
                # Fetch the activations once, then feed them back so all
                # accumulator ops see the same batch.
                act = sess.run(activations[name][i])
                sess.run([sum2saveop[name][i], meanssaveop[name][i],
                          counterssaveop[name][i]],
                         feed_dict={activations[name][i]: act})
                sess.run(variationssaveop[name][i])
        bar.update(k + 1)
    bar.finish()

    model_name = 'ablate.ckpt'
    checkpoint_path = os.path.join(ablatelogs, model_name)
    ablatesaver.save(sess, checkpoint_path)
    coord.request_stop()
    coord.join(threads)
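
# The accumulator ops above implement streaming per-channel statistics:
# mean <- (mean * n + sum(x)) / (n + m) and std = sqrt(E[x^2] - E[x]^2).
# A self-contained NumPy sketch of the same update (names here are
# illustrative only, not part of the pipeline):
def _streaming_stats_demo():
    import numpy as np
    rng = np.random.RandomState(0)
    data = rng.randn(100, 64)          # 100 "batches" of 64 activations each
    mean, sum2, n = 0.0, 0.0, 0.0
    for batch in data:
        m = batch.size                 # elements in this batch
        mean = (mean * n + batch.sum()) / (n + m)
        sum2 += np.square(batch).sum()
        n += m
    std = np.sqrt(abs(sum2 / n - mean ** 2))
    # Matches the direct computation up to floating-point error.
    assert np.allclose(mean, data.mean()) and np.allclose(std, data.std())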
def investigate(layerNames, layerIndexes, conditionOn, save_index=6):
    vis = visul.Visualizer(2 * 16)
    means = {}
    variations = {}
    ablations = {}
    coord = tf.train.Coordinator()
    sess = tf.Session()
    to_restore = {}
    with tf.variable_scope("GEN/"):
        Generator = model.WaveNetModel(1,
                                       dilations=g.options["dilations"],
                                       filter_width=g.options["filter_width"],
                                       residual_channels=g.options["residual_channels"],
                                       dilation_channels=g.options["dilation_channels"],
                                       skip_channels=g.options["skip_channels"],
                                       quantization_channels=g.options["quantization_channels"],
                                       use_biases=g.options["use_biases"],
                                       scalar_input=g.options["scalar_input"],
                                       initial_filter_width=g.options["initial_filter_width"],
                                       global_condition_cardinality=None,
                                       histograms=False,
                                       add_noise=True)

    variables_to_restore = {
        var.name[:-2]: var for var in tf.global_variables()
        if not ('state_buffer' in var.name or 'pointer' in var.name)
        and "GEN/" in var.name}
    saver = tf.train.Saver(variables_to_restore)
    print("Restoring model")
    ckpt = tf.train.get_checkpoint_state(logdir)
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Model {} restored".format(ckpt.model_checkpoint_path))

    sampleph = tf.placeholder(tf.float32, [1, Generator.receptive_field, 1])
    controlph = tf.placeholder(tf.float32, [1, None, g.options["residual_channels"]])
    eph = tf.placeholder(tf.float32, [1, None, g.options["residual_channels"]])
    noiseph = tf.placeholder(tf.float32, [1, 1, g.options["noise_dimensions"]])
    encoded = ops.mu_law_encode(sampleph, g.options["quantization_channels"])

    # Ablation limits and active-unit counts are fed per (layer name, index),
    # keyed like the activation dicts.
    ablationsholder = {layerNames[0]: {layerIndexes[0]: controlph}}
    eholder = {layerNames[0]: {layerIndexes[0]: eph}}

    one_hot = Generator._one_hot(encoded)
    controlled_sample = Generator._create_ablated_network(
        one_hot, None, ablationsholder, eholder, noise=noiseph)
    c_probs = tf.nn.softmax(controlled_sample, axis=2)
    next_sample = Generator._create_network(one_hot, None, noise=noiseph)
    probs = tf.nn.softmax(next_sample, axis=2)

    # Start audio: this should be a previously generated part of the experiment.
    audio, sr = librosa.load(conditionOn, g.options["sample_rate"], mono=True)
    fakey = audio[-Generator.receptive_field:]
    print(np.shape(fakey))
    noise = np.random.normal(g.options["noise_mean"],
                             g.options["noise_variance"],
                             size=g.options["noise_dimensions"]).reshape(1, 1, -1)

    for name in layerNames:
        means[name] = {}
        ablations[name] = {}
        variations[name] = {}
        for i in layerIndexes:
            ablations[name][i] = Generator._get_layer_activation(
                name, i, one_hot, None, noise=noiseph)
            abl = tf.reduce_mean(ablations[name][i], axis=[0, 1])
            means[name][i] = tf.Variable(tf.zeros_like(abl),
                                         name="ABL/mean_" + name + str(i))
            to_restore["ABL/mean_" + name + str(i)] = means[name][i]
            variations[name][i] = tf.Variable(tf.zeros_like(abl),
                                              name="ABL/var_" + name + str(i))
            to_restore["ABL/var_" + name + str(i)] = variations[name][i]

    print("Restoring previous statistics")
    ablatesaver = tf.train.Saver(to_restore)
    ablateckpt = tf.train.get_checkpoint_state(ablatelogs)
    if ablateckpt is not None:
        optimistic_restore(sess, ablateckpt.model_checkpoint_path,
                           tf.get_default_graph())
        print("Statistics restored")

    name = layerNames[0]
    i = layerIndexes[0]
    # A unit counts as active when its activation exceeds mean + std.
    limits = means[name][i] + variations[name][i]
    mask = ablations[name][i] > limits
    mask_ph = tf.placeholder(tf.bool)
    causal_ph = tf.placeholder(tf.float32)
    causal_counter = tf.cast(mask, tf.float32) + causal_ph
    stillactive = tf.logical_and(mask, mask_ph)  # kept from the original; unused

    fakey = np.reshape(fakey, [1, -1, 1])
    generated = sess.run(encoded, feed_dict={sampleph: fakey})
    fakey = sess.run(one_hot, feed_dict={sampleph: fakey})
    sl = Generator.receptive_field
    length = sl * 1 + 1
    bar = progressbar.ProgressBar(maxval=length,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    prevNote = ""
    counter = 0
    act = sess.run(ablations[name][i], feed_dict={one_hot: fakey, noiseph: noise})
    print(np.shape(act))
    causal_count = sess.run(tf.cast(tf.zeros_like(mask), tf.bool),
                            feed_dict={ablations[name][i]: act})

    # Phase 1: free-run until the detected note changes, counting per unit
    # how often it was active.
    for k in range(length):
        act, prediction = sess.run([ablations[name][i], probs],
                                   feed_dict={one_hot: fakey, noiseph: noise})
        newest_sample = prediction[-1, -1, :]
        newmask = sess.run(mask, feed_dict={ablations[name][i]: act})
        causal_count = sess.run(causal_counter,
                                feed_dict={causal_ph: causal_count, mask: newmask})
        sample = np.random.choice(
            np.arange(g.options["quantization_channels"]), p=newest_sample)
        generated = np.append(generated, np.reshape(sample, [1, 1, 1]), 1)
        if counter % sl == 0 and counter != 0:
            decoded = sess.run(ops.mu_law_decode(generated[0, -sl:, 0],
                                                 g.options["quantization_channels"]))
            note = vis.detectNote(decoded, g.options["sample_rate"])
            amp = vis.loudness(decoded)
            print("note: %s, amp %0.4f" % (note, amp))
            if prevNote != note:
                prevNote = note
                break
        counter += 1
        fakey = sess.run(one_hot, feed_dict={
            encoded: generated[:, -Generator.receptive_field:, :]})
        bar.update(k + 1)
    bar.finish()

    save_ctrl = np.reshape(generated, [-1])[-sl:]
    decoded = sess.run(ops.mu_law_decode(save_ctrl,
                                         g.options["quantization_channels"]))
    save_ctrl = np.array(decoded)
    librosa.output.write_wav("Generated/Comparision/to_copy_" + str(save_index) + ".wav",
                             save_ctrl, sr, norm=True)

    # Fraction of steps each unit was active during phase 1.
    causal_count = causal_count / length
    print(sess.run(tf.reduce_sum(causal_count, axis=[0, 1])))
    print(np.shape(act))
    ablat = sess.run(tf.tile(tf.reshape(limits, [1, 1, -1]),
                             [1, np.shape(act)[1], 1]))
    print("Target == " + note)
    target = note
    target_freq = vis.getFreq(target)

    # Phase 2: get a new bit of audio for the generator and run a controlled
    # and an uncontrolled generator side by side.
    fakey = audio[-Generator.receptive_field:]
    fakey = np.reshape(fakey, [1, -1, 1])
    generated = sess.run(encoded, feed_dict={sampleph: fakey})
    uncontrolled_generated = generated
    fakey = sess.run(one_hot, feed_dict={sampleph: fakey})
    uncontrolled = fakey
    counter = 0
    np.random.seed()  # reseed from system entropy
    for k in range(length):
        c_prediction = sess.run(c_probs, feed_dict={
            one_hot: fakey,
            ablationsholder[name][i]: ablat,
            eholder[name][i]: causal_count,
            noiseph: noise})
        prediction = sess.run(probs, feed_dict={one_hot: uncontrolled,
                                                noiseph: noise})
        c_newest_sample = c_prediction[-1, -1, :]
        newest_sample = prediction[-1, -1, :]
        c_sample = np.random.choice(
            np.arange(g.options["quantization_channels"]), p=c_newest_sample)
        sample = np.random.choice(
            np.arange(g.options["quantization_channels"]), p=newest_sample)
        generated = np.append(generated, np.reshape(c_sample, [1, 1, 1]), 1)
        uncontrolled_generated = np.append(uncontrolled_generated,
                                           np.reshape(sample, [1, 1, 1]), 1)
        fakey = sess.run(one_hot, feed_dict={
            encoded: generated[:, -Generator.receptive_field:, :]})
        uncontrolled = sess.run(one_hot, feed_dict={
            encoded: uncontrolled_generated[:, -Generator.receptive_field:, :]})
        if counter % sl == 0 and counter != 0:
            decoded = sess.run(ops.mu_law_decode(generated[0, -sl:, 0],
                                                 g.options["quantization_channels"]))
            note = vis.detectNote(decoded, g.options["sample_rate"])
            note_freq = vis.getFreq(note)
            tamp = vis.loudness(decoded)
            print("note: %s, amp %0.4f, freq error (abs): %0.4f"
                  % (note, tamp, np.abs(target_freq - note_freq)))
        counter += 1

    generated = np.reshape(generated, [-1])
    decoded = sess.run(ops.mu_law_decode(generated,
                                         g.options["quantization_channels"]))
    generated = np.array(decoded)
    librosa.output.write_wav("Generated/Comparision/controlled_" + str(save_index) + ".wav",
                             generated, sr, norm=True)
    uncontrolled_generated = np.reshape(uncontrolled_generated, [-1])
    u_decoded = sess.run(ops.mu_law_decode(uncontrolled_generated,
                                           g.options["quantization_channels"]))
    uncontrolled_generated = np.array(u_decoded)
    librosa.output.write_wav("Generated/Comparision/uncontrolled_" + str(save_index) + ".wav",
                             uncontrolled_generated, sr, norm=True)
    sess.close()
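
# Hedged usage sketch: `conditionOn` should point at audio produced by
# generate() above, and the ABL/* statistics must first be gathered with
# ablate() for the same layer/index so they can be restored here:
#
#     ablate(['dilated_stack'], [45])
#     investigate(['dilated_stack'], [45], "Generated/gangen.wav", save_index=0)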
def feature_max(layerName, layerIndex, unit_index=None):
    sess = tf.Session()
    sr = g.options["sample_rate"]
    with tf.variable_scope("GEN/"):
        Generator = model.WaveNetModel(1,
                                       dilations=g.options["dilations"],
                                       filter_width=g.options["filter_width"],
                                       residual_channels=g.options["residual_channels"],
                                       dilation_channels=g.options["dilation_channels"],
                                       skip_channels=g.options["skip_channels"],
                                       quantization_channels=g.options["quantization_channels"],
                                       use_biases=g.options["use_biases"],
                                       scalar_input=g.options["scalar_input"],
                                       initial_filter_width=g.options["initial_filter_width"],
                                       #global_condition_channels=g.options["noise_dimensions"],
                                       global_condition_cardinality=None,
                                       histograms=True,
                                       add_noise=True)

    variables_to_restore = {
        var.name[:-2]: var for var in tf.global_variables()
        if not ('state_buffer' in var.name or 'pointer' in var.name)
        and "GEN/" in var.name}
    saver = tf.train.Saver(variables_to_restore)
    print("Restoring model")
    ckpt = tf.train.get_checkpoint_state(logdir)
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Model {} restored".format(ckpt.model_checkpoint_path))

    sampleph = tf.placeholder(tf.float32, [1, Generator.receptive_field, 1])
    zeros = np.zeros((1, 1, g.options["noise_dimensions"]))
    encoded = ops.mu_law_encode(sampleph, g.options["quantization_channels"])
    one_hot = Generator._one_hot(encoded)
    to_optimise = Generator._get_layer_activation(layerName, layerIndex,
                                                  one_hot, None, noise=zeros)
    if unit_index is not None and unit_index < int(to_optimise.shape[2]):
        to_optimise = to_optimise[:, :, unit_index]
    print("to_optimise shape")
    print(to_optimise.shape)

    # Gradient of the chosen activation w.r.t. the one-hot network input; for
    # a non-scalar target tf.gradients sums over all its elements.
    gs = tf.gradients(to_optimise, one_hot)[0]

    # Start from a random distribution over the quantization channels
    # (softmax is the helper imported/defined elsewhere in this file).
    prob_dist = softmax(np.random.random_sample(
        (1, Generator.receptive_field, g.options["quantization_channels"])))

    length = 2048
    bar = progressbar.ProgressBar(maxval=length,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    for i in range(length):
        grads = sess.run(gs, feed_dict={one_hot: prob_dist})
        # Gradient-ascent step, then renormalise back to a distribution.
        prob_dist += 0.01 * grads
        prob_dist = softmax(prob_dist)
        bar.update(i + 1)
    bar.finish()
    print(prob_dist)
    max_path = np.reshape(np.argmax(prob_dist, axis=2), [-1])  # most likely input path

    import matplotlib.pyplot as plt
    # Show channels on the y-axis, time on the x-axis. The original reshape
    # to (channels, time) scrambled the data; transposing is the fix.
    plt.imshow(prob_dist[0].T)
    title = layerName + ", layer: " + str(layerIndex)
    if unit_index is not None:
        title += ", channel: " + str(unit_index)
    plt.title(title)
    plt.show()
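
# Example (hedged): visualise the input distribution that maximises unit 13
# of dilated-stack layer 45; any layer/index/unit present in the model works.
# With unit_index=None, tf.gradients sums over all elements of the layer, so
# the whole layer's summed activation is maximised instead.
#
#     feature_max('dilated_stack', 45, unit_index=13)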
    tf.reset_default_graph()
elif mode == modes[5]:  # HISTOGRAMS
    create_histograms(['dilated_stack'], [45])
elif mode == modes[2]:  # TRAIN
    fw = tf.summary.FileWriter(logdir)
    coord = tf.train.Coordinator()
    with tf.variable_scope("GEN/"):
        Generator = model.WaveNetModel(g.options["batch_size"],
                                       dilations=g.options["dilations"],
                                       filter_width=g.options["filter_width"],
                                       residual_channels=g.options["residual_channels"],
                                       dilation_channels=g.options["dilation_channels"],
                                       skip_channels=g.options["skip_channels"],
                                       quantization_channels=g.options["quantization_channels"],
                                       use_biases=g.options["use_biases"],
                                       scalar_input=g.options["scalar_input"],
                                       initial_filter_width=g.options["initial_filter_width"],
                                       #global_condition_channels=g.options["noise_dimensions"],
                                       global_condition_cardinality=None,
                                       histograms=True,
                                       final_layer_size=g.options["quantization_channels"],
                                       add_noise=True)
    with tf.variable_scope("DIS/"):
        Discriminator = model.WaveNetModel(g.options["batch_size"],
                                           dilations=d.options["dilations"],
                                           filter_width=d.options["filter_width"],
                                           residual_channels=d.options["residual_channels"],
                                           dilation_channels=d.options["dilation_channels"],
def generate(length, conditionOn=None):
    sess = tf.Session()
    sr = o.options["sample_rate"]
    with tf.variable_scope("GEN/"):
        Generator = model.WaveNetModel(
            1,
            dilations=o.options["dilations"],
            filter_width=o.options["filter_width"],
            residual_channels=o.options["residual_channels"],
            dilation_channels=o.options["dilation_channels"],
            skip_channels=o.options["skip_channels"],
            quantization_channels=o.options["quantization_channels"],
            use_biases=o.options["use_biases"],
            scalar_input=o.options["scalar_input"],
            initial_filter_width=o.options["initial_filter_width"],
            global_condition_channels=o.options["noise_dimensions"],
            global_condition_cardinality=None,
            histograms=True)

    sampleph = tf.placeholder(tf.float32, [1, Generator.receptive_field, 1])
    noiseph = tf.placeholder(tf.float32, [1, 1, o.options["noise_dimensions"]])
    next_sample = Generator._create_network(sampleph, noiseph)
    # Shift the window one step and append the newly predicted sample.
    fake_sample = tf.concat(
        (tf.slice(sampleph, [0, 1, 0], [-1, -1, -1]), next_sample), 1)

    # Get the graph
    variables_to_restore = {
        var.name[:-2]: var for var in tf.global_variables()
        if not ('state_buffer' in var.name or 'pointer' in var.name)
        and "GEN/" in var.name
    }
    saver = tf.train.Saver(variables_to_restore)
    print("Restoring model")
    ckpt = tf.train.get_checkpoint_state(logdir)
    #saver.restore(sess, ckpt.model_checkpoint_path)
    saver.restore(sess, "D:\\MAESTRO\\tfb_logs\\model.ckpt-35368")
    print("Model {} restored".format(ckpt.model_checkpoint_path))

    generated = []
    if conditionOn is not None:
        audio, sr = librosa.load(conditionOn, o.options["sample_rate"], mono=True)
        start = np.random.randint(0, len(audio) - Generator.receptive_field)
        fakey = audio[start:start + Generator.receptive_field]
    else:
        fakey = [0.0] * (Generator.receptive_field - 1)
        fakey.append(np.random.uniform())

    noise = np.random.normal(o.options["noise_mean"],
                             o.options["noise_variance"],
                             size=o.options["noise_dimensions"]).reshape(1, 1, -1)
    fakey = np.reshape(fakey, [1, -1, 1])

    bar = progressbar.ProgressBar(maxval=length,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    for i in range(length):
        fakey = sess.run(fake_sample, feed_dict={
            sampleph: fakey,
            noiseph: noise
        })
        print(fakey[-1, -1, -1])
        generated.append(fakey[-1, -1, -1])
        bar.update(i + 1)
    bar.finish()

    print(np.shape(generated))
    print(type(generated[0]))
    generated = sess.run(ops.mu_law_decode(generated, 256))  # hard-coded 256 channels
    generated = np.array(generated)
    print(np.shape(generated))
    print(type(generated[0]))
    print(generated)
    librosa.output.write_wav("Generated/gangen.wav", generated, sr, norm=True)
    print("Wrote file Generated/gangen.wav.")
if True: print("Done") else: fw = tf.summary.FileWriter(logdir) coord = tf.train.Coordinator() with tf.variable_scope("GEN/"): Generator = model.WaveNetModel( o.options["batch_size"], dilations=o.options["dilations"], filter_width=o.options["filter_width"], residual_channels=o.options["residual_channels"], dilation_channels=o.options["dilation_channels"], skip_channels=o.options["skip_channels"], quantization_channels=o.options["quantization_channels"], use_biases=o.options["use_biases"], scalar_input=o.options["scalar_input"], initial_filter_width=o.options["initial_filter_width"], global_condition_channels=o.options["noise_dimensions"], global_condition_cardinality=None, histograms=True) with tf.variable_scope("DIS/"): Discriminator = model.WaveNetModel( o.options["batch_size"], dilations=o.options["dilations"], filter_width=o.options["filter_width"], residual_channels=o.options["residual_channels"], dilation_channels=o.options["dilation_channels"], skip_channels=o.options["skip_channels"],