""" # 16 44 117 119 143 151 206 242 267 290 308 354 380 410 421 456 517 573 598 622 638 663 676 688 715 725 749 752 820 851 866 922 # start at 16 since that's the start of a chord sequence (could choose any of the numbers above) for offset in [16, 44, 308, 421, 517, 752, 866]: print("sampling offset {}".format(offset)) x_rec_i = x_rec[offset:offset + num_each] x_ts = piano_roll_imlike_to_image_array(x_rec_i, 0.25) # cut off zero padding on the vertical axis x_ts = x_ts[:, :35] if not os.path.exists("samples"): os.mkdir("samples") save_image_array( x_ts, "samples/multichannel_pixel_cnn_gen_{}_seed_{}_temp_{}.png".format( offset, args.seed, args.temp)) sample_flat_idx = flat_idx[-1000:] p = sample_flat_idx[offset:offset + num_each] note_to_norm_kv = d2["note_to_norm_kv"] midi_to_norm_kv = d2["midi_to_norm_kv"] # EEE more than 1 value maps to 0 in these kv lookups! midi_to_norm_lu = { int(k): int(v) + 1 if k != 0 else 0 for k, v in midi_to_norm_kv[p[0][0]] } norm_to_midi_lu = {v: k for k, v in midi_to_norm_lu.items()} note_to_norm_lu = {
# Run the batch `x` through the graph with bn_flag fed as 1. and keep the
# last fetched output (vs.x_tilde) as the reconstruction.
outs = [vs.z_e_x, vs.z_q_x, vs.z_i_x, vs.x_tilde]
feed = {vs.images: x, vs.bn_flag: 1.}
r = sess.run(outs, feed_dict=feed)
x_rec = r[-1]

# Binarize the raw reconstruction in place at a 0.5 threshold.
x_rec[x_rec > 0.5] = 1.
x_rec[x_rec <= 0.5] = 0.

# Sum both arrays over their last axis, keeping it as a singleton axis.
x_rec = x_rec.sum(axis=-1)[..., None]
x = x.sum(axis=-1)[..., None]

# The sums can exceed 1, so clamp both back to {0, 1} with the same threshold.
for _arr in (x_rec, x):
    _arr[_arr > 0.5] = 1.
    _arr[_arr <= 0.5] = 0.

# Only the first 16 examples are compared and written out.
x, x_rec = x[:16], x_rec[:16]

# Mark every position where original and reconstruction disagree.
diff = abs(x - x_rec)
diff[diff > 0] = 1.

# figure out piano roll / colored plot?
print("writing out multichannel_rec.png, multichannel_orig.png, multichannel_diff.png")
for _arr, _png in (
        (x_rec, "multichannel_rec.png"),
        (x, "multichannel_orig.png"),
        (diff, "multichannel_diff.png"),
):
    save_image_array(_arr, _png)
# Rebuild the graph from the checkpoint's .meta file and restore its values
# into the current session.
saver = tf.train.import_meta_graph(model_path + '.meta')
saver.restore(sess, model_path)

# Bundle the first entry of each named graph collection into one namedtuple
# so the handles can be reached as vs.<field>.
fields = ['images', 'bn_flag', 'z_e_x', 'z_q_x', 'z_i_x', 'x_tilde']
_handles = [tf.get_collection(name)[0] for name in fields]
vs = namedtuple('Params', fields)(*_handles)

# Reconstruct the first 32 examples; the last fetch (vs.x_tilde) is the
# reconstruction, binarized in place at a 0.5 threshold.
x = image_data[:32]
outs = [vs.z_e_x, vs.z_q_x, vs.z_i_x, vs.x_tilde]
feed = {vs.images: x, vs.bn_flag: 1.}
r = sess.run(outs, feed_dict=feed)
x_rec = r[-1]
x_rec[x_rec > 0.5] = 1.
x_rec[x_rec <= 0.5] = 0.

# Render the first 16 originals, then the first 16 reconstructions, with
# identical conversion and save settings.
for _batch, _png in (
        (image_data[:16], "orig_subroll_multichannel.png"),
        (x_rec[:16], "rec_subroll_multichannel.png"),
):
    rr = quantized_imlike_to_image_array(_batch, 0.25)
    save_image_array(rr, _png, resize_multiplier=(4, 1),
                     gamma_multiplier=7, flat_wide=True)

print("wrote out 'orig_subroll_multichannel.png'")
print("wrote out 'rec_subroll_multichannel.png'")
if lcr_i[0] == 0: print(n) """ # 16 44 117 119 143 151 206 242 267 290 308 354 380 410 421 456 517 573 598 622 638 663 676 688 715 725 749 752 820 851 866 922 # start at 16 since that's the start of a chord sequence (could choose any of the numbers above) for offset in [16, 44, 308, 421, 517, 752, 866]: print("sampling offset {}".format(offset)) x_rec_i = x_rec[offset:offset + num_each] x_ts = piano_roll_imlike_to_image_array(x_rec_i, 0.25) if not os.path.exists("samples"): os.mkdir("samples") save_image_array( x_ts, "samples/pianoroll_multichannel_pixel_cnn_markovm4_chords_gen_{}_seed_{}_temp_{}.png" .format(offset, args.seed, args.temp)) sample_flat_idx = flat_idx[-1000:] p = sample_flat_idx[offset:offset + num_each] satb_midi = [[], [], [], []] satb_notes = [[], [], [], []] for n in range(len(x_rec_i)): measure_len = x_rec_i[n].shape[1] # 96 x 48 measure in events = {} for v in range(x_rec_i.shape[-1]): all_up = zip(*np.where(x_rec_i[n][..., v])) time_ordered = [
flat_scalenotes = [sn for sg in copy.deepcopy(d2["scalenotes"]) for sn in sg] sample_scalenotes = flat_scalenotes[-1000:] """ # find some start points for n in range(len(sample_labels)): lcr_i = label_to_lcr[sample_labels[n, 0]] if lcr_i[0] == 0: print(n) """ # 16 44 117 119 143 151 206 242 267 290 308 354 380 410 421 456 517 573 598 622 638 663 676 688 715 725 749 752 820 851 866 922 # start at 16 since that's the start of a chord sequence (could choose any of the numbers above) for offset in [16, 44, 308, 421, 517, 752, 866]: print("sampling offset {}".format(offset)) x_rec_i = x_rec[offset:offset + num_each] save_image_array(x_rec_i, "pixel_cnn_gen_{}.png".format(offset)) sample_flat_idx = flat_idx[-1000:] p = sample_flat_idx[offset:offset + num_each] note_to_norm_kv = d2["note_to_norm_kv"] midi_to_norm_kv = d2["midi_to_norm_kv"] # EEE more than 1 value maps to 0 in these kv lookups! midi_to_norm_lu = { int(k): int(v) + 1 if k != 0 else 0 for k, v in midi_to_norm_kv[p[0][0]] } norm_to_midi_lu = {v: k for k, v in midi_to_norm_lu.items()} note_to_norm_lu = { k: int(v) + 1 if k != "R" else 0
scalenotes = all_scalenotes[iii] for mi in range(len(all_notes[iii])): measure = all_notes[iii][mi] midi_m = notes_to_midi(measure) im = np.zeros((v_imsize, h_imsize, 4)).astype("uint8") for v in range(len(midi_m[0])): for t in range(len(midi_m)): # skip rest items, we will infer them in decoding if midi_m[t][v] != 0: im[:, t, v] = oh_lu[midi_m[t][v]] im = im.astype("float32") measures_as_images.append(im) # track in case something is skipped all_measures_as_images.append(measures_as_images) all_scalenotes_save.append([scalenotes] * len(measures_as_images)) all_chordnames_save.append(all_chordnames[iii]) """ cc = np.concatenate([am[None] for am in all_measures_as_images[0][:16]], axis=0) rr = piano_roll_imlike_to_image_array(cc, 0.25) save_image_array(rr, "tmppr.png") """ final = {} final["measures_as_images"] = all_measures_as_images final["scalenotes"] = all_scalenotes_save final["chordnames"] = all_chordnames_save print("Dumping to music_data_pianoroll_multichannel.npz") np.savez("music_data_pianoroll_multichannel.npz", **final)
# Render entries 8..15 of the "pr" array stored in the file given as the
# first command-line argument, and write the result to
# samples/<input file stem>.png.
import os
import sys

import numpy as np

from tfbldr.datasets import piano_roll_imlike_to_image_array
from tfbldr.datasets import save_image_array

fname = sys.argv[1]
d = np.load(fname)

# Only entries 8..15 of the stored "pr" array are rendered.
xr = d["pr"][8:16]
rr = piano_roll_imlike_to_image_array(xr, 0.25, background="white")
# Keep indices 40..79 along the second axis of the rendered array.
rr = rr[:, 40:80]

# Output name: the input's base name up to its first dot, under samples/.
pngname = "samples/{}.png".format(fname.split("/")[-1].split(".")[0])

# Make sure the output directory exists before writing into it; other
# scripts that save under samples/ create it the same way.
if not os.path.exists("samples"):
    os.mkdir("samples")

print("saving {}".format(pngname))
save_image_array(rr, pngname, resize_multiplier=(4, 1), gamma_multiplier=7,
                 flat_wide=True)
print("image complete")

# NOTE(review): debugging hook kept exactly as in the original -- it opens an
# IPython shell and then aborts with ValueError once that shell exits.
# Remove it when it is no longer needed.
from IPython import embed
embed()
raise ValueError()
# Fetch the model outputs for batch `x`; the last one (vs.x_tilde) is used as
# the per-position scores to sample from.
outs = [vs.z_e_x, vs.z_q_x, vs.z_i_x, vs.x_tilde]
feed = {vs.images: x, vs.bn_flag: 1.}
r = sess.run(outs, feed_dict=feed)
x_tilde_lins = r[-1]

# gumbel sample to get mixture: add -log(-log(u)) noise, with u drawn
# uniformly from [1E-5, 1 - 1E-5), then take the argmax over the last axis.
_uniform_draws = random_state.uniform(1E-5, 1-1E-5, x_tilde_lins.shape)
_gumbel_noise = -np.log(-np.log(_uniform_draws))
idx = np.argmax(x_tilde_lins + _gumbel_noise, axis=-1)
x_rec = idx

# Disabled alternative, kept for reference:
# # components, make it one hot
# idx_oh = np.eye(x_tilde_lins.shape[-1])[idx.ravel()].reshape(idx.shape + (-1,))
# # get rid of the useless channel
# idx_oh = idx_oh[:, 0]
# # transpose to h, w, c, time on horizontal
# x_rec = idx_oh.transpose(0, 3, 1, 2)

# Keep the first four originals (cast to int32) and the first four samples.
x = np.array(x).astype("int32")
x_rec = x_rec[:4]
x = x[:4]

# Plot element [0] of every kept example.
x_img = [plot_piano_roll(example[0], 0.25) for example in x]
x_rec_img = [plot_piano_roll(example[0], 0.25) for example in x_rec]

# Only the first plotted image of each list is written, with a leading
# batch axis added back.
_first_orig = np.array(x_img)[0][None]
_first_rec = np.array(x_rec_img)[0][None]
save_image_array(_first_orig, "original_1d.png", rescale=False)
save_image_array(_first_rec, "reconstructed_1d.png", rescale=False)
# convert back into real pitches?
# jk its bad tho
# Work on the last 1000 entries only (per the original note, these are the
# held-out validation images).
image_data = image_data[-1000:]
# Optional fixed-seed shuffle, currently disabled:
#shuffle_random = np.random.RandomState(112)
#shuffle_random.shuffle(image_data)

with tf.Session(config=config) as sess:
    # Rebuild the graph from the checkpoint's .meta file and restore its
    # values into this session.
    saver = tf.train.import_meta_graph(model_path + '.meta')
    saver.restore(sess, model_path)

    # Bundle the first entry of each named graph collection into one
    # namedtuple so the handles can be reached as vs.<field>.
    fields = ['images', 'bn_flag', 'z_e_x', 'z_q_x', 'z_i_x', 'x_tilde']
    _handles = [tf.get_collection(name)[0] for name in fields]
    vs = namedtuple('Params', fields)(*_handles)

    # Reconstruct the first 32 images; the last fetch (vs.x_tilde) is the
    # reconstruction, binarized in place at a 0.5 threshold.
    x = image_data[:32]
    outs = [vs.z_e_x, vs.z_q_x, vs.z_i_x, vs.x_tilde]
    feed = {vs.images: x, vs.bn_flag: 1.}
    r = sess.run(outs, feed_dict=feed)
    x_rec = r[-1]
    x_rec[x_rec > 0.5] = 1.
    x_rec[x_rec <= 0.5] = 0.

    # Compare only the first 16 examples; any mismatch is marked with 1.
    x, x_rec = x[:16], x_rec[:16]
    diff = abs(x - x_rec)
    diff[diff > 0] = 1.

    print("writing out rec.png, orig.png, diff.png")
    for _arr, _png in ((x_rec, "rec.png"), (x, "orig.png"), (diff, "diff.png")):
        save_image_array(_arr, _png)
np.sum(np.abs(np.diff(np.where(x_rec[i][:, :, 0])[0]))) for i in range(len(x_rec)) ] simul = [np.max(np.sum(x_rec[i][:, :, 0], axis=0)) for i in range(len(x_rec))] non_boring_indices = [ i for i in range(len(x_rec)) if 0 < delta_counts[i] <= 12 and simul[i] <= 1 ] # includes rest measures, rests arent boring x_rec = x_rec[np.array(non_boring_indices)] if len(x_rec) < (3 * num_to_plot): raise ValueError("Removed too many boring ones, set num_to_plot lower") x_rec = x_rec[:3 * num_to_plot] x_rec = np.concatenate((x_rec[::3], x_rec[1::3], x_rec[2::3]), axis=-1) save_image_array(x_rec, "samp_vq_pixel_cnn_music_bxe.png") ce = copy.deepcopy(d1["centers"]) # only use bottom 3 voices ce = [cei for cei in ce if len(cei) == 4] # find all the ones with 0 or 1 rest non_rest = [i for i in range(len(ce)) if sum(ce[i] == 0) == 0] start_chunks = [ i for i in range(len(non_rest) - num_to_plot) if np.max(np.diff(non_rest[i:i + num_to_plot])) == 1 ] random_state.shuffle(start_chunks) random_state.shuffle(start_chunks) ii = non_rest[start_chunks[0]] skeleton = np.array([sk for sk in ce[ii:(ii + num_to_plot)]]) joined = np.zeros((len(x_rec), x_rec.shape[2], skeleton.shape[-1]))