# Imports assumed by the snippets below; DMM, dmm_model, dnn, datagen, pyc,
# get_logger, and the various project-local helpers (suppress_stdout,
# piano_roll_to_pretty_midi, midi_probs_from_signal_dnn, gen_nap_frames,
# calc_obs_probs, make_final_dist, precision_recall_f, ...) come from this
# project's own modules and are not reproduced here.
from os.path import exists
import time

import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile as wav
import torch

import pyro
import pyro.contrib.examples.polyphonic_data_loader as poly
import pyro.distributions as dist
import pyro.poutine as poutine
from pyro.distributions import TransformedDistribution
from pyro.infer import SVI, JitTrace_ELBO, Trace_ELBO
from pyro.optim import ClippedAdam


def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d avg. training seq. length: %.2f N_mini_batches: %d" %
        (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(
            1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences), val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences), test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta1, args.beta2),
                   "clip_norm": args.clip_norm, "lrd": args.lr_decay,
                   "weight_decay": args.weight_decay}
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current
            # mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()
        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(val_batch, val_batch_reversed, val_batch_mask,
                                    val_seq_lengths) / torch.sum(val_seq_lengths)
        test_nll = svi.evaluate_loss(test_batch, test_batch_reversed, test_batch_mask,
                                     test_seq_lengths) / torch.sum(test_seq_lengths)
        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk
    # before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every
        # checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood
        # (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d] %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d] %.4f %.4f" % (epoch, val_nll, test_nll))
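
# The KL-annealing schedule computed inline in process_minibatch above can be
# restated as a standalone helper. This is purely illustrative (a restatement
# of the arithmetic above, not part of the original file): the factor ramps
# linearly from minimum_annealing_factor to 1.0 over the first
# annealing_epochs * N_mini_batches gradient steps.
def kl_annealing_factor(epoch, which_mini_batch, n_mini_batches,
                        annealing_epochs, minimum_annealing_factor):
    if annealing_epochs > 0 and epoch < annealing_epochs:
        step = which_mini_batch + epoch * n_mini_batches + 1
        total = annealing_epochs * n_mini_batches
        return minimum_annealing_factor + \
            (1.0 - minimum_annealing_factor) * float(step) / float(total)
    # after the annealing phase the full KL term is used
    return 1.0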

def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    if 0:
        data = generate_sine_wave_data()
        input_dim = 1
    elif 1:
        data = generate_returns_data()
        input_dim = 1  # return
    else:
        data = poly.load_data(poly.JSB_CHORALES)
        input_dim = 88

    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))
    N_test_data = len(test_seq_lengths)

    log("N_train_data: %d avg. training seq. length: %.2f N_mini_batches: %d" %
        (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(
            1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences), val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences), test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(input_dim=input_dim, rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs, iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta1, args.beta2),
                   "clip_norm": args.clip_norm, "lrd": args.lr_decay,
                   "weight_decay": args.weight_decay}
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current
            # mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # run the guide on a test mini-batch, then free-run the generative model
    def test_minibatch(which_mini_batch, shuffled_indices):
        # compute which sequences in the test set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_test_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, test_data_sequences,
                                  test_seq_lengths, cuda=args.cuda)

        # Get the initial RNN state.
        h_0 = dmm.h_0
        h_0_contig = h_0.expand(1, mini_batch.size(0),
                                dmm.rnn.hidden_size).contiguous()
        # Feed the test sequence into the RNN.
        rnn_output, rnn_hidden_state = dmm.rnn(mini_batch_reversed, h_0_contig)
        # Reverse the time ordering of the hidden state and unpack it.
        rnn_output = poly.pad_and_reverse(rnn_output, mini_batch_seq_lengths)
        print(rnn_output)
        print(rnn_output.shape)

        # set z_prev = z_q_0 to setup the recursive conditioning in q(z_t | ...)
        z_prev = dmm.z_q_0.expand(mini_batch.size(0), dmm.z_q_0.size(0))

        # sample the latents z one time step at a time
        T_max = mini_batch.size(1)
        sequence_output = []
        for t in range(1, T_max + 1):
            # the next two lines assemble the distribution q(z_t | z_{t-1}, x_{t:T})
            z_loc, z_scale = dmm.combiner(z_prev, rnn_output[:, t - 1, :])
            # if we are using normalizing flows, we apply the sequence of
            # transformations parameterized by self.iafs to the base distribution
            # defined in the previous line to yield a transformed distribution
            # that we use for q(z_t|...)
            if len(dmm.iafs) > 0:
                z_dist = TransformedDistribution(dist.Normal(z_loc, z_scale),
                                                 dmm.iafs)
            else:
                z_dist = dist.Normal(z_loc, z_scale)
            assert z_dist.event_shape == ()
            assert z_dist.batch_shape == (len(mini_batch), dmm.z_q_0.size(0))

            # sample z_t from the distribution z_dist
            annealing_factor = 1.0
            with poutine.scale(scale=annealing_factor):
                z_t = pyro.sample("z_%d" % t,
                                  z_dist.mask(mini_batch_mask[:, t - 1:t])
                                        .to_event(1))
            print("z_{}:".format(t), z_t)
            print(z_t.shape)

            # compute the probabilities that parameterize the bernoulli likelihood
            emission_probs_t = dmm.emitter(z_t)
            emission_probs_t_np = emission_probs_t.detach().numpy()
            sequence_output.append(emission_probs_t_np)
            print("x_{}:".format(t), emission_probs_t)
            print(emission_probs_t.shape)

            # the latent sampled at this time step will be conditioned upon
            # in the next time step so keep track of it
            z_prev = z_t

        # Run the model another few steps.
        n_steps = 100
        for t in range(1, n_steps + 1):
            # first compute the parameters of the diagonal gaussian
            # distribution p(z_t | z_{t-1})
            z_loc, z_scale = dmm.trans(z_prev)
            # then sample z_t according to dist.Normal(z_loc, z_scale)
            # note that we use to_event so that the univariate Normal
            # distribution is treated as a multivariate Normal distribution
            # with a diagonal covariance.
            with poutine.scale(scale=annealing_factor):
                z_t = pyro.sample("z_%d" % t,
                                  dist.Normal(z_loc, z_scale)
                                      .mask(mini_batch_mask[:, t - 1:t])
                                      .to_event(1))

            # compute the probabilities that parameterize the bernoulli likelihood
            emission_probs_t = dmm.emitter(z_t)
            emission_probs_t_np = emission_probs_t.detach().numpy()
            sequence_output.append(emission_probs_t_np)

            # # the next statement instructs pyro to observe x_t according to the
            # # bernoulli distribution p(x_t|z_t)
            # pyro.sample("obs_x_%d" % t,
            #             # dist.Bernoulli(emission_probs_t)
            #             dist.Normal(emission_probs_t, 0.5)
            #                 .mask(mini_batch_mask[:, t - 1:t])
            #                 .to_event(1),
            #             obs=mini_batch[:, t - 1, :])

            # the latent sampled at this time step will be conditioned upon
            # in the next time step so keep track of it
            z_prev = z_t

        sequence_output = np.concatenate(sequence_output, axis=1)
        print(sequence_output.shape)

        n_plots = 5
        fig, axes = plt.subplots(nrows=n_plots, ncols=1)
        for i in range(n_plots):
            inp = mini_batch[i, :].numpy().squeeze()
            output = sequence_output[i, :]
            axes[i].plot(range(inp.shape[0]), inp)
            axes[i].plot(range(len(output)), output)
            axes[i].grid()
        # plt.plot(sequence_output[0, :])
        plt.show()

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()
        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(val_batch, val_batch_reversed, val_batch_mask,
                                    val_seq_lengths) / torch.sum(val_seq_lengths)
        test_nll = svi.evaluate_loss(test_batch, test_batch_reversed, test_batch_mask,
                                     test_seq_lengths) / torch.sum(test_seq_lengths)
        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk
    # before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every
        # checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood
        # (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d] %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d] %.4f %.4f" % (epoch, val_nll, test_nll))

    # Testing.
    print("Testing")
    shuffled_indices = torch.randperm(N_test_data)
    which_mini_batch = 0
    test_minibatch(which_mini_batch, shuffled_indices)
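
# generate_sine_wave_data() and generate_returns_data() are called in the
# main() above but are not defined in this file. Below is a minimal sketch of
# the sine-wave generator only, under the assumption that both must return the
# same dict layout as poly.load_data: splits 'train'/'test'/'valid', each with
# 'sequences' of shape [N, T, input_dim] and 'sequence_lengths' of shape [N].
# The project's actual generators may differ.
def generate_sine_wave_data(n_seqs=32, seq_len=100):
    t = torch.arange(seq_len, dtype=torch.float)
    splits = {}
    for name in ('train', 'test', 'valid'):
        # random-phase sine waves, one scalar observation per time step
        phases = torch.rand(n_seqs, 1) * 2 * np.pi
        seqs = torch.sin(0.2 * t + phases).unsqueeze(-1)  # [n_seqs, seq_len, 1]
        splits[name] = {
            'sequences': seqs,
            'sequence_lengths': torch.full((n_seqs,), seq_len, dtype=torch.long),
        }
    return splits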

## Other Params
calc_ac = False
MIDI_lo = 21
MIDI_hi = 21 + 87
num_pitches = MIDI_hi - MIDI_lo + 1
program = 52        # Choral 'Ah'
fs_midi = 2         # Piano roll sampling frequency
win_size_n = 2048   # size of window for ac -- centered in frame

################################################################################
# Main Script
################################################################################
if __name__ == "__main__":
    # Load data
    data = poly.load_data(poly.JSB_CHORALES)
    data_categories = ['train', 'test', 'valid']

    # Set up the different data sets (training, validation, testing) so they
    # can be iterated over
    data_seqs = {}
    seq_lengths = {}
    for category in data_categories:
        data_seqs[category] = data[category]['sequences']
        seq_lengths[category] = data[category]['sequence_lengths']

    ## Generate training data
    for category in data_categories:
        y_vals = np.zeros((0, num_pitches))
        x_vals = np.zeros((0, int(win_size_n // 2)), dtype=np.float32)
        seq_length = seq_lengths[category]

def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d avg. training seq. length: %.2f N_mini_batches: %d" %
        (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    ## instantiate the dmm
    dmm = dmm_model.DMM(rnn_dropout_rate=args.rnn_dropout_rate,
                        num_iafs=args.num_iafs, iaf_dim=args.iaf_dim,
                        use_cuda=args.cuda)
    dmm.eval()

    # setup optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta1, args.beta2),
                   "clip_norm": args.clip_norm, "lrd": args.lr_decay,
                   "weight_decay": args.weight_decay}
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #######################################
    # LOAD TRAINED MODEL AND SAMPLE FROM IT
    #######################################

    ## Basic parameters
    fs_aud = 12000  # sampling rate for audio rendering
    fig = plt.figure()
    ax_gt = fig.add_subplot(1, 2, 1)
    ax_estimated = fig.add_subplot(1, 2, 2)
    # ax_estimated_c = fig.add_subplot(2, 2, 4)
    # ax_dist = fig.add_subplot(2, 2, 2)
    MIDI_lo = 21
    MIDI_hi = 21 + 87
    # MIDI_lo_p =
    # MIDI_hi_p =
    condense = False
    num_notes = MIDI_hi - MIDI_lo + 1

    ## Load the front-end model
    net = dnn.Net(ac_length=1024)
    net.eval()
    save_prefix = "dnn_frontend_poly_test"
    save_path = project_directory + "dnn_front_end/saved_models/"
    net.load_state_dict(torch.load(save_path + save_prefix + ".pt"))

    ## Select a test sequence and collect data and initial distribution
    num_test_seqs = test_seq_lengths.shape[0]
    # idx = np.random.randint(num_test_seqs)
    idx = 22
    print("Using test sequence number {}".format(idx))
    seq_len = test_seq_lengths[idx].item()
    piano_roll_gt = test_data_sequences[idx, :seq_len, :].data.numpy()
    piano_roll_gt_rev = np.ascontiguousarray(np.flip(piano_roll_gt, axis=1))
    z1_dist = dmm.get_z1_dist(torch.tensor(piano_roll_gt_rev))

    ## Plot ground truth and render as audio
    piano_roll_gt_full = np.zeros((128, seq_len))
    piano_roll_gt_full[MIDI_lo:MIDI_hi + 1, :] += piano_roll_gt.transpose().astype(int)
    piano_roll_gt_full *= 64
    print("Synthesizing audio for input...", end="")
    with suppress_stdout():
        pm = piano_roll_to_pretty_midi(piano_roll_gt_full.astype(float),
                                       fs=2, program=52)
        audio = pm.fluidsynth(fs=fs_aud,
                              sf2_path='/usr/share/soundfonts/FluidR3_GM.sf2')
    print("done.")
    wav.write("test_ground_truth_out.wav", fs_aud, audio)
    ax_gt.imshow(np.flip(np.transpose(piano_roll_gt), axis=0))
    ax_gt.set_yticks(np.arange(1, 89)[::10])
    ax_gt.set_yticklabels(np.arange(88, 0, -1)[::10])
    # plt.show()

    ## Generate Neural Activity Patterns
    # Periphery Parameters
    x_lo = 0.05   # 45 Hz
    x_hi = 0.75   # 6050 Hz
    num_channels = 72
    print("Generating NAP...")
    nap, channel_cfs = pyc.carfac_nap(audio, float(fs_aud),
                                      num_sections=num_channels,
                                      x_lo=x_lo, x_hi=x_hi, b=0.5)
    print("Finished.")

    ## Generate auto-correlated frames
    len_sig_n = len(audio)
    len_frame_n = len_sig_n / (seq_len + 2)  # to account for PM's padding
    # num_frames = int(len_sig_n / len_frame_n)
    num_frames = seq_len
    c_times_n = np.arange(0, num_frames) * len_frame_n + int(len_frame_n // 2)
    c_times_t = c_times_n / fs_aud
    win_size_n = 2048   # analysis window size
    win_size_t = win_size_n / fs_aud
    print("Calculating frame data...")
    sac_frames = datagen.gen_nap_sac_frames(nap, float(fs_aud), c_times_t,
                                            win_size_t, normalize=True)
    frames = gen_nap_frames(nap, float(fs_aud), c_times_t, win_size_t)
    print("Finished.")
    assert len(sac_frames) == seq_len

    ## Plot some sample frames
    # fig = plt.figure()
    # idcs = np.random.randint(seq_len, size=10)
    # for k in range(10):
    #     ax = fig.add_subplot(2, 5, k + 1)
    #     ax.plot(frames[k])
    # plt.show()

    ## Generate the observation probabilities
    win_size = int(len(audio) / piano_roll_gt.shape[0])
    sig_len = len(audio)
    num_hops = piano_roll_gt.shape[0]
    assert num_hops == sac_frames.shape[0]
    obs_probs = torch.zeros((num_hops, 2, num_notes), requires_grad=False)
    obs_probs_dnn = torch.zeros((num_hops, 2, num_notes), requires_grad=False)
    dnn_ests = torch.zeros((num_hops, num_notes), requires_grad=False)
    for k in range(num_hops):
        # obs_probs[k, :, :] = midi_probs_nap_klap(frames[k], fs_aud, 1024, b=0.01)
        obs_probs[k, :, :] = midi_probs_from_signal_dnn(sac_frames[k], fs_aud,
                                                        MIDI_lo, MIDI_hi, net,
                                                        ac_size=1024,
                                                        compression_factor=0.825,
                                                        offset=0.05)
        # dnn_ests[k] = torch.where(obs_probs[k, 1, :] > 0.25,
        #                           torch.ones_like(obs_probs[k, 1, :]),
        #                           torch.zeros_like(obs_probs[k, 1, :]))

    ## Plot some sample front-end outputs
    # fig = plt.figure()
    # idcs = np.random.randint(seq_len, size=10)
    # for k in range(10):
    #     ax = fig.add_subplot(2, 5, k + 1)
    #     ax.plot(obs_probs[idcs[k], 1, :].numpy())
    #     ax.plot(obs_probs_dnn[idcs[k], 1, :].numpy())
    #     ax.set_title("{}".format(idcs[k]))
    #     for q in range(piano_roll_gt.shape[1]):
    #         if piano_roll_gt[idcs[k], q] == 1:
    #             ax.plot([q, q], [0, 1.0], color='C3')
    # plt.show()

    # TEST: calculate the probability of the first notes
    piano_roll_gt = torch.from_numpy(piano_roll_gt).type(torch.long)

    # Particle Filtering Parameters
    # num_particles = 10000  # worked!
    num_particles = 500
    z_dim = 100
    x_dim = 88
    z = torch.ones((num_particles, z_dim), requires_grad=False)
    x = torch.ones((num_hops, num_particles, x_dim), requires_grad=False)
    w = torch.ones((num_hops, num_particles), requires_grad=False)
    w_naive = torch.ones((num_hops, num_particles), requires_grad=False)

    # Generate initial particles
    count = 0
    for p in range(num_particles):
        z_prev = pyro.sample("init_z", z1_dist)
        z[p, :], x[0, p, :] = dmm.get_sample(z_prev, p)
        num_same = torch.sum(
            x[0, p, :].type(torch.long) == piano_roll_gt[0]).item()
        if num_same == 88:
            count += 1
    print("Got {} correct samples in step {}".format(count, 0))
    count = 0

    ## Calculate initial weights
    ## Uniform
    # w[0, :] = 1.0 / w.shape[1]
    ## Use calculated obs_probs (from DNN or elsewhere)
    for p in range(num_particles):
        prob = calc_obs_probs(obs_probs[0, :, :], x[0, p, :])
        w[0, p] *= prob
        w_naive[0, p] *= prob
    # Normalize weights
    w[0, :] = normalize_weights(w[0, :])
    w_naive[0, :] = normalize_weights(w_naive[0, :])

    ## Main Particle Filtering Loop
    good_samples = np.zeros(num_hops)
    for f in range(1, num_hops):
        ## Sample new particles
        z_vals, z_probs = particles_to_dist(z, w[f - 1, :])
        for p in range(num_particles):
            idx = discrete_sample(z_probs)
            z[p, :], x[f, p, :] = dmm.get_sample(z_vals[idx],
                                                 p + f * num_particles)
            num_same = torch.sum(
                x[f, p, :].type(torch.long) == piano_roll_gt[f]).item()
            if num_same == 88:
                count += 1

            ## Calculate Weights -- probably bring this into loop above
            # - Primitive observation-free model of observations
            #   (more notes correct, higher prob)
            sx_prob = ((num_same - 78) / 10) ** 2
            w_naive[f, p] *= sx_prob  # * xz_prob
            # - Use Observation probabilities
            sx_prob = calc_obs_probs(obs_probs[f, :, :], x[f, p, :])
            w[f, p] *= sx_prob  # * xz_prob
            # Calculate probability of x given z
            # xz_dist = dist.Bernoulli(dmm.emitter(z[p, :]))
            # xz_prob = torch.exp(torch.sum(xz_dist.log_prob(x[f, p, :]))).item()

        # Report number of samples that corresponded with ground truth
        print("Got {} correct samples in step {} \t".format(count, f), end='')
        good_samples[f] = count
        count = 0

        ## Normalize
        w[f, :] = w[f, :].pow(0.25)
        w[f, :] = normalize_weights(w[f, :])
        w_naive[f, :] = normalize_weights(w_naive[f, :])
        # plt.plot(w[f, :].numpy())
        # plt.plot(w_naive[f, :].numpy())
        # plt.show()
    print("\tDone!")

    # Now pull out the most probable path
    piano_roll_dist = np.zeros((num_hops, 88))
    if condense:
        print("\"Condensing\" final distribution")
        w_c = []
        x_c = []
        for f in range(num_hops):
            w_condensed, x_condensed = make_final_dist(w[f, :], x[f, :, :])
            w_c.append(w_condensed)
            x_c.append(x_condensed)
            print("{} unique samples in step {}/{}".format(
                len(w_condensed), f + 1, num_hops))
            piano_roll_dist[f, :] = np.sum(
                x_condensed * w_condensed[:, np.newaxis], axis=0)
        ax_dist.imshow(np.flip(np.transpose(piano_roll_dist), axis=0))

    ## Most probable path by picking highest weighted particle
    piano_roll_estimated = np.zeros((num_hops, 88))
    piano_roll_estimated_c = np.zeros((num_hops, 88))
    for f in range(num_hops):
        ## just picking highest weight
        idx = np.argmax(w[f, :].numpy())
        piano_roll_estimated[f, :] = x[f, idx, :].numpy()
        ## picking from highest condensed weight
        if condense:
            idx = np.argmax(w_c[f])
            piano_roll_estimated_c[f, :] = x_c[f][idx, :]
    ax_estimated.imshow(np.flip(np.transpose(piano_roll_estimated), axis=0))
    ax_estimated.set_yticks(np.arange(1, 89)[::10])
    ax_estimated.set_yticklabels(np.arange(88, 0, -1)[::10])
    if condense:
        ax_estimated_c.imshow(
            np.flip(np.transpose(piano_roll_estimated_c), axis=0))

    ## Calculate and report precision and recall
    p, r, f = precision_recall_f(piano_roll_gt.numpy(), piano_roll_estimated)
    print("Precision (regular): \t", p)
    print("Recall (regular): \t", r)
    print("F-metric (regular): \t", f)

    ## Check how often the correct sample was chosen when available
    gt = piano_roll_gt.numpy()
    num_available = 0
    num_chosen = 0
    num_chosen_c = 0
    for f in range(num_hops):
        if good_samples[f] > 0:
            num_available += 1
            if np.array_equal(gt[f, :], piano_roll_estimated[f, :]):
                num_chosen += 1
            if condense:
                if np.array_equal(gt[f, :], piano_roll_estimated_c[f, :]):
                    num_chosen_c += 1
    print("Correct select rate (normal) : {}".format(num_chosen / num_available))
    if condense:
        print("Correct select rate (condensed): {}".format(
            num_chosen_c / num_available))

    ## Make audio for estimated piano roll
    # (use the same MIDI offset as the ground-truth rendering above;
    # the original had an off-by-one slice 20:108)
    piano_roll_estimated_full = np.zeros((128, seq_len), dtype=int)
    piano_roll_estimated_full[MIDI_lo:MIDI_hi + 1, :] += \
        piano_roll_estimated.transpose().astype(int)
    piano_roll_estimated_full *= 64
    print("Synthesizing audio for estimate...", end="")
    with suppress_stdout():
        pm = piano_roll_to_pretty_midi(piano_roll_estimated_full.astype(float),
                                       fs=2, program=52)
        audio = pm.fluidsynth(fs=fs_aud,
                              sf2_path='/usr/share/soundfonts/FluidR3_GM.sf2')
    print("done.")
    wav.write("test_estimated_out.wav", fs_aud, audio)
    # ax_estimated.imshow(np.flip(piano_roll_estimated_full, axis=0))

    ## Sample a random sequence starting at the same initial latent state
    # x_vals = []
    # z_vals = []
    # piano_roll_sampled = np.zeros((seq_len, 88))
    # for k in range(seq_len):
    #     z_new, x_new = dmm.get_sample(z_prev, k)
    #     x_vals.append(x_new)
    #     z_vals.append(z_new)
    #     piano_roll_sampled[k, :] = x_new.data.numpy()
    #     z_prev = z_new
    #
    # # Get MIDI from sampled piano roll
    # piano_roll_sampled_full = np.zeros((128, seq_len), dtype=int)
    # piano_roll_sampled_full[20:108, :] += piano_roll_sampled.transpose().astype(int)
    # piano_roll_sampled_full *= 64
    # print("Synthesizing audio for input...", end="")
    # with suppress_stdout():
    #     pm = piano_roll_to_pretty_midi(piano_roll_sampled_full.astype(float),
    #                                    fs=1, program=52)
    #     audio = pm.fluidsynth(fs=fs_aud,
    #                           sf2_path='/usr/share/soundfonts/FluidR3_GM.sf2')
    # print('done.')
    # wav.write("test_sampled_out.wav", fs_aud, audio)
    # ax_sampled.imshow(np.flip(np.transpose(piano_roll_sampled), axis=0))

    # plt.tight_layout()
    plt.show()
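
# normalize_weights(), particles_to_dist(), discrete_sample() and
# precision_recall_f() are used in the particle filtering loop above but
# defined elsewhere in this project. Minimal sketches consistent with how they
# are called (assumptions, not the project's actual implementations):
def normalize_weights(w):
    # rescale a 1-D weight tensor so it sums to one
    return w / torch.sum(w)

def particles_to_dist(z, w):
    # treat the current particle set as a discrete distribution over latents:
    # the particle values together with their normalized weights
    return z.clone(), w / torch.sum(w)

def discrete_sample(probs):
    # draw a single index from a 1-D tensor of probabilities
    return int(torch.multinomial(probs, 1).item())

def precision_recall_f(gt, est):
    # frame-level precision / recall / F-metric for binary piano rolls
    tp = np.sum((gt == 1) & (est == 1))
    fp = np.sum((gt == 0) & (est == 1))
    fn = np.sum((gt == 1) & (est == 0))
    p = tp / (tp + fp) if tp + fp > 0 else 0.0
    r = tp / (tp + fn) if tp + fn > 0 else 0.0
    f = 2 * p * r / (p + r) if p + r > 0 else 0.0
    return p, r, f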

def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d avg. training seq. length: %.2f N_mini_batches: %d" %
        (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 5
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(
            1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences), val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences), test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, use_cuda=args.cuda)

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current
            # mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step (autoencoder-style training)
        loss, loss_AT = dmm.train_ae(mini_batch, mini_batch_reversed,
                                     mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # if checkpoint files provided, load model and optimizer states from disk
    # before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint(dmm, log)

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every
        # checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint(dmm, log)

        # accumulator for our estimate of the negative log likelihood
        # (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d] %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            # sample = dmm.generate(n=n_eval_samples,
            #                       T_max=torch.max(training_seq_lengths).item())
            val_loss = dmm.build_loss(val_batch, val_batch_reversed, val_seq_lengths)
            test_loss = dmm.build_loss(test_batch, test_batch_reversed, test_seq_lengths)
            log("[val/test epoch %04d] %.4f %.4f" %
                (epoch, val_loss[0], test_loss[0]))
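
# save_checkpoint(dmm, log) and load_checkpoint(dmm, log) are called in this
# variant but not defined here (the first main() above defines closure
# versions that read paths from args). A sketch adapted to the two-argument
# call signature; MODEL_PATH is a hypothetical module-level constant, since
# this variant does not thread args through.
MODEL_PATH = "dmm_ae_checkpoint.pt"  # hypothetical path

def save_checkpoint(dmm, log):
    log("saving model to %s..." % MODEL_PATH)
    torch.save(dmm.state_dict(), MODEL_PATH)
    log("done saving model checkpoint to disk.")

def load_checkpoint(dmm, log):
    assert exists(MODEL_PATH), "checkpoint file not found"
    log("loading model from %s..." % MODEL_PATH)
    dmm.load_state_dict(torch.load(MODEL_PATH))
    log("done loading model state.")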