Example #1
0
def main(args):
    """Train the DMM on the JSB chorales with stochastic variational inference.

    The function loads the dataset, prepares the validation/test batches once,
    builds the DMM together with a ``ClippedAdam`` optimizer and an SVI object,
    optionally restores a checkpoint, and then runs ``args.num_epochs`` epochs
    of mini-batch training with periodic evaluation and checkpointing.

    Args:
        args: parsed command-line namespace. Attributes read here: ``log``,
            ``mini_batch_size``, ``cuda``, ``rnn_dropout_rate``, ``num_iafs``,
            ``iaf_dim``, ``learning_rate``, ``beta1``, ``beta2``,
            ``clip_norm``, ``lr_decay``, ``weight_decay``, ``jit``,
            ``annealing_epochs``, ``minimum_annealing_factor``,
            ``save_model``, ``save_opt``, ``load_model``, ``load_opt``,
            ``checkpoint_freq`` and ``num_epochs``.
    """
    # route all diagnostics through the logger and record the run configuration
    log = get_logger(args.log)
    log(args)

    # load the chorales and pull sequences / lengths out of each partition
    data = poly.load_data(poly.JSB_CHORALES)
    train_part, test_part, valid_part = data['train'], data['test'], data['valid']
    training_seq_lengths = train_part['sequence_lengths']
    training_data_sequences = train_part['sequences']
    test_seq_lengths = test_part['sequence_lengths']
    test_data_sequences = test_part['sequences']
    val_seq_lengths = valid_part['sequence_lengths']
    val_data_sequences = valid_part['sequences']

    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(training_seq_lengths.sum())
    # number of mini-batches per epoch: one extra batch picks up any remainder
    whole_batches, leftover = divmod(N_train_data, args.mini_batch_size)
    N_mini_batches = int(whole_batches + (leftover > 0))

    summary = "N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d"
    log(summary % (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # evaluate on validation/test data every this many epochs
    val_test_frequency = 50
    # number of copies of the evaluation data packed into one batch
    n_eval_samples = 1

    def rep(x):
        """Repeat every row of ``x`` ``n_eval_samples`` times along dim 0.

        Copies of the same row end up adjacent, so the result has
        ``x.size(0) * n_eval_samples`` rows and the trailing dims of ``x``.
        """
        tiled_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        multiples = [n_eval_samples] + [1] * (len(x.size()) - 1)
        tiled = x.repeat(multiples)
        interleaved = tiled.reshape(n_eval_samples, -1).transpose(1, 0)
        return interleaved.reshape(tiled_shape)

    # prepare the (repeated) validation and test batches once, up front
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_indices = torch.arange(n_eval_samples * val_data_sequences.shape[0])
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        val_indices, rep(val_data_sequences), val_seq_lengths, cuda=args.cuda)
    test_indices = torch.arange(n_eval_samples * test_data_sequences.shape[0])
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        test_indices, rep(test_data_sequences), test_seq_lengths, cuda=args.cuda)

    # build the model
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim,
              use_cuda=args.cuda)

    # build the optimizer
    adam = ClippedAdam({
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay,
    })

    # build the inference algorithm (jit-compiled ELBO on request)
    if args.jit:
        elbo = JitTrace_ELBO()
    else:
        elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # helpers used by the training loop below

    def save_checkpoint():
        """Write the model state dict and the optimizer state to disk."""
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    def load_checkpoint():
        """Restore the model state dict and the optimizer state from disk."""
        checkpoints_present = exists(args.load_opt) and exists(args.load_model)
        assert checkpoints_present, "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        """Take one SVI gradient step on a training mini-batch; return its loss."""
        # the KL annealing factor is unity unless we are inside the annealing window,
        # where it grows linearly with the number of mini-batches processed so far
        annealing_factor = 1.0
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            min_af = args.minimum_annealing_factor
            steps_done = float(which_mini_batch + epoch * N_mini_batches + 1)
            steps_total = float(args.annealing_epochs * N_mini_batches)
            annealing_factor = min_af + (1.0 - min_af) * (steps_done / steps_total)

        # slice this mini-batch's sequence indices out of the epoch's permutation
        first = which_mini_batch * args.mini_batch_size
        last = min(first + args.mini_batch_size, N_train_data)
        batch_indices = shuffled_indices[first:last]

        # let the data loader prepare the batch
        batch, batch_reversed, batch_mask, batch_seq_lengths = poly.get_mini_batch(
            batch_indices, training_data_sequences, training_seq_lengths,
            cuda=args.cuda)

        # the gradient step itself; its return value is the training loss
        return svi.step(batch, batch_reversed, batch_mask, batch_seq_lengths,
                        annealing_factor)

    def do_evaluation():
        """Return the per-time-slice validation and test losses."""
        # evaluation mode for the RNN (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        val_loss = svi.evaluate_loss(val_batch, val_batch_reversed,
                                     val_batch_mask, val_seq_lengths)
        val_nll = val_loss / torch.sum(val_seq_lengths)
        test_loss = svi.evaluate_loss(test_batch, test_batch_reversed,
                                      test_batch_mask, test_seq_lengths)
        test_nll = test_loss / torch.sum(test_seq_lengths)

        # back to training mode (i.e. turn drop-out on again if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # resume from a checkpoint when both paths were supplied
    resume_requested = args.load_opt != '' and args.load_model != ''
    if resume_requested:
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    last_tick = time.time()
    for epoch in range(args.num_epochs):
        # periodic checkpoint (never before the first epoch)
        checkpoint_due = (args.checkpoint_freq > 0 and epoch > 0
                          and epoch % args.checkpoint_freq == 0)
        if checkpoint_due:
            save_checkpoint()

        # fresh random visiting order for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # accumulate the loss (-elbo) over all mini-batches; gradient steps happen here
        epoch_nll = 0.0
        for batch_number in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, batch_number, shuffled_indices)

        # report training diagnostics; dt spans from the previous report to this one
        now = time.time()
        epoch_time = now - last_tick
        last_tick = now
        log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # periodic validation/test evaluation (never after the first epoch)
        evaluation_due = (val_test_frequency > 0 and epoch > 0
                          and epoch % val_test_frequency == 0)
        if evaluation_due:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d]  %.4f  %.4f" % (epoch, val_nll, test_nll))
Example #2
0
def main(args):
    """Train the DMM with SVI, then generate and plot sequences for one test mini-batch.

    The function loads a dataset, prepares the validation/test batches once,
    builds the DMM with a ``ClippedAdam`` optimizer and an SVI object,
    optionally restores a checkpoint, trains for ``args.num_epochs`` epochs
    with periodic evaluation and checkpointing, and finally calls
    ``test_minibatch`` on the first mini-batch of a shuffled test set.

    Args:
        args: parsed command-line namespace. Attributes read here: ``log``,
            ``mini_batch_size``, ``cuda``, ``rnn_dropout_rate``, ``num_iafs``,
            ``iaf_dim``, ``learning_rate``, ``beta1``, ``beta2``,
            ``clip_norm``, ``lr_decay``, ``weight_decay``, ``jit``,
            ``annealing_epochs``, ``minimum_annealing_factor``,
            ``save_model``, ``save_opt``, ``load_model``, ``load_opt``,
            ``checkpoint_freq`` and ``num_epochs``.
    """
    # setup logging
    log = get_logger(args.log)
    log(args)

    # Which dataset to train on. Change the name to switch source; the
    # input dimensionality handed to the DMM follows the choice.
    data_source = "returns"
    if data_source == "sine":
        data = generate_sine_wave_data()
        input_dim = 1
    elif data_source == "returns":
        data = generate_returns_data()
        input_dim = 1
    else:
        data = poly.load_data(poly.JSB_CHORALES)
        input_dim = 88
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    N_test_data = len(test_seq_lengths)
    # `test_seq_lengths` is rebound further down to the lengths returned by
    # poly.get_mini_batch for the full evaluation batch, which need not be in
    # dataset order (TODO confirm against the loader). Keep the dataset-ordered
    # lengths so test_minibatch can index them alongside `test_data_sequences`.
    test_seq_lengths_by_index = test_seq_lengths

    log("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d"
        % (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        """Repeat every row of ``x`` ``n_eval_samples`` times along dim 0."""
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(
            1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences),
        val_seq_lengths,
        cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences),
        test_seq_lengths,
        cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(input_dim=input_dim,
              rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim,
              use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # now we're going to define some functions we need to form the main training loop

    def save_checkpoint():
        """Write the model state dict and the optimizer state to disk."""
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    def load_checkpoint():
        """Restore the model state dict and the optimizer state from disk."""
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        """Take one SVI gradient step on a training mini-batch; return its loss."""
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch
            # in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = which_mini_batch * args.mini_batch_size
        mini_batch_end = min((which_mini_batch + 1) * args.mini_batch_size,
                             N_train_data)
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    def test_minibatch(which_mini_batch, shuffled_indices, n_steps=100, max_plots=5):
        """Reconstruct one test mini-batch, roll the model forward, and plot the result.

        For every time step of the mini-batch a latent ``z_t`` is sampled from
        the guide distribution (``dmm.combiner`` on the RNN output) and pushed
        through ``dmm.emitter``. Afterwards ``n_steps`` further latents are
        sampled from ``dmm.trans`` with no observations. The emitter outputs of
        both phases are concatenated along axis 1 and plotted against the
        observed sequences.

        Args:
            which_mini_batch: index of the mini-batch inside ``shuffled_indices``.
            shuffled_indices: permutation of the test-set sequence indices.
            n_steps: number of free-running steps generated after the
                observed part of the sequences.
            max_plots: upper bound on the number of sequences plotted; fewer
                are drawn when the mini-batch is smaller.
        """
        # compute which sequences in the test set we should grab
        mini_batch_start = which_mini_batch * args.mini_batch_size
        mini_batch_end = min((which_mini_batch + 1) * args.mini_batch_size,
                             N_test_data)
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader;
        # the lengths passed in must line up with `test_data_sequences`, hence the
        # dataset-ordered copy rather than the rebound `test_seq_lengths`
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, test_data_sequences,
                                  test_seq_lengths_by_index, cuda=args.cuda)

        # no KL annealing at test time; defined up front so both loops can use it
        annealing_factor = 1.0

        # same convention as do_evaluation: RNN in evaluation mode while testing.
        # No gradients are needed for generation.
        dmm.rnn.eval()
        with torch.no_grad():
            # Get the initial RNN state.
            h_0_contig = dmm.h_0.expand(1, mini_batch.size(0),
                                        dmm.rnn.hidden_size).contiguous()

            # Feed the test sequence into the RNN.
            rnn_output, _ = dmm.rnn(mini_batch_reversed, h_0_contig)

            # Reverse the time ordering of the hidden state and unpack it.
            rnn_output = poly.pad_and_reverse(rnn_output, mini_batch_seq_lengths)
            print(rnn_output)
            print(rnn_output.shape)

            # set z_prev = z_q_0 to setup the recursive conditioning in q(z_t |...)
            z_prev = dmm.z_q_0.expand(mini_batch.size(0), dmm.z_q_0.size(0))

            # sample the latents z one time step at a time
            T_max = mini_batch.size(1)
            sequence_output = []
            for t in range(1, T_max + 1):
                # the next line assembles the distribution q(z_t | z_{t-1}, x_{t:T})
                z_loc, z_scale = dmm.combiner(z_prev, rnn_output[:, t - 1, :])

                # if we are using normalizing flows, we apply the sequence of
                # transformations in dmm.iafs to the base distribution defined in the
                # previous line to yield the distribution we use for q(z_t|...)
                if len(dmm.iafs) > 0:
                    z_dist = TransformedDistribution(dist.Normal(z_loc, z_scale),
                                                     dmm.iafs)
                else:
                    z_dist = dist.Normal(z_loc, z_scale)
                assert z_dist.event_shape == ()
                assert z_dist.batch_shape == (len(mini_batch), dmm.z_q_0.size(0))

                # sample z_t from the distribution z_dist
                with pyro.poutine.scale(scale=annealing_factor):
                    z_t = pyro.sample(
                        "z_%d" % t,
                        z_dist.mask(mini_batch_mask[:, t - 1:t]).to_event(1))

                print("z_{}:".format(t), z_t)
                print(z_t.shape)

                # compute the parameters of the likelihood for this time step
                emission_probs_t = dmm.emitter(z_t)

                # .cpu() so this also works when the model lives on the GPU
                sequence_output.append(emission_probs_t.detach().cpu().numpy())

                print("x_{}:".format(t), emission_probs_t)
                print(emission_probs_t.shape)

                # the latent sampled at this time step will be conditioned upon in
                # the next time step so keep track of it
                z_prev = z_t

            # Run the model another few steps past the end of the observed data.
            for t in range(1, n_steps + 1):
                # first compute the parameters of the diagonal gaussian
                # distribution p(z_t | z_{t-1})
                z_loc, z_scale = dmm.trans(z_prev)

                # then sample z_t according to dist.Normal(z_loc, z_scale).
                # There is no observation mask for these steps: the mask only
                # covers the T_max observed time steps, and slicing it beyond
                # that yields an empty tensor that cannot be broadcast. Site
                # names continue after T_max so they stay unique.
                with pyro.poutine.scale(scale=annealing_factor):
                    z_t = pyro.sample(
                        "z_%d" % (T_max + t),
                        dist.Normal(z_loc, z_scale).to_event(1))

                # compute the parameters of the likelihood for this time step
                emission_probs_t = dmm.emitter(z_t)
                sequence_output.append(emission_probs_t.detach().cpu().numpy())

                # the latent sampled at this time step will be conditioned upon
                # in the next time step so keep track of it
                z_prev = z_t

        # put the RNN back into training mode
        dmm.rnn.train()

        sequence_output = np.concatenate(sequence_output, axis=1)
        print(sequence_output.shape)

        # never ask for more rows than there are sequences in the mini-batch
        n_plots = min(max_plots, sequence_output.shape[0])
        if n_plots > 0:
            # squeeze=False keeps `axes` two-dimensional even for a single row
            _, axes = plt.subplots(nrows=n_plots, ncols=1, squeeze=False)
            for i in range(n_plots):
                observed = mini_batch[i, :].cpu().numpy().squeeze()
                generated = sequence_output[i, :]
                ax = axes[i, 0]
                ax.plot(range(observed.shape[0]), observed)
                ax.plot(range(len(generated)), generated)
                ax.grid()

            plt.show()

    def do_evaluation():
        """Return the per-time-slice validation and test losses."""
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        # compute the validation and test loss
        val_nll = svi.evaluate_loss(
            val_batch, val_batch_reversed, val_batch_mask,
            val_seq_lengths) / torch.sum(val_seq_lengths)
        test_nll = svi.evaluate_loss(
            test_batch, test_batch_reversed, test_batch_mask,
            test_seq_lengths) / torch.sum(test_seq_lengths)

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk
    # before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood
        # (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch,
                                           shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d]  %.4f  %.4f" %
                (epoch, val_nll, test_nll))

    # Testing.
    print("Testing")
    shuffled_indices = torch.randperm(N_test_data)
    which_mini_batch = 0
    test_minibatch(which_mini_batch, shuffled_indices)
## Other Params
calc_ac = False
MIDI_lo, MIDI_hi = 21, 21 + 87      # inclusive bounds: num_pitches counts both ends
num_pitches = 1 + MIDI_hi - MIDI_lo
program = 52        # Choral 'Ah'
fs_midi = 2         # Piano roll sampling frequency
win_size_n = 2048   # size of window for ac -- centered in frame


################################################################################
# Main Script
################################################################################
if __name__ == "__main__":
    # Fetch the JSB chorales and name the splits that get iterated over
    data = poly.load_data(poly.JSB_CHORALES)
    data_categories = ['train', 'test', 'valid']

    # Per-split lookup tables for the sequences and for their lengths
    data_seqs = {split: data[split]['sequences'] for split in data_categories}
    seq_lengths = {
        split: data[split]['sequence_lengths'] for split in data_categories
    }

    ## Generate training data
    for category in data_categories:
        # zero-row arrays: one column per pitch for y, half a window for x
        y_vals = np.zeros((0, num_pitches))
        x_vals = np.zeros((0, win_size_n // 2), dtype=np.float32)
        seq_length = seq_lengths[category]
Example #4
0
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d"
        % (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    ## instantiate the dmm
    dmm = dmm_model.DMM(rnn_dropout_rate=args.rnn_dropout_rate,
                        num_iafs=args.num_iafs,
                        iaf_dim=args.iaf_dim,
                        use_cuda=args.cuda)
    dmm.eval()

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #######################################
    # LOAD TRAINED MODEL AND SAMPLE FROM IT
    #######################################
    ## Basic parameters
    fs_aud = 12000  # sampling rate for audio rendering
    fig = plt.figure()
    ax_gt = fig.add_subplot(1, 2, 1)
    ax_estimated = fig.add_subplot(1, 2, 2)
    # ax_estimated_c = fig.add_subplot(2,2,4)
    # ax_dist        = fig.add_subplot(2,2,2)
    MIDI_lo = 21
    MIDI_hi = 21 + 87
    # MIDI_lo_p =
    # MIDI_hi_p =
    condense = False
    num_notes = MIDI_hi - MIDI_lo + 1

    ## Load the front-end model
    net = dnn.Net(ac_length=1024)
    net.eval()
    save_prefix = "dnn_frontend_poly_test"
    save_path = project_directory + "dnn_front_end/saved_models/"
    net.load_state_dict(torch.load(save_path + save_prefix + ".pt"))

    ## Select a testing set and collect data and initial distribution
    num_test_seqs = test_seq_lengths.shape[0]
    # idx = np.random.randint(num_test_seqs)
    idx = 22
    print("Using test sequence number {}".format(idx))
    seq_len = test_seq_lengths[idx].item()
    piano_roll_gt = test_data_sequences[idx, :seq_len, :].data.numpy()
    piano_roll_gt_rev = np.ascontiguousarray(np.flip(piano_roll_gt, axis=1))
    z1_dist = dmm.get_z1_dist(torch.tensor(piano_roll_gt_rev))

    ## Plot ground truth and render as audio
    piano_roll_gt_full = np.zeros((128, seq_len))
    piano_roll_gt_full[MIDI_lo:MIDI_hi +
                       1, :] += piano_roll_gt.transpose().astype(int)
    piano_roll_gt_full *= 64
    print("Sythensizing audio for input...", end="")
    with suppress_stdout():
        pm = piano_roll_to_pretty_midi(piano_roll_gt_full.astype(float),
                                       fs=2,
                                       program=52)
        audio = pm.fluidsynth(fs=fs_aud,
                              sf2_path='/usr/share/soundfonts/FluidR3_GM.sf2')
    print("done.")
    wav.write("test_ground_truth_out.wav", fs_aud, audio)
    ax_gt.imshow(np.flip(np.transpose(piano_roll_gt), axis=0))
    ax_gt.set_yticks(np.arange(1, 89)[::10])
    ax_gt.set_yticklabels(np.arange(88, 0, -1)[::10])
    # plt.show()

    ## Generate Neural Activity Patterns
    # Periphery Parameters
    x_lo = 0.05  # 45 Hz
    x_hi = 0.75  # 6050 Hz
    num_channels = 72
    print("Generating NAP...")
    nap, channel_cfs = pyc.carfac_nap(audio,
                                      float(fs_aud),
                                      num_sections=num_channels,
                                      x_lo=x_lo,
                                      x_hi=x_hi,
                                      b=0.5)
    print("Finished.")

    ## Generate auto-correlated frames
    len_sig_n = len(audio)
    len_frame_n = len_sig_n / (seq_len + 2)  # to account of PM's padding
    # num_frames = int(len_sig_n/len_frame_n)
    num_frames = seq_len
    c_times_n = np.arange(0, num_frames) * len_frame_n + int(len_frame_n // 2)
    c_times_t = c_times_n / fs_aud
    win_size_n = 2048  # analysis window size
    win_size_t = win_size_n / fs_aud
    print("Calculating frame data...")
    sac_frames = datagen.gen_nap_sac_frames(nap,
                                            float(fs_aud),
                                            c_times_t,
                                            win_size_t,
                                            normalize=True)
    frames = gen_nap_frames(nap, float(fs_aud), c_times_t, win_size_t)
    print("Finished.")
    assert len(sac_frames) == seq_len
    ## Plot some sample frames
    # fig = plt.figure()
    # idcs = np.random.randint(seq_len,size=10)
    # for k in range(10):
    #     ax = fig.add_subplot(2,5,k+1)
    #     ax.plot(frames[k])
    # plt.show()

    ## Generate the observation probabilities
    win_size = int(len(audio) / piano_roll_gt.shape[0])
    sig_len = len(audio)
    num_hops = piano_roll_gt.shape[0]
    assert num_hops == sac_frames.shape[0]
    obs_probs = torch.zeros((num_hops, 2, num_notes), requires_grad=False)
    obs_probs_dnn = torch.zeros((num_hops, 2, num_notes), requires_grad=False)
    dnn_ests = torch.zeros((num_hops, num_notes), requires_grad=False)
    for k in range(num_hops):
        # obs_probs[k,:,:] = midi_probs_nap_klap(frames[k], fs_aud, 1024, b=0.01)
        obs_probs[k, :, :] = midi_probs_from_signal_dnn(
            sac_frames[k],
            fs_aud,
            MIDI_lo,
            MIDI_hi,
            net,
            ac_size=1024,
            compression_factor=0.825,
            offset=0.05)
        # dnn_ests[k] = torch.where(obs_probs[k,1,:] > 0.25,
        #                        torch.ones_like(obs_probs[k,1,:]),
        #                        torch.zeros_like(obs_probs[k,1,:]))

    ## Plot some sample front-end outputs
    # fig = plt.figure()
    # idcs = np.random.randint(seq_len, size=10)
    # for k in range(10):
    #     ax = fig.add_subplot(2,5,k+1)
    #     ax.plot(obs_probs[idcs[k],1,:].numpy())
    #     ax.plot(obs_probs_dnn[idcs[k],1,:].numpy())
    #     ax.set_title("{}".format(idcs[k]))
    #     for q in range(piano_roll_gt.shape[1]):
    #         if piano_roll_gt[idcs[k],q] == 1:
    #             ax.plot([q, q], [0,1.0], color='C3')
    # plt.show()

    # TEST: calculate the probability of the first notes
    piano_roll_gt = torch.from_numpy(piano_roll_gt).type(torch.long)

    # Particle Filtering Parameters
    # num_particles = 10000     # worked!
    num_particles = 500
    z_dim = 100
    x_dim = 88
    z = torch.ones((num_particles, z_dim), requires_grad=False)
    x = torch.ones((num_hops, num_particles, x_dim), requires_grad=False)
    w = torch.ones((num_hops, num_particles), requires_grad=False)
    w_naive = torch.ones((num_hops, num_particles), requires_grad=False)

    # Generate initial particles
    count = 0
    for p in range(num_particles):
        z_prev = pyro.sample("init_z", z1_dist)
        z[p, :], x[0, p, :] = dmm.get_sample(z_prev, p)
        num_same = torch.sum(
            x[0, p, :].type(torch.long) == piano_roll_gt[0]).item()
        if num_same == 88:
            count += 1
    print("Got {} correct samples in step {}".format(count, 0))
    count = 0

    ## Calculate initial weights
    ## Uniform
    # w[0,:] = 1.0/w.shape[1]

    ## Use calculates obs_probs (from DNN or elsewhere)
    for p in range(num_particles):
        prob = calc_obs_probs(obs_probs[0, :, :], x[0, p, :])
        w[0, p] *= prob
        w_naive[0, p] *= prob

    # Normalize weights
    w[0, :] = normalize_weights(w[0, :])
    w_naive[0, :] = normalize_weights(w_naive[0, :])

    ## Main Particle Filtering Loop
    good_samples = np.zeros(num_hops)
    for f in range(1, num_hops):
        ## Sample new particles
        z_vals, z_probs = particles_to_dist(z, w[f - 1, :])
        for p in range(num_particles):
            idx = discrete_sample(z_probs)
            z[p, :], x[f, p, :] = dmm.get_sample(z_vals[idx],
                                                 p + f * num_particles)
            num_same = torch.sum(
                x[f, p, :].type(torch.long) == piano_roll_gt[f]).item()
            if num_same == 88:
                count += 1

            ## Calculate Weights -- probably bring this into loop above
            #- Primitive observation-free model of observations (more notes correct, higher prob)
            sx_prob = ((num_same - 78) / 10)**2
            w_naive[f, p] *= sx_prob  #*xz_prob
            #- Use Observation probabilities
            sx_prob = calc_obs_probs(obs_probs[f, :, :], x[f, p, :])
            w[f, p] *= sx_prob  #*xz_prob
            # Calculate probability of x given z
            # xz_dist = dist.Bernoulli(dmm.emitter(z[p,:]))
            # xz_prob = torch.exp(torch.sum(xz_dist.log_prob(x[f,p,:]))).item()

        # Report number of samples that corresponded with ground truth
        print("Got {} correct samples in step {} \t".format(count, f), end='')
        good_samples[f] = count
        count = 0

        ## Normalize
        w[f, :] = w[f, :].pow(0.25)
        w[f, :] = normalize_weights(w[f, :])
        w_naive[f, :] = normalize_weights(w_naive[f, :])
        # plt.plot(w[f,:].numpy())
        # plt.plot(w_naive[f,:].numpy())
        # plt.show()
        print("\tDone!")

    # Now pull out the most probable path
    piano_roll_dist = np.zeros((num_hops, 88))

    if condense:
        print("\"Condensing\" final distribution")
        w_c = []
        x_c = []
        for f in range(num_hops):
            w_condensed, x_condensed = make_final_dist(w[f, :], x[f, :, :])
            w_c.append(w_condensed)
            x_c.append(x_condensed)
            print("{} unique samples in step {}/{}".format(
                len(w_condensed), f + 1, num_hops))
            piano_roll_dist[f, :] = np.sum(x_condensed *
                                           w_condensed[:, np.newaxis],
                                           axis=0)
        ax_dist.imshow(np.flip(np.transpose(piano_roll_dist), axis=0))

    ## Most probable path by picking highest weighted particle
    piano_roll_estimated = np.zeros((num_hops, 88))
    piano_roll_estimated_c = np.zeros((num_hops, 88))
    for f in range(num_hops):
        ## just picking highest weight
        idx = np.argmax(w[f, :].numpy())
        piano_roll_estimated[f, :] = x[f, idx, :].numpy()
        ## picking from highest condensed weight
        if condense:
            idx = np.argmax(w_c[f])
            piano_roll_estimated_c[f, :] = x_c[f][idx, :]
    ax_estimated.imshow(np.flip(np.transpose(piano_roll_estimated), axis=0))
    ax_estimated.set_yticks(np.arange(1, 89)[::10])
    ax_estimated.set_yticklabels(np.arange(88, 0, -1)[::10])
    if condense:
        ax_estimated_c.imshow(
            np.flip(np.transpose(piano_roll_estimated_c), axis=0))

    ## Calculate and report precision and recall
    p, r, f = precision_recall_f(piano_roll_gt.numpy(), piano_roll_estimated)
    print("Precision (regular): \t", p)
    print("Recall (regular):    \t", r)
    print("F-metric (regular):  \t", f)

    ## Check how often the correct sample was chosen when available
    gt = piano_roll_gt.numpy()
    num_available = 0
    num_chosen = 0
    num_available = 0
    num_chosen_c = 0
    for f in range(num_hops):
        if good_samples[f] > 0:
            num_available += 1
            if np.array_equal(gt[f, :], piano_roll_estimated[f, :]):
                num_chosen += 1
            if condense:
                if np.array_equal(gt[f, :], piano_roll_estimated_c[f, :]):
                    num_chosen_c += 1
    print("Correct select rate (normal)   : {}".format(num_chosen /
                                                       num_available))
    if condense:
        print("Correct select rate (condensed): {}".format(num_chosen_c /
                                                           num_available))

    ## Make audio for estimate piano roll
    piano_roll_estimated_full = np.zeros((128, seq_len), dtype=int)
    piano_roll_estimated_full[
        20:108, :] += piano_roll_estimated.transpose().astype(int)
    piano_roll_estimated_full *= 64
    print("Sythensizing audio for input...", end="")
    with suppress_stdout():
        pm = piano_roll_to_pretty_midi(piano_roll_estimated_full.astype(float),
                                       fs=2,
                                       program=52)
        audio = pm.fluidsynth(fs=fs_aud,
                              sf2_path='/usr/share/soundfonts/FluidR3_GM.sf2')
    print("done.")
    wav.write("test_estimated_out.wav", fs_aud, audio)
    # ax_estimated.imshow(np.flip(piano_roll_estimated_full, axis=0))

    ## Sample a random sequence starting at the same initial latent state
    # x_vals = []
    # z_vals = []
    # piano_roll_sampled = np.zeros((seq_len, 88))

    # for k in range(seq_len):
    #     z_new, x_new = dmm.get_sample(z_prev, k)
    #     x_vals.append(x_new)
    #     z_vals.append(z_new)
    #     piano_roll_sampled[k,:] = x_new.data.numpy()
    #     z_prev = z_new
    #
    # # Get MIDI from piano from sampled roll
    # piano_roll_sampled_full = np.zeros((128, seq_len), dtype=int)
    # piano_roll_sampled_full[20:108,:] += piano_roll_sampled.transpose().astype(int)
    # piano_roll_sampled_full *= 64
    # print("Sythensizing audio for input...", end="")
    # with suppress_stdout():
    #     pm = piano_roll_to_pretty_midi(piano_roll_sampled_full.astype(float), fs=1, program=52)
    #     audio = pm.fluidsynth(fs=fs_aud,
    #                           sf2_path='/usr/share/soundfonts/FluidR3_GM.sf2')
    # print('done.')
    # wav.write("test_sampled_out.wav", fs_aud, audio)
    # ax_sampled.imshow(np.flip(np.transpose(piano_roll_sampled), axis=0))
    # plt.tight_layout()
    plt.show()
Example #5
0
def main(args):
    """Train a DMM on the JSB chorales data and periodically log val/test loss.

    Loads the dataset through ``poly``, prepares the validation and test
    batches once up front, then runs ``args.num_epochs`` epochs of mini-batch
    training, logging the per-time-slice training loss after every epoch.

    Args:
        args: parsed command-line namespace. This function reads ``log``,
            ``mini_batch_size``, ``cuda``, ``rnn_dropout_rate``,
            ``annealing_epochs``, ``minimum_annealing_factor``, ``load_opt``,
            ``load_model``, ``checkpoint_freq`` and ``num_epochs``.
    """
    # setup logging
    log = get_logger(args.log)
    log(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    # total number of time slices in the training set; used to normalize the
    # per-epoch loss that is logged below
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    # ceiling division in pure integer arithmetic: the last mini-batch of an
    # epoch may be smaller than args.mini_batch_size
    N_mini_batches = ((N_train_data + args.mini_batch_size - 1) //
                      args.mini_batch_size)

    log("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d"
        % (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often (in epochs) we do validation/test evaluation during training
    val_test_frequency = 5
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        """Return ``x`` enlarged ``n_eval_samples`` times along dimension 0.

        With ``n_eval_samples == 1`` (the value set above) the result has the
        same shape and contents as ``x``.
        """
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(
            1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    # (the returned masks are not used anywhere below in this function)
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences),
        val_seq_lengths,
        cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences),
        test_seq_lengths,
        cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, use_cuda=args.cuda)

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        """Train on one mini-batch and return the first value of ``train_ae``.

        Args:
            epoch: zero-based index of the current epoch.
            which_mini_batch: zero-based index of the mini-batch in the epoch.
            shuffled_indices: permutation of the training-set indices for
                this epoch; the mini-batch is a contiguous slice of it.
        """
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # KL annealing factor for the current mini-batch in the current
            # epoch: ramps linearly from just above min_af up to 1.0 over the
            # first args.annealing_epochs epochs
            min_af = args.minimum_annealing_factor
            steps_done = which_mini_batch + epoch * N_mini_batches + 1
            total_steps = args.annealing_epochs * N_mini_batches
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(steps_done) / float(total_steps))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab; the
        # end is clamped so the final (possibly partial) batch stays in range
        mini_batch_start = which_mini_batch * args.mini_batch_size
        mini_batch_end = min(mini_batch_start + args.mini_batch_size,
                             N_train_data)
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step; the second returned value is not needed here
        loss, _ = dmm.train_ae(mini_batch, mini_batch_reversed,
                               mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    # (both args.load_opt and args.load_model must be non-empty)
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint(dmm, log)

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint(dmm, log)

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch,
                                           shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        # (skipped at epoch 0; afterwards runs every val_test_frequency epochs)
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            # NOTE(review): build_loss is called without torch.no_grad() and
            # without switching the model to eval mode -- confirm that is
            # intended for evaluation.
            val_loss = dmm.build_loss(val_batch, val_batch_reversed,
                                      val_seq_lengths)
            test_loss = dmm.build_loss(test_batch, test_batch_reversed,
                                       test_seq_lengths)
            # only the first element of each returned loss is reported
            log("[val/test epoch %04d]  %.4f  %.4f" %
                (epoch, val_loss[0], test_loss[0]))