Example #1
def vae_generate(gui):
    live_instrument = gui.live_instrument
    device = gui.device
    model = gui.model.to(device)
    dials = gui.dials
    bars = gui.slider_bars.value()

    gaussian = torch.distributions.Normal(torch.zeros(100), torch.ones(100))
    with torch.no_grad():
        # one 100-d latent vector per bar, drawn from the standard normal prior
        samples = [gaussian.sample() for _ in range(bars)]
        # stack on CPU first, then move once; torch.stack with a preallocated
        # CUDA `out=` tensor fails for CPU source tensors
        sample = torch.stack(samples, dim=0).to(device)
        recon = model.decoder(sample)
        recon = torch.softmax(recon, dim=3).squeeze(1)
        # recon /= np.max(np.abs(recon))
        generated = recon[0]
        if bars > 1:
            for r in recon[1:]:
                generated = torch.cat((generated, r), dim=0)
        generated[generated < (1-gui.slider_temperature.value()/100)] = 0
        generated = generated.cpu().numpy()
        generated = debinarizeMidi(generated, prediction=False)
        generated = addCuttedOctaves(generated)
        smoother = NoteSmoother(generated, threshold=1)
        generated = smoother.smooth()
        live_instrument.computer_play(prediction=generated)
    gui.is_running = False
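vae_generate draws one latent vector per bar from a standard normal prior, decodes the batch, and gates low activations with the temperature slider. A minimal standalone sketch of that prior-sampling path, assuming (as the shapes above suggest) a decoder that maps an (N, 100) latent batch to (N, 1, 96, 60) logits; the function name and defaults are illustrative:

import torch

def sample_prior_bars(decoder, n_bars=2, latent_dim=100, temperature=0.5,
                      device="cpu"):
    # Draw latents from N(0, I), decode, softmax over the pitch axis,
    # then zero out activations below the temperature threshold.
    prior = torch.distributions.Normal(torch.zeros(latent_dim),
                                       torch.ones(latent_dim))
    z = prior.sample((n_bars,)).to(device)        # (n_bars, latent_dim)
    with torch.no_grad():
        probs = torch.softmax(decoder(z), dim=3).squeeze(1)
    roll = torch.cat(list(probs), dim=0)          # (n_bars * 96, 60)
    roll[roll < (1 - temperature)] = 0
    return roll.cpu().numpy()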
Example #2
def sample(model, temperature=0.5, smooth_threshold=0):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model.training:  # not model.train(), which re-enables training mode and is always truthy
        model.eval()

    gaussian = Normal(torch.zeros(100), torch.ones(100))
    # print(gaussian.sample())

    with torch.no_grad():
        sample = gaussian.sample()
        recon = model.decoder(sample.unsqueeze(0).to(device))
        recon = torch.softmax(recon, dim=3)
        recon = recon.squeeze(0).squeeze(0).cpu().numpy()
        # recon /= np.max(np.abs(recon))
        recon[recon < (1 - temperature)] = 0
        recon = debinarizeMidi(recon, prediction=False)
        recon = addCuttedOctaves(recon)
        if smooth_threshold:
            smoother = NoteSmoother(recon, threshold=smooth_threshold)
            recon = smoother.smooth()

        pianorollMatrixToTempMidi(recon,
                                  prediction=True,
                                  show=True,
                                  showPlayer=False,
                                  autoplay=True)
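A hypothetical call, assuming a full model object saved with torch.save (the checkpoint path is illustrative):

model = torch.load("checkpoints/vae_model.pth", map_location="cpu")
sample(model, temperature=0.7, smooth_threshold=1)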
Example #3
def reconstruct(file_path,
                model,
                start_bar,
                end_bar,
                temperature=0.5,
                smooth_threshold=0):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model.training:  # not model.train(), which re-enables training mode and is always truthy
        model.eval()

    with torch.no_grad():
        sample_np = getSlicedPianorollMatrixNp(file_path)
        sample_np = transposeNotesHigherLower(sample_np)
        sample_np = cutOctaves(sample_np)
        sample_np = sample_np[start_bar:end_bar]
        sample = torch.from_numpy(sample_np).float()
        recon, embed, logvar = model(sample.view(-1, 1, 96, 60).to(device))
        recon = torch.softmax(recon, dim=3)
        recon = recon.squeeze(1).cpu().numpy()
        # recon /= np.abs(np.max(recon))
        recon[recon < (1 - temperature)] = 0

        sample_play = debinarizeMidi(sample_np, prediction=False)
        sample_play = addCuttedOctaves(sample_play)
        recon = debinarizeMidi(recon, prediction=True)
        recon = addCuttedOctaves(recon)

        recon_out = recon[0]
        sample_out = sample_play[0]
        if recon.shape[0] > 1:
            for i in range(recon.shape[0] - 1):
                sample_out = np.concatenate((sample_out, sample_play[i + 1]),
                                            axis=0)
                recon_out = np.concatenate((recon_out, recon[i + 1]), axis=0)

    # plot with pypianoroll
    sample_plot = ppr.Track(sample_out)
    ppr.plot(sample_plot)
    recon_plot = ppr.Track(recon_out)
    ppr.plot(recon_plot)
    # smooth output
    smoother = NoteSmoother(recon_out, threshold=smooth_threshold)
    smoothed_seq = smoother.smooth()
    smoother_seq_plot = ppr.Track(smoothed_seq)
    ppr.plot(smoother_seq_plot)
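A hypothetical call (the MIDI path is illustrative, and `model` is assumed to be a loaded VAE as above): reconstruct bars 0-3 of a file and plot the original, the raw reconstruction, and the smoothed reconstruction:

reconstruct("./utils/midi_files/test.mid", model,
            start_bar=0, end_bar=4,
            temperature=0.5, smooth_threshold=1)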
Example #4
def vae_main(live_instrument, model, args):
    # infer the device from the model; `device` is used below but was never
    # defined in this function
    device = next(model.parameters()).device
    # reset live input clock
    print("\nUser input\n")
    live_instrument.reset_sequence()
    live_instrument.reset_clock()
    while True:
        status_played_notes = live_instrument.clock()
        if status_played_notes:
            sequence = live_instrument.parse_to_matrix()
            live_instrument.reset_sequence()
            break

    # send live recorded sequence through model and get improvisation
    with torch.no_grad():
        sample = np.array(np.split(sequence, args.bars))

        # prepare sample for input
        sample = cutOctaves(sample)
        sample = torch.from_numpy(sample).float().to(device)
        sample = torch.unsqueeze(sample, 1)

        # model
        mu, logvar = model.encoder(sample)

        # TODO reparameterize to get new sequences here with GUI??

        # reconstruction (soon: prediction)
        pred = model.decoder(mu)

        # reorder prediction
        pred = pred.squeeze(1)
        prediction = pred[0]

        # TODO TEMP for more sequences
        if pred.size(0) > 1:
            for p in pred[1:]:
                prediction = torch.cat((prediction, p), dim=0)

        prediction = prediction.cpu().numpy()
        # normalize predictions
        prediction /= np.abs(np.max(prediction))

        # check midi activations to include rests
        prediction[prediction < (1 - args.temperature)] = 0
        prediction = debinarizeMidi(prediction, prediction=True)
        prediction = addCuttedOctaves(prediction)

        # play predicted sequence note by note
        print("\nPrediction\n")
        live_instrument.computer_play(prediction=prediction)

    live_instrument.reset_sequence()
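vae_main decodes the posterior mean mu directly, so its output is deterministic; the TODO above asks about reparameterizing to get new sequences. A sketch of the standard VAE reparameterization trick that would do this (the `scale` knob is hypothetical, not in the original code):

import torch

def reparameterize(mu, logvar, scale=1.0):
    # z = mu + scale * eps * std, with eps ~ N(0, I) and std = exp(0.5 * logvar);
    # scale=0 reproduces the deterministic decode of mu used above.
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(mu)
    return mu + scale * eps * std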
Example #5
    def embedding_to_midi(self,
                          embedding,
                          out_path='./utils/midi_files/test.mid',
                          temperature=0.5,
                          smooth_threshold=0):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if self.model.training:  # not self.model.train(), which is always truthy
            self.model.eval()

        with torch.no_grad():
            recon = self.model.decoder(embedding.to(device))
            recon = torch.softmax(recon, dim=3)
            recon = recon.squeeze(0).squeeze(0).cpu().numpy()
        
            recon[recon < (1-temperature)] = 0
            recon = debinarizeMidi(recon, prediction=False)
            recon = addCuttedOctaves(recon)

            if smooth_threshold:
                smoother = NoteSmoother(recon, threshold=smooth_threshold)
                recon = smoother.smooth()

            pianorollMatrixToTempMidi(recon,
                                      path=out_path,
                                      prediction=True,
                                      show=False,
                                      showPlayer=False,
                                      autoplay=False)
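Hypothetical usage (the `generator` instance name is illustrative; the class holding this method is not shown): decode a single random 100-d embedding straight to a MIDI file:

z = torch.randn(1, 100)  # shape matches the unsqueezed latent in Example #2
generator.embedding_to_midi(z, out_path='./utils/midi_files/test.mid',
                            temperature=0.5, smooth_threshold=1)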
Example #6
def interpolate(sample1_path,
                sample2_path,
                model,
                sample1_bar=0,
                sample2_bar=0,
                temperature=0.5,
                smooth_threshold=0,
                play_loud=False):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model.training:  # not model.train(), which re-enables training mode and is always truthy
        model.eval()

    with torch.no_grad():
        sample1 = getSlicedPianorollMatrixNp(sample1_path)
        sample1 = transposeNotesHigherLower(sample1)
        sample1 = cutOctaves(sample1[sample1_bar])
        sample2 = getSlicedPianorollMatrixNp(sample2_path)
        sample2 = transposeNotesHigherLower(sample2)
        sample2 = cutOctaves(sample2[sample2_bar])

        # prepare for input
        sample1 = torch.from_numpy(sample1.reshape(1, 1, 96,
                                                   60)).float().to(device)
        sample2 = torch.from_numpy(sample2.reshape(1, 1, 96,
                                                   60)).float().to(device)

        # embed both sequences
        embed1, _ = model.encoder(sample1)
        embed2, _ = model.encoder(sample2)

        # for hamming distance
        recon1 = model.decoder(embed1)
        recon1 = torch.softmax(recon1, dim=3)
        recon1 = recon1.squeeze(0).squeeze(0).cpu().numpy()
        # recon1 /= np.max(np.abs(recon1))
        recon1[recon1 < (1 - temperature)] = 0
        recon1 = debinarizeMidi(recon1, prediction=False)
        recon1 = addCuttedOctaves(recon1)
        recon1[recon1 > 0] = 1
        hamming1 = recon1.flatten()

        recon2 = model.decoder(embed2)
        recon2 = torch.softmax(recon2, dim=3)
        recon2 = recon2.squeeze(0).squeeze(0).cpu().numpy()
        # recon2 /= np.max(np.abs(recon2))
        recon2[recon2 < (1 - temperature)] = 0
        recon2 = debinarizeMidi(recon2, prediction=False)
        recon2 = addCuttedOctaves(recon2)
        recon2[recon2 > 0] = 1
        hamming2 = recon2.flatten()

        hamming_dists1 = []
        hamming_dists2 = []

        for i in range(11):
            alpha = i / 10.  # interpolate in 0.1 steps from embed1 to embed2
            c = (1. - alpha) * embed1 + alpha * embed2

            # decode current interpolation
            recon = model.decoder(c)
            recon = torch.softmax(recon, dim=3)
            recon = recon.squeeze(0).squeeze(0).cpu().numpy()
            # recon /= np.max(np.abs(recon))
            recon[recon < (1 - temperature)] = 0
            recon = debinarizeMidi(recon, prediction=False)
            recon = addCuttedOctaves(recon)
            if smooth_threshold:
                smoother = NoteSmoother(recon, threshold=smooth_threshold)
                recon = smoother.smooth()
            # for current hamming distance
            recon_hamm = recon.flatten()
            recon_hamm[recon_hamm > 0] = 1
            current_hamming1 = hamming(hamming1, recon_hamm)
            current_hamming2 = hamming(hamming2, recon_hamm)
            hamming_dists1.append(current_hamming1)
            hamming_dists2.append(current_hamming2)

            # plot piano roll
            if i == 0:
                recon_plot = recon
            else:
                recon_plot = np.concatenate((recon_plot, recon), axis=0)

            print("alpha = {}".format(alpha))
            print("Hamming distance to sequence 1 is {}".format(
                current_hamming1))
            print("Hamming distance to sequence 2 is {}".format(
                current_hamming2))
            if play_loud:
                pianorollMatrixToTempMidi(recon,
                                          prediction=True,
                                          show=True,
                                          showPlayer=False,
                                          autoplay=True)

        alphas = np.arange(0, 1.1, 0.1)
        fig, ax = plt.subplots()
        ax.plot(alphas, hamming_dists1)
        ax.plot(alphas, hamming_dists2)
        ax.grid()

        fig2, ax2 = plt.subplots()
        # recon_plot = ppr.Track(recon_plot)
        downbeats = [i * 96 for i in range(11)]
        # recon_plot.plot(ax, downbeats=downbeats)
        ppr.plot_pianoroll(ax2, recon_plot, downbeats=downbeats)
        plt.show()
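The hamming() helper above is presumably scipy.spatial.distance.hamming, which returns the fraction of positions at which two flattened binary piano rolls disagree (0.0 for identical rolls). A quick check of the convention:

from scipy.spatial.distance import hamming
import numpy as np

a = np.array([1, 0, 1, 1])
b = np.array([1, 1, 1, 0])
print(hamming(a, b))  # 0.5: two of the four positions differ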
Example #7
def vae_interact(gui):
    live_instrument = gui.live_instrument
    device = gui.device
    model = gui.model.to(device)
    dials = gui.dials
    while True:
        print("\nUser input\n")
        # reset live input clock and prerecorded sequences
        live_instrument.reset_sequence()
        live_instrument.reset_clock()
        while True:
            status_played_notes = live_instrument.clock()
            if status_played_notes:
                sequence = live_instrument.parse_to_matrix()
                live_instrument.reset_sequence()
                break
            if not gui.is_running:
                break
        if not gui.is_running:
            break

        # send live recorded sequence through model and get response
        with torch.no_grad():
            # prepare sample for input
            sample = np.array(np.split(sequence, live_instrument.bars))
            sample = cutOctaves(sample)
            sample = torch.from_numpy(sample).float().to(device)
            sample = torch.unsqueeze(sample,1)

            # encode
            mu, logvar = model.encoder(sample)

            # reparameterize with variance
            dial_vals = []
            for dial in dials:
                dial_vals.append(dial.value())
            dial_tensor = (torch.FloatTensor(dial_vals)/100.).to(device)
            new = mu + (dial_tensor * 0.5 * logvar.exp())
            pred = model.decoder(new).squeeze(1)

            # for more than 1 sequence
            prediction = pred[0]
            if pred.size(0) > 1:
                for p in pred[1:]:
                    prediction = torch.cat((prediction, p), dim=0)

            # back to cpu and normalize
            prediction = prediction.cpu().numpy()
            prediction /= np.abs(np.max(prediction))

            # check midi activations to include rests
            prediction[prediction < (1 - gui.slider_temperature.value()/100.)] = 0
            prediction = debinarizeMidi(prediction, prediction=True)
            prediction = addCuttedOctaves(prediction)
            smoother = NoteSmoother(prediction, threshold=2)
            prediction = smoother.smooth()

            # send to robot
            if gui.chx_simulate_robot.isChecked():
                print("\nPublisher\n")
                note_msg = Int32MultiArray()
                live_instrument.human = False
                live_instrument.reset_clock()
                play_tick = -1
                old_midi_on = np.zeros(1)
                played_notes = []
                while True:
                    done = live_instrument.computer_clock()
                    if live_instrument.current_tick > play_tick:
                        play_tick = live_instrument.current_tick
                        # argwhere returns shape (k, 1); flatten so the loop
                        # sees every active pitch, not just the first row
                        midi_on = np.argwhere(prediction[play_tick] > 0).flatten()
                        if midi_on.size:  # .size, not .any(): pitch index 0 is falsy
                            for note in midi_on:
                                if note not in old_midi_on:
                                    current_vel = int(prediction[play_tick, note])
                                    mido_msg = mido.Message('note_on', note=int(note), velocity=current_vel)
                                    note_msg.data = mido_msg.bytes()
                                    gui.midi_publisher.publish(note_msg)
                                    played_notes.append(note)
                        else:
                            # drain explicitly; popping while iterating over
                            # the same list skips elements
                            while played_notes:
                                note = played_notes.pop(0)
                                # self.out_port.send(mido.Message('note_off',
                                #                                 note=note))

                        if old_midi_on.any():
                            for note in old_midi_on:
                                if note not in midi_on:
                                    # self.out_port.send(mido.Message('note_off', note=note))
                                    pass
                        old_midi_on = midi_on

                    if done:
                        live_instrument.human = True
                        live_instrument.reset_clock()
                        break
            # or play in software
            else:
                print("\nPrediction\n")
                live_instrument.computer_play(prediction=prediction)

        live_instrument.reset_sequence()
        if not gui.is_running:
            break
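The publisher branch packs each raw MIDI message into the Int32MultiArray via mido's bytes(), which yields the status byte followed by the data bytes. A quick check of what goes over the wire:

import mido

msg = mido.Message('note_on', note=60, velocity=90)
print(msg.bytes())  # [144, 60, 90]: note_on on channel 0, pitch 60, velocity 90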
Example #8
def vae_endless(gui):
    live_instrument = gui.live_instrument
    device = gui.device
    model = gui.model.to(device)
    dials = gui.dials
    print("\nUser input\n")
    # reset live input clock and prerecorded sequences
    live_instrument.reset_sequence()
    live_instrument.reset_clock()
    while True:
        status_played_notes = live_instrument.clock()
        if status_played_notes:
            sequence = live_instrument.parse_to_matrix()
            live_instrument.reset_sequence()
            break
        if not gui.is_running:
            break

    while True:
        # send live recorded sequence through model and get response
        with torch.no_grad():
            # prepare sample for input
            sample = np.array(np.split(sequence, live_instrument.bars))
            sample = cutOctaves(sample)
            sample = torch.from_numpy(sample).float().to(device)
            sample = torch.unsqueeze(sample,1)

            # encode
            mu, logvar = model.encoder(sample)

            # reparameterize with variance
            dial_vals = []
            for dial in dials:
                dial_vals.append(dial.value())
            dial_tensor = (torch.FloatTensor(dial_vals)/100.).to(device)  # match mu's device
            # print(dial_tensor)
            new = mu + (dial_tensor * 0.5 * logvar.exp())
            pred = model.decoder(new).squeeze(1)

            # for more than 1 sequence
            prediction = pred[0]
            if pred.size(0) > 1:
                for p in pred[1:]:
                    prediction = torch.cat((prediction, p), dim=0)

            # back to cpu and normalize
            prediction = prediction.cpu().numpy()
            prediction /= np.abs(np.max(prediction))

            # check midi activations to include rests
            prediction[prediction < (1 - gui.slider_temperature.value()/100.)] = 0
            prediction = debinarizeMidi(prediction, prediction=True)
            prediction = addCuttedOctaves(prediction)
            smoother = NoteSmoother(prediction, threshold=2)
            prediction = smoother.smooth()

            # play predicted sequence note by note
            print("\nPrediction\n")
            live_instrument.computer_play(prediction=prediction)

        live_instrument.reset_sequence()
        sequence = prediction
        if not gui.is_running:
            break