Example n. 1
def make_plots(self, waveform, w, M, N, H, sr, filepath=None):
    if filepath:
        if not os.path.exists(filepath):
            os.makedirs(filepath)
    # STFT analysis of the (re)synthesised waveform, then plot its spectrogram.
    self.mX, self.pX = STFT.stftAnal(waveform, w, N, H)
    # Note: filepath is expected to end in '/' since the plot name is concatenated onto it.
    plotting.spectogram_plot(self.mX,
                             self.pX,
                             M,
                             N,
                             H,
                             sr,
                             show=False,
                             filepath=filepath +
                             'model_generation_spectogram')
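
A note on the filepath convention in this method: the plot name is concatenated directly onto filepath, so callers are expected to pass a directory path ending in '/'. Below is a small, hypothetical sketch of an equivalent, separator-safe construction using os.path.join; it reflects my reading of the intent, not code from the project.

import os

def build_plot_path(filepath, name='model_generation_spectogram'):
    # Hypothetical helper: same intent as `filepath + name`, but os.path.join
    # inserts the separator when it is missing, so a trailing '/' is not required.
    if filepath and not os.path.exists(filepath):
        os.makedirs(filepath)
    return os.path.join(filepath, name) if filepath else None
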
Example n. 2
def main(args):
    # Verify model folder
    if not os.path.exists(args.model_folder):
        raise Exception("Model folder does not exist!")

    # Get network settings
    settings_json_string = '/network_settings.json'
    if args.model_folder[-1] == '/':
        args.model_folder = args.model_folder[:-1]

    if os.path.isfile(args.model_folder):
        model_folder = os.path.join(*args.model_folder.split(sep='/')[:-1])
    else:
        model_folder = args.model_folder

    model_name = model_folder.split(sep='/')[-1]

    with open(model_folder + settings_json_string, 'r') as f:
        network_settings = json.load(f)

    analysis_type = network_settings['analysis_type']
    analysis_settings = network_settings[analysis_type + '_settings']
    M = analysis_settings['M']
    H = analysis_settings['H']
    w = window_dictionary.get(analysis_settings['w'])(M)
    N = analysis_settings['N']
    sr = analysis_settings['sample_rate']

    input_data_shape = (1, 1, network_settings['n_inputs'])  # (num_steps, batch_size, n_inputs)


    with tf.Session() as sess:

        n_hidden = network_settings['n_hidden']  # List of hidden unit sizes

        input_placeholder = tf.placeholder(tf.float32, shape=input_data_shape)
        input = np.zeros(input_data_shape)
        feed_dict = {input_placeholder: input}

        state_placeholders = []
        states = []
        for lstm_layer in range(len(n_hidden)):
            state_placeholders.append((tf.placeholder(tf.float32, shape=(1, n_hidden[lstm_layer]), name='cell'),  # batch_size = 1
                                       tf.placeholder(tf.float32, shape=(1, n_hidden[lstm_layer]), name='hidden')))  # batch_size = 1
            states.append([np.zeros((1, n_hidden[lstm_layer])), np.zeros((1, n_hidden[lstm_layer]))])
            feed_dict[state_placeholders[lstm_layer]] = states[lstm_layer]

        print('n_steps:', network_settings['n_steps'])

        n_outputs = network_settings['n_outputs']
        if len(n_hidden) == 1:
            n_outputs = [n_outputs]
        else:
            n_outputs = n_hidden[1:] + [n_outputs]

        x = input_placeholder
        lstm_states = []  # Keep track of LSTM states
        for i in range(len(n_hidden)):
            lstm = SimpleLSTM(n_hidden[i], scope='LSTM_model/layer_{}'.format(i + 1))
            lstm_output, state = lstm(x, state_placeholders[i])  # lstm_outputs is Tensor of shape (n_steps, batch_size, n_hidden[i])
            lstm_states.append(state)
            # tf.unpack is the pre-TF-1.0 name for tf.unstack
            lstm_output = tf.unpack(lstm_output)  # Make it into list length n_steps, each entry (batch_size, n_hidden[i])
            dense = Dense(scope="LSTM_model/layer_{}".format(i + 1), size=n_outputs[i],
                          nonlinearity=tf.sigmoid, initialiser=wbVars_Xavier)
            final_output = dense(lstm_output[0])

        saver = tf.train.Saver()
        # saver.restore(sess, './training/saved_models/2016-11-25T08-54-35/model-20')
        load_saved_model_to_resume_training(saver, sess, model_folder)

        outputs_list = []
        for step in range(network_settings['n_steps']):
            output, *states = sess.run([final_output] + [state for state in lstm_states],
                                      feed_dict=feed_dict)  # states is list of LSTMStateTuple (length num_layers)
            # output is shape (batch_size, n_outputs), but it needs to be (n_steps=1, batch_size, n_outputs)
            output = np.expand_dims(output, axis=0)
            outputs_list.append(output)
            lstm_layer_states = states
            input = output
            # Update feed_dict by giving the new input and states for all layers
            feed_dict[input_placeholder] = input
            for lstm_layer in range(len(n_hidden)):
                feed_dict[state_placeholders[lstm_layer]] = lstm_layer_states[lstm_layer]

        final_outputs = [tf.squeeze(output, [0]) for output in outputs_list]
        final_outputs = tf.pack(final_outputs)  # Stack into one rank-3 tensor (tf.pack is the pre-TF-1.0 name for tf.stack)
        print(final_outputs.get_shape())
        final_outputs = tf.transpose(final_outputs,
                                     [1, 0, 2])  # final_outputs has shape (batch_size, n_frames, n_outputs)
        print('after packing and transposing, final_outputs shape: ', final_outputs.get_shape())

        ####################
        # Compare to ground truth (debugging)
        if args.vector_folder is None:
            print("No vector folder was provided, cannot calculate cost for debugging.")
        else:
            loaded, json_vector_settings, analysis_type_check = load_from_dir_root(args.vector_folder)
            assert analysis_type == analysis_type_check

            _, data_dict = setup_training_data(loaded, 1)  # batch_size = 1
            ground_truth = data_dict['output_data']  # (n_frames, batch_size, n_outputs)
            ground_truth = np.transpose(ground_truth, [1, 0, 2])
            print('Network generation: {}'.format(final_outputs.eval()))
            network_mag = final_outputs.eval()[0, :, :257]
            # plt.figure()
            # plt.subplot(3,1,1)
            # plt.plot(network_mag[0, :])
            # plt.subplot(3,1,2)
            # plt.plot(network_mag[1, :])
            # plt.subplot(3,1,3)
            # plt.plot(network_mag[3, :])
            # plt.show()

            print(network_mag[1, 7], network_mag[1, 17], network_mag[1, 32])


            ground_truth_mag = ground_truth[0, :, :257]
            # plt.figure()
            # plt.subplot(3, 1, 1)
            # plt.plot(ground_truth_mag[0, :])
            # plt.subplot(3, 1, 2)
            # plt.plot(ground_truth_mag[1, :])
            # plt.subplot(3, 1, 3)
            # plt.plot(ground_truth_mag[2, :])
            # plt.show()


            print('Data: {}'.format(ground_truth))
            print('Network output min/max: {}, {}'.format(np.min(final_outputs.eval()), np.max(final_outputs.eval())))
            print('Data min/max: {}, {}'.format(np.min(ground_truth), np.max(ground_truth)))
            print(ground_truth.shape)
            print(final_outputs.eval().shape)
            assert ground_truth.shape == final_outputs.eval().shape
            print('Squared error achieved by network: {}'.format(np.sum((ground_truth - final_outputs.eval())**2)
                                                                 / ground_truth.size))

            # mX = final_outputs.eval()[0,:,:257]
            # pX = final_outputs.eval()[0,:,257:]
            #
            # np.save('./mX_model', mX)
            # np.save('./pX_model', pX)
            #
            # asdfasdf
            # phase_range = network_settings['stft_settings']['phase_range']
            # mag_range = network_settings['stft_settings']['mag_range']
            #
            # # Unnormalise
            # mX = mag_range[0] + (mX * (mag_range[1] - mag_range[0]))
            # mX *= sr / 2  # Undo weird rescaling
            # pX = phase_range[0] + (pX * (phase_range[1] - phase_range[0]))
            #
            # print(mX.shape)  # (num_frames, num_freq_bins)
            # # mX = np.transpose(mX)
            # # pX = np.transpose(pX)
            # spectogram_plot(mX, pX, M, N, H, sr, fig_number=None, filepath=None, show=True)
            #
            # reconst = stftSynth(mX, pX, M, H)
            # print(np.max(reconst), np.min(reconst))
            #
            #
            # # soundfile.write('./test_reconst.wav', reconst, sr, format='wav')
            # mX2, pX2 = stftAnal(reconst, w, N, H)
            # print(mX2.shape, pX2.shape)
            # spectogram_plot(mX2, pX2, M, N, H, sr, fig_number=None, filepath=None, show=True)
            # asdfasdfasd


        ####################
        # For testing - just batch size = 1
        result = tf.squeeze(final_outputs, [0]).eval()

    network_output_folder = './generation/network_output/{}/'.format(model_name)
    if not os.path.exists(network_output_folder):
        os.makedirs(network_output_folder)

    if analysis_type == 'sine_model':
        process_output = SineModelOutputProcessingWithActiveTracking(result, network_settings)
        xtfreq, xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
        reconstruction = sineModelSynth(xtfreq, xtmag, xtphase, nextbiggestpower2(analysis_settings['M']), H, sr)
    elif analysis_type == 'sine_model_without_active_tracking':  # deprecated
        process_output = SineModelOutputProcessing(result, network_settings)
        xtfreq, xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
        reconstruction = sineModelSynth(xtfreq, xtmag, xtphase, nextbiggestpower2(analysis_settings['M']), H, sr)
    elif analysis_type == 'stft':
        process_output = STFTModelOutputProcessing(result, network_settings)
        xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
        np.save(network_output_folder + 'xtmag_model', xtmag)
        np.save(network_output_folder + 'xtphase_model', xtphase)
        plot_filepath = './generation/plots/{}-(generated_{})'.format(model_name, STARTED_DATESTRING)
        if not os.path.exists(plot_filepath): os.makedirs(plot_filepath)
        spectogram_plot(xtmag, xtphase, M, N, H, sr, show=False, filepath=plot_filepath + '/network_output_spectogram')
        reconstruction = stftSynth(xtmag, xtphase, M, H)
    else:
        raise Exception('analysis_type not recognised!')

    print('model_name:', model_name)

    #TODO: extract more of these arguments in the class methods
    process_output.make_plots(reconstruction, w, M, N, H, sr,
                              filepath='./generation/plots/{}-(generated_{})/'.format(model_name, STARTED_DATESTRING))

    soundfile.write('./generation/wav_output/{}-(generated_{}).wav'.format(model_name, STARTED_DATESTRING),
                    reconstruction, sr, format='wav')
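
The heart of this example is the sampling loop: the network's output at each step is fed back in as the next input, and the LSTM cell/hidden states returned by one sess.run are passed back through the state placeholders on the next run. A minimal NumPy-only sketch of that feedback pattern follows; toy_cell is a stand-in for one sess.run of the LSTM plus dense layer and is purely illustrative.

import numpy as np

def toy_cell(x, state):
    # Stand-in for one network step: returns an output with the same shape
    # as the input, plus an updated state to carry into the next step.
    new_state = 0.5 * state + 0.5 * x.mean()
    output = np.tanh(x + new_state)
    return output, new_state

n_steps, batch_size, n_inputs = 10, 1, 4
x = np.zeros((1, batch_size, n_inputs))      # start from silence, as in the example
state = np.zeros((batch_size, n_inputs))

outputs = []
for _ in range(n_steps):
    output, state = toy_cell(x, state)       # run one step and keep the new state
    outputs.append(output)
    x = output                               # feed the output back as the next input

generated = np.concatenate(outputs, axis=0)  # (n_steps, batch_size, n_inputs)
print(generated.shape)
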
Example n. 3
def main(args):
    meta_graph = args.model_folder

    model_folder = '/'.join(meta_graph.split('/')[:-1]) + '/'
    model_name = os.path.basename(meta_graph)
    plot_folder = args.plot_folder + model_name
    if not os.path.exists(plot_folder): os.makedirs(plot_folder)

    with open(model_folder + 'network_settings.json', 'r') as f:
        network_settings = json.load(f)

    loaded, json_vector_settings, analysis_type = load_from_dir_root(
        args.vector_folder)
    dataset = DatasetFeed(loaded, 9)
    # for datablock in dataset.data:
    #     print(datablock.shape)

    dataset.set_all_data_blocks_to_max_shape(
        json_vector_settings['mag_normalised_range'][0])
    # data_shape = dataset.max_data_shape

    # n_steps = data_shape[0]
    # n_outputs = data_shape[1]

    #TODO: Retrieve these from a json
    # batch_size = 96
    # latent_dim = 2
    # n_input = n_outputs

    vae = VAE(model_to_restore=meta_graph)
    global_step = (meta_graph.split("/")[-1]).split('-')[-1]

    # HANDLES
    # vae.x_in, vae.z_mean, vae.z_log_sigma,
    # vae.x_reconstructed, vae.z_, vae.x_reconstructed_,
    # vae.cost, vae.global_step, vae.train_op

    ##################### PLOT IN LATENT SPACE #####################

    next_batch = dataset.next_batch()
    next_batch = np.concatenate(tuple([next_batch] * 5),
                                axis=0)  ## Total hack to get this to work

    mus, _ = vae.encode(np.transpose(
        next_batch, [1, 0, 2]))  # (n_steps, batch_size, n_inputs)
    mus = mus[:9, :]

    ys, xs = mus.T

    print('Means of z variable:', mus)

    plt.figure()
    plt.title("round {}: {} in latent space".format(global_step, 'Toms'))
    kwargs = {'alpha': 0.8}

    labels = [0] * 3 + [1] * 3 + [2] * 3  # Total hack to label different classes of audio files and mark them on the plot
    #TODO: Store classes in the dataset (maybe with filenames)
    classes = set(labels)
    if classes:
        colormap = plt.cm.rainbow(np.linspace(0, 1, len(classes)))
        kwargs['c'] = [colormap[i] for i in labels]

        # make room for legend
        ax = plt.subplot(111)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        handles = [
            mpatches.Circle((0, 0), label=class_, color=colormap[i])
            for i, class_ in enumerate(classes)
        ]
        ax.legend(handles=handles,
                  shadow=True,
                  bbox_to_anchor=(1.05, 0.45),
                  fancybox=True,
                  loc='center left')

    plt.scatter(xs, ys, **kwargs)

    # if range_:
    #     plt.xlim(*range_)
    #     plt.ylim(*range_)

    # plt.show()
    title = "latent_space.png"
    plt.savefig(os.path.join(plot_folder, title), bbox_inches="tight")

    ##################### EXPLORE LATENT SPACE #####################

    min_, max_, nx, ny = -4, 4, 5, 5

    # complex number steps act like np.linspace
    # row, col indices (i, j) correspond to graph coords (y, x)
    # rollaxis enables iteration over latent space 2-tuples
    zs = np.rollaxis(np.mgrid[max_:min_:ny * 1j, min_:max_:nx * 1j], 0, 3)

    M = json_vector_settings['M']
    N = json_vector_settings['N']
    H = json_vector_settings['H']
    sr = json_vector_settings['sample_rate']
    analysis_settings = network_settings['stft_settings']

    for zrow in zs:
        # zrow is a matrix, which will be interpreted as (batch_size, 2)
        for z in zrow:
            z = np.expand_dims(z, axis=0)  # TODO: Allow batches for z decoder
            generation = vae.decode(z)  # shape (n_steps, batch_size, n_outputs) -> batch_size = 1
            generation = np.squeeze(generation, axis=1)  # shape (n_steps, n_outputs)
            # Plot and generate audio
            if analysis_type == 'sine_model':
                process_output = SineModelOutputProcessingWithActiveTracking(
                    generation, network_settings)
                xtfreq, xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
                reconstruction = sineModelSynth(
                    xtfreq, xtmag, xtphase,
                    nextbiggestpower2(analysis_settings['M']), H, sr)
            elif analysis_type == 'stft':
                process_output = STFTModelOutputProcessing(
                    generation, network_settings)
                xtmag, xtphase = process_output.convert_network_output_to_analysis_model_input()
                # os.path.join avoids relying on plot_folder ending in '/'
                np.save(os.path.join(plot_folder, 'xtmag_model'), xtmag)
                np.save(os.path.join(plot_folder, 'xtphase_model'), xtphase)
                plot_filepath = './generation/plots/VAE/{}-(generated_{})'.format(
                    model_name, STARTED_DATESTRING)
                if not os.path.exists(plot_filepath):
                    os.makedirs(plot_filepath)
                spectogram_plot(xtmag,
                                xtphase,
                                M,
                                N,
                                H,
                                sr,
                                show=False,
                                filepath=plot_filepath +
                                '/network_output_spectogram_{}'.format(z))
                reconstruction = stftSynth(xtmag, xtphase, M, H)
            else:
                raise Exception('analysis_type not recognised!')

            soundfile.write(
                './generation/wav_output/VAE/{}-{}-(generated_{}).wav'.format(
                    model_name, z, STARTED_DATESTRING),
                reconstruction,
                sr,
                format='wav')
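
The latent-space sweep above relies on np.mgrid with complex step counts, which behave like np.linspace: a step of n * 1j asks for n evenly spaced values with both endpoints included. A small standalone sketch of how that produces the grid of 2-D z vectors iterated over in the loop (the values match the min_, max_, nx, ny used above):

import numpy as np

min_, max_, nx, ny = -4, 4, 5, 5

# np.mgrid returns shape (2, ny, nx); rollaxis moves the coordinate axis last,
# giving shape (ny, nx, 2) so that zs[i, j] is a single 2-D latent point.
zs = np.rollaxis(np.mgrid[max_:min_:ny * 1j, min_:max_:nx * 1j], 0, 3)

print(zs.shape)    # (5, 5, 2)
print(zs[0, 0])    # [ 4. -4.]  -> one corner of the grid
print(zs[-1, -1])  # [-4.  4.]  -> the opposite corner
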
Example n. 4
def main():
    if not os.path.exists(OUTPUT_FOLDER):
        os.makedirs(OUTPUT_FOLDER)

    file_count = 0
    datalist = []

    json_settings_file = OUTPUT_FOLDER + '/sine_model_settings.json'
    json_dict = {
        'N': N,
        'M': M,
        'H': H,
        'w': window,
        'phase_range': [0, 2 * np.pi],
        'max_sines': MAX_N_SINES
    }

    print("Loading files in {}".format(FOLDER_LIST))
    for folder in FOLDER_LIST:
        if not os.path.exists(folder):
            raise InvalidPathError("{} not found".format(folder))
        file_list = os.listdir(folder)
        for audio_file in file_list:
            if audio_file[-3:] not in ['wav', 'aif']:
                print('Skipping {}'.format(audio_file))
                continue
            print("Processing {}...".format(audio_file), end='')
            file, sr = soundfile.read(folder + '/' + audio_file)
            freq_range = [0, sr / 2]
            if sr != SAMPLE_RATE:
                warnings.warn('Sample rate is not 44100Hz')

            json_dict['sample_rate'] = sr
            json_dict['freq_range'] = freq_range

            # All outputs of sineModelAnal are of the shape (numFrames, maxSines)
            xtfreq, xtmag, xtphase = sineModel.sineModelAnal(
                file, sr, w, N, H, THRESHOLD, maxnSines=MAX_N_SINES)

            active_tracks = (xtfreq != 0.0).astype(int)

            # For plotting the spectrogram of the signal
            mX, pX = STFT.stftAnal(file, w, N, H)
            plotting.plot_sineTracks(mX,
                                     pX,
                                     M,
                                     N,
                                     H,
                                     sr,
                                     xtfreq,
                                     show=False,
                                     filepath=PLOT_FOLDER +
                                     '/{}_sinetracks'.format(audio_file[:-4]))
            plotting.spectogram_plot(mX,
                                     pX,
                                     M,
                                     N,
                                     H,
                                     sr,
                                     show=False,
                                     filepath=PLOT_FOLDER +
                                     '/{}'.format(audio_file[:-4]))

            # Process the frequencies, magnitudes and phases to be normalised in the range [0,1]
            xtfreq = xtfreq / freq_range[1]

            #TODO: we might want to calculate this across all of the training data instead of file by file
            #TODO: This will need modifying in the json file as well
            min_xtmag = np.min(xtmag)
            # Recall tracks are separated by zeros - we want to ignore them
            max_xtmag = np.max(xtmag[xtmag != 0])
            # Could change this later to have the dB floor lower for the zeros
            xtmag[xtmag == 0] = min_xtmag
            xtmag = (xtmag - min_xtmag) / (max_xtmag - min_xtmag)

            xtphase = np.mod(xtphase, 2 * np.pi) / (2 * np.pi)  # Between 0 and 1

            json_dict['mag_range'] = [min_xtmag, max_xtmag]

            assert (xtfreq <= 1).all() and (xtfreq >= 0).all()
            assert (xtmag <= 1).all() and (xtmag >= 0).all()
            assert (xtphase <= 1).all() and (xtphase >= 0).all()

            output_path = OUTPUT_FOLDER + '/{}/'.format(file_count)
            if not os.path.exists(output_path):
                os.makedirs(output_path)
            ## Save the numpy arrays separately - couldn't work out how to save and load multiple arrays
            np.save(output_path + 'freq', xtfreq)
            np.save(output_path + 'mag', xtmag)
            np.save(output_path + 'phase', xtphase)
            np.save(output_path + 'active_tracks', active_tracks)

            datalist.append([xtfreq, xtmag, xtphase, active_tracks])
            print('Saved as {}'.format(output_path))
            file_count += 1

    create_json(json_settings_file, json_dict)
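
The magnitude preprocessing here has a subtlety worth isolating: inactive sine tracks are padded with zeros, so the maximum is taken over the non-zero entries only, and the zeros are then set to the dB floor before min-max scaling. A standalone sketch of just that step, with made-up values:

import numpy as np

# Toy magnitude matrix in dB; zeros mark frames where a sine track is inactive.
xtmag = np.array([[-60.0, -20.0,   0.0],
                  [-40.0,   0.0,   0.0],
                  [-30.0, -25.0, -80.0]])

min_xtmag = np.min(xtmag)              # dB floor over the file (-80 here)
max_xtmag = np.max(xtmag[xtmag != 0])  # ignore the zero padding (-20 here)

xtmag[xtmag == 0] = min_xtmag          # inactive bins get the dB floor
xtmag = (xtmag - min_xtmag) / (max_xtmag - min_xtmag)  # scale into [0, 1]

assert (xtmag >= 0).all() and (xtmag <= 1).all()
print(xtmag)
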
Example n. 5
def main():
    if not os.path.exists(OUTPUT_FOLDER):
        os.makedirs(OUTPUT_FOLDER)
    if not os.path.exists(PLOT_FOLDER):
        os.makedirs(PLOT_FOLDER)

    file_count = 0
    datalist = []

    json_settings_file = OUTPUT_FOLDER + '/stft_settings.json'
    json_dict = {'N': N, 'M': M, 'H': H, 'w': window, 'phase_range': [0, 2 * np.pi]}

    print("Loading files in {}".format(FOLDER_LIST))
    for folder in FOLDER_LIST:
        if not os.path.exists(folder):
            raise InvalidPathError("{} not found".format(folder))
        file_list = os.listdir(folder)
        for audio_file in file_list:
            if audio_file[-3:] not in ['wav', 'aif']:
                print('Skipping {}'.format(audio_file))
                continue
            print("Processing {}...".format(audio_file), end='')
            file, sr = soundfile.read(folder + '/' + audio_file)
            freq_range = [0, sr / 2]
            if sr != SAMPLE_RATE:
                warnings.warn('Sample rate is not 44100Hz')
            print('File length: ', file.size)

            json_dict['sample_rate'] = sr
            json_dict['freq_range'] = freq_range

            mX, pX = STFT.stftAnal(file, w, N, H)

            mX[mX <= AMPLITUDE_THRESHOLD] = AMPLITUDE_THRESHOLD

            # # For SHORT_TEST:
            # mX = mX[:20, :]
            # pX = pX[:20, :]

            # For plotting the spectrogram of the signal
            plotting.spectogram_plot(mX, pX, M, N, H, sr, show=False, filepath=PLOT_FOLDER + '/{}'.format(audio_file[:-4]))

            # Process the frequencies, magnitudes and phases to be normalised in the range [0,1]
            #TODO: we might want to calculate this across all of the training data instead of file by file
            #TODO: This will need modifying in the json file as well
            min_mX = np.min(mX)
            max_mX = np.max(mX)

            json_dict['mag_range'] = [min_mX, max_mX]

            json_dict['mag_normalised_range'] = mX_norm_range
            json_dict['phase_normalised_range'] = pX_norm_range

            mX = (mX - min_mX) / (max_mX - min_mX)  # Between 0 and 1
            mX = (mX * (mX_norm_range[1] - mX_norm_range[0])) + mX_norm_range[0]
            pX = np.mod(pX, 2 * np.pi) / (2 * np.pi)  # Between 0 and 1
            pX = (pX * (pX_norm_range[1] - pX_norm_range[0])) + pX_norm_range[0]

            # Check the data has been normalised correctly
            assert (mX <= mX_norm_range[1]).all() and (mX >= mX_norm_range[0]).all()
            assert (pX <= pX_norm_range[1]).all() and (pX >= pX_norm_range[0]).all()

            output_path = OUTPUT_FOLDER + '/{}/'.format(audio_file[:-4])  # Previously used file_count
            if not os.path.exists(output_path):
                os.makedirs(output_path)
            ## Save the numpy arrays separately - couldn't work out how to save and load multiple arrays
            np.save(output_path + 'mag', mX)
            np.save(output_path + 'phase', pX)

            datalist.append([mX, pX])
            print('Saved as {}'.format(output_path))
            file_count += 1

    create_json(json_settings_file, json_dict)
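
Because the per-file magnitude range is stored in json_dict['mag_range'] alongside the normalised target range, generation code can invert both scaling steps to get back to dB (the commented-out 'Unnormalise' lines in Example n. 2 apply a similar inverse). A hedged round-trip sketch with illustrative values; mX_norm_range here is a placeholder for the module-level constant assumed by the example:

import numpy as np

mX_norm_range = [0.1, 0.9]                         # illustrative normalised range
mX = np.random.uniform(-100.0, -10.0, (20, 513))   # fake magnitude spectrogram in dB

# Forward: dB -> [0, 1] -> normalised range, as in this example
min_mX, max_mX = np.min(mX), np.max(mX)
norm = (mX - min_mX) / (max_mX - min_mX)
norm = norm * (mX_norm_range[1] - mX_norm_range[0]) + mX_norm_range[0]

# Inverse: normalised range -> [0, 1] -> dB, using the stored mag_range
unit = (norm - mX_norm_range[0]) / (mX_norm_range[1] - mX_norm_range[0])
restored = min_mX + unit * (max_mX - min_mX)

assert np.allclose(restored, mX)
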