Example 1
# Imports as used in the other examples; stylize() is assumed to be
# defined elsewhere in this module.
import conversion
import console
from post_processor import PostProcessor


def main(_):
    # REVIEW josephz: This paradigm was copied from inference-hack.py
    # initialize_globals()

    sample_dir = "sample"
    # sample_names = ["new_test"]
    sample_names = ["rolling_in_the_deep"]
    post_processor = PostProcessor()
    post_processor.load_weights("weights.h5")
    # sample_names = ["perfect_features"]
    # sample_names = ["rolling_in_the_one_more_time"]
    for sample_name in sample_names:
        console.h1("Processing %s" % sample_name)
        console.time("total processing for " + sample_name)
        sample_path = sample_dir + "/" + sample_name

        style_path = sample_path + "/style.mp3"
        content_path = sample_path + "/content.mp3"
        stylized_img_path = sample_path + "/stylized.png"
        stylized_img_raw_path = sample_path + "/stylized_raw.png"
        stylized_audio_path = sample_path + "/stylized.mp3"
        stylized_audio_raw_path = sample_path + "/stylized_raw.mp3"

        # Read style audio to spectrograms.
        style_audio, style_sample_rate = conversion.file_to_audio(style_path)
        style_img, style_phase = conversion.audio_to_spectrogram(
            style_audio, fft_window_size=1536)

        # Read content audio to spectrograms.
        content_audio, content_sample_rate = conversion.file_to_audio(
            content_path)
        content_img, content_phase = conversion.audio_to_spectrogram(
            content_audio, fft_window_size=1536)
        stylized_img_raw, stylized_img = stylize(content_img, style_img,
                                                 content_phase, style_phase,
                                                 content_path, style_path,
                                                 post_processor)

        # Save raw stylized spectrogram and audio.
        stylized_audio_raw = conversion.amplitude_to_audio(
            stylized_img_raw,
            fft_window_size=1536,
            phase_iterations=15,
            phase=content_phase)
        conversion.image_to_file(stylized_img_raw, stylized_img_raw_path)
        conversion.audio_to_file(stylized_audio_raw, stylized_audio_raw_path)

        # Save post-processed stylized spectrogram and audio.
        stylized_audio = conversion.amplitude_to_audio(stylized_img,
                                                       fft_window_size=1536,
                                                       phase_iterations=15,
                                                       phase=content_phase)
        # np.save("stylized_img.npy", stylized_img)
        # np.save("content_phase.npy", content_phase)
        conversion.image_to_file(stylized_img, stylized_img_path)
        conversion.audio_to_file(stylized_audio, stylized_audio_path)

        console.timeEnd("total processing for " + sample_name)
        console.info("Finished processing %s; saved to %s" %
                     (sample_name, stylized_audio_path))
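Example 1 defines main(_) but its launcher is not shown; the unused positional argument suggests a tf.app/absl-style runner. A minimal sketch of an entry point, assuming a plain sys.argv call is acceptable:

if __name__ == "__main__":
    import sys
    # Hypothetical launcher; the repo may instead use tf.app.run() or
    # absl.app.run(main), which pass argv through the same positional slot.
    main(sys.argv)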
Example 2
#!/usr/bin/env python
import conversion
import console
import numpy as np
from post_processor import PostProcessor

post_processor = PostProcessor()
post_processor.load_weights("weights.h5")

stylized = conversion.file_to_image(
    "sample/rolling_in_the_deep/stylized.png")
content_harmonics = conversion.file_to_image(
    "sample/rolling_in_the_deep/content.mp3.harmonics.png")
# The original loaded the harmonics image twice, likely a copy-paste bug;
# the sibilants image (written as ".sibilants.jpg" in Example 7) is the
# intended input here.
content_sibilants = conversion.file_to_image(
    "sample/rolling_in_the_deep/content.mp3.sibilants.jpg")

stylized = post_processor.predict_unstacked(
    amplitude=stylized, harmonics=content_harmonics,
    sibilants=content_sibilants)

conversion.image_to_file(stylized, "/Users/ollin/Desktop/boop.png")
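To hear the post-processed result rather than just view it, the spectrogram image can be rendered back to audio with the same helpers Example 1 uses. This is a sketch, not part of the original script; the content-phase source and output path are assumptions:

content_audio, _ = conversion.file_to_audio(
    "sample/rolling_in_the_deep/content.mp3")
_, content_phase = conversion.audio_to_spectrogram(
    content_audio, fft_window_size=1536)
stylized_audio = conversion.amplitude_to_audio(
    stylized, fft_window_size=1536, phase_iterations=15, phase=content_phase)
conversion.audio_to_file(stylized_audio, "/Users/ollin/Desktop/boop.mp3")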
Example 3
#!/usr/bin/env python
import console
import conversion
import numpy as np
import sys
import cv2

# hacky, will replace with argparse later
img_file_path = sys.argv[1]
console.log("sawifying", img_file_path)

spectrogram = conversion.file_to_image(img_file_path)
# Crude block fill in place of proper interpolation.
output = np.zeros(spectrogram.shape)

# Slow Python loops; see the vectorized sketch after this script.
# Only the lowest 64 frequency bins are treated as fundamental candidates.
for y in range(64):
    console.progressBar((y + 1) / 64)
    for t in range(spectrogram.shape[1]):
        if spectrogram[y][t] > 0.1:
            harmonic = 1
            while harmonic * (y + 1) < spectrogram.shape[0]:
                # Spread this bin's energy across the harmonic's bin
                # range, attenuated by 1 / harmonic.
                for i in range(harmonic * y, harmonic * (y + 1)):
                    output[i][t] = spectrogram[y][t] / harmonic
                harmonic += 1

conversion.image_to_file(output, img_file_path + ".saw.png")
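The loop above is flagged "slow, will replace with numpy later". Below is a sketch of a version vectorized over the time axis, assuming a 2-D (frequency x time) array; it merges overlapping harmonics with a max instead of last-write-wins, a small behavioral difference, and the function name is illustrative:

def sawify_vectorized(spectrogram, num_bins=64, threshold=0.1):
    output = np.zeros_like(spectrogram)
    for y in range(num_bins):
        # Keep only the cells above threshold in this fundamental row.
        row = np.where(spectrogram[y] > threshold, spectrogram[y], 0.0)
        harmonic = 1
        while harmonic * (y + 1) < spectrogram.shape[0]:
            lo, hi = harmonic * y, harmonic * (y + 1)
            # Broadcast the row across the harmonic's bin range.
            output[lo:hi] = np.maximum(output[lo:hi], row / harmonic)
            harmonic += 1
    return output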
Example 4
def denoise_from_file(self, file_path):
    noisy = np.load(file_path)
    denoised = self.predict(noisy)
    conversion.image_to_file(denoised, file_path + ".denoised.png")
Example 5
test_files = [
    "../data/aligned/one_last_time/one_last_time_cover_aligned_30s.mp3",
    "../data/aligned/one_last_time/one_last_time_original_30s.mp3"
]
# test_files = ["sample/rolling_in_the_deep/style.mp3"]

for f in test_files:
    console.time("preprocessing")
    console.log("starting", f)
    audio, sample_rate = conversion.file_to_audio(f)
    amplitude, phase = conversion.audio_to_spectrogram(audio,
                                                       fft_window_size=1536)
    console.timeEnd("preprocessing")
    console.time("extracting fundamental")
    fundamental_mask = sst.extract_fundamental(amplitude)
    console.timeEnd("extracting fundamental")
    conversion.image_to_file(fundamental_mask, f + ".fundamental.png")

    console.time("fundamental to harmonics")
    fundamental_freqs, fundamental_amps = sst.extract_fundamental_freqs_amps(
        fundamental_mask, amplitude)
    harmonics = sst.fundamental_to_harmonics(fundamental_freqs,
                                             fundamental_amps, amplitude)
    console.timeEnd("fundamental to harmonics")
    conversion.image_to_file(harmonics, f + ".harmonics.png")

    # Pitch normalization.
    pitch_normalized_amp, pitch_normalized_phase = sst.normalize_pitch(
        amplitude, phase, fundamental_freqs, fundamental_amps)
    conversion.image_to_file(pitch_normalized_amp,
                             f + ".pitch_normalized.png")
Example 6
def denoise_from_file(self, file_path):
    noisy = np.load(file_path)
    # Unstack the three saved channels: amplitude, harmonics, sibilants.
    denoised = self.predict_unstacked(
        noisy[:, :, 0], noisy[:, :, 1], noisy[:, :, 2])
    conversion.image_to_file(denoised, file_path + ".denoised.png")
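For this variant the .npy input is presumably a (frequency x time x 3) array stacking the three spectrograms in the order unstacked above. A hypothetical writer-side sketch (variable names are illustrative):

stacked = np.stack([amplitude, harmonics, sibilants], axis=-1)
np.save("noisy_example.npy", stacked)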
Example 7
style_amplitude, style_phase = conversion.audio_to_spectrogram(
    style_audio, fft_window_size=1536)
console.timeEnd("preprocessing")

stylized_amplitude = np.zeros(content_amplitude.shape)

num_freq, num_timesteps, _ = content_amplitude.shape
num_timesteps = min(num_timesteps, style_amplitude.shape[1])

# Preprocessing - compute fundamentals and harmonics
console.time("super resolution")
content_fundamental_mask = sst.extract_fundamental(content_amplitude)
content_fundamental_freqs, content_fundamental_amps = sst.extract_fundamental_freqs_amps(
    content_fundamental_mask, content_amplitude)
content_sibilants = sst.get_sibilants(content_amplitude,
                                      content_fundamental_amps)
conversion.image_to_file(content_sibilants,
                         test_content_file + ".sibilants.jpg")
console.log("finished sibilants")
content_harmonics = sst.fundamental_to_harmonics(content_fundamental_freqs,
                                                 content_fundamental_amps,
                                                 content_amplitude)
content_harmonics = dilation(content_harmonics)

content_sibilants *= content_amplitude.max() / content_sibilants.max()
console.stats(content_sibilants, "content sibilants")
content_harmonics *= content_amplitude.max() / content_harmonics.max()
console.stats(content_harmonics, "content harmonics")
console.timeEnd("super resolution")

console.time("frequency weighting")
# ELEMENT 1: Frequency weighting
for t in range(num_timesteps):