Example #1
0
def main(_):
    """Stylize each configured sample and save the results beside its inputs.

    For every entry in ``sample_names`` the function reads ``style.mp3`` and
    ``content.mp3`` from ``sample/<name>/``, converts both to spectrograms,
    runs ``stylize`` on them, and then writes the raw and the post-processed
    result as a PNG image and an MP3 file into the same directory.

    Args:
        _: Unused.
    """
    # REVIEW josephz: This paradigm was copied from inference-hack.py
    # NOTE: the initialize_globals() call is currently disabled here.

    fft_window_size = 1536

    def export(amplitude, phase, image_path, audio_path):
        # Rebuild audio from the amplitude image, then write image and audio.
        audio = conversion.amplitude_to_audio(
            amplitude,
            fft_window_size=fft_window_size,
            phase_iterations=15,
            phase=phase)
        conversion.image_to_file(amplitude, image_path)
        conversion.audio_to_file(audio, audio_path)

    sample_dir = "sample"
    # Other sample names that have been used here: "new_test",
    # "perfect_features", "rolling_in_the_one_more_time".
    sample_names = ["rolling_in_the_deep"]

    post_processor = PostProcessor()
    post_processor.load_weights("weights.h5")

    for sample_name in sample_names:
        timer_label = "total processing for " + sample_name
        console.h1("Processing %s" % sample_name)
        console.time(timer_label)

        sample_path = sample_dir + "/" + sample_name
        style_path = sample_path + "/style.mp3"
        content_path = sample_path + "/content.mp3"
        stylized_img_path = sample_path + "/stylized.png"
        stylized_img_raw_path = sample_path + "/stylized_raw.png"
        stylized_audio_path = sample_path + "/stylized.mp3"
        stylized_audio_raw_path = sample_path + "/stylized_raw.mp3"

        # Style audio -> spectrogram (the sample rate is not needed).
        style_audio, _ = conversion.file_to_audio(style_path)
        style_img, style_phase = conversion.audio_to_spectrogram(
            style_audio, fft_window_size=fft_window_size)

        # Content audio -> spectrogram (the sample rate is not needed).
        content_audio, _ = conversion.file_to_audio(content_path)
        content_img, content_phase = conversion.audio_to_spectrogram(
            content_audio, fft_window_size=fft_window_size)

        stylized_img_raw, stylized_img = stylize(
            content_img, style_img, content_phase, style_phase,
            content_path, style_path, post_processor)

        # Raw output first, then the post-processed output; both reuse the
        # content phase when reconstructing audio.
        export(stylized_img_raw, content_phase,
               stylized_img_raw_path, stylized_audio_raw_path)
        export(stylized_img, content_phase,
               stylized_img_path, stylized_audio_path)

        console.timeEnd(timer_label)
        console.info("Finished processing %s; saved to %s" %
                     (sample_name, stylized_audio_path))
def main(_):
    """Generate training arrays for every ``.mp3`` file in the raw data folder.

    Each ``.mp3`` in ``RAW_DATA_DIR`` is passed to ``generate_data_arrs``,
    which yields three arrays (x, y, style). They are saved as ``.npy`` files
    under the ``x/``, ``y/`` and ``style/`` sub-folders of
    ``PROCESSED_DATA_DIR``. The x and y arrays are additionally written as
    max-normalised ``.jpg`` images for debugging. Files without the ``.mp3``
    suffix are reported and skipped.

    Args:
        _: Unused.
    """
    initialize_globals()

    # scrape data from folder
    RAW_DATA_DIR = "../data/studio_acapellas"
    PROCESSED_DATA_DIR = "../data/processed"
    # for each one, generate the data using sst methods, and save the data
    for file_name in os.listdir(RAW_DATA_DIR):
        file_path = os.path.join(RAW_DATA_DIR, file_name)
        if not file_path.endswith(".mp3"):
            console.info("Skipping", file_path)
            continue

        # Swap only the extension. A plain str.replace("mp3", "npy") would
        # also rewrite any "mp3" that happens to appear inside the stem.
        processed_file_name = os.path.splitext(file_name)[0] + ".npy"
        # todo: rewrite all this using pathlib
        processed_file_path_x = PROCESSED_DATA_DIR + "/x/" + processed_file_name
        processed_file_path_y = PROCESSED_DATA_DIR + "/y/" + processed_file_name
        processed_file_path_style = PROCESSED_DATA_DIR + "/style/" + processed_file_name

        console.h1("Processing", file_path)
        x_arr, y_arr, style_arr = generate_data_arrs(file_path)

        # for debugging just save as images
        console.stats(x_arr, "x_arr")
        console.stats(y_arr, "y_arr")
        console.stats(style_arr, "style_arr")
        # Dividing by the maximum rescales each image relative to its peak.
        io.imsave(processed_file_path_x + ".jpg", x_arr / x_arr.max())
        io.imsave(processed_file_path_y + ".jpg", y_arr / y_arr.max())

        np.save(processed_file_path_x, x_arr)
        np.save(processed_file_path_y, y_arr)
        np.save(processed_file_path_style, style_arr)
Example #3
0
def compute_nnf_multiscale(content_features, style_features, iterations=16):
    """Run ``compute_nnf`` coarse-to-fine over a fixed pyramid of scales.

    Both feature arrays are unpacked as ``(num_features, timesteps)``. For
    each downscale factor in ``[8, 4, 2, 1]`` the time axis of both arrays is
    shrunk with ``resize`` and ``compute_nnf`` is called, seeded with the
    result of the previous (coarser) scale after that result has been
    stretched to the new content length.

    Args:
        content_features: 2-D array, ``(num_features, content timesteps)``.
        style_features: 2-D array, ``(num_features, style timesteps)``.
        iterations: Total iteration budget; it is divided evenly between the
            scales, with at least one iteration per scale.

    Returns:
        The value returned by ``compute_nnf`` at the final scale (factor 1).
    """
    console.h1(iterations, "Starting multi-scale pass")
    scales = [8, 4, 2, 1]
    assert scales[-1] == 1  # the last pass must run at full resolution
    passes_per_scale = max(iterations // len(scales), 1)

    num_features, content_len = content_features.shape
    num_features, style_len = style_features.shape

    field = None
    coarser_scale = None
    for scale in scales:
        scaled_content_len = content_len // scale
        scaled_style_len = style_len // scale

        if field is not None:
            # The field stores absolute style positions, so it is normalised
            # by the previous style length, stretched along the content axis
            # as a 1-pixel-wide image, and rescaled to the new style length.
            # This keeps the relative offsets intact across resolutions.
            previous_style_len = style_len // coarser_scale
            as_image = field[:, np.newaxis, np.newaxis] / previous_style_len
            stretched = resize(as_image, (scaled_content_len, 1))
            field = (stretched[:, 0, 0] * scaled_style_len).astype(np.int64)

        small_content = resize(content_features,
                               (num_features, scaled_content_len))
        small_style = resize(style_features,
                             (num_features, scaled_style_len))
        field = compute_nnf(small_content,
                            small_style,
                            passes_per_scale,
                            seed_nnf=field)
        coarser_scale = scale
    return field
Example #4
0
def handleImage(fileName, args, phase=None):
    """Rebuild audio from a spectrogram image and save it with a sanity check.

    The spectrogram loaded from ``fileName`` is converted to audio, the audio
    is converted back into a spectrogram that is saved as a PNG, and finally
    the audio itself is saved as a WAV file. Both output names are derived
    from ``fileName`` plus a suffix built by ``fileSuffix``.

    Args:
        fileName: Path of the spectrogram image to load.
        args: Object providing ``fft`` and ``iter`` attributes.
        phase: Accepted but never read; it is overwritten by the phase
            returned from ``audioFileToSpectrogram``.
    """
    console.h1("Reconstructing Audio from Spectrogram")

    spectrogram, sampleRate = loadSpectrogram(fileName)
    audio = spectrogramToAudioFile(
        spectrogram,
        fftWindowSize=args.fft,
        phaseIterations=args.iter,
    )

    # Round-trip the audio back to a spectrogram so the reconstruction can be
    # inspected visually.
    sanityCheck, phase = audioFileToSpectrogram(audio, fftWindowSize=args.fft)
    checkSuffix = fileSuffix(
        "Output Spectrogram",
        fft=args.fft,
        iter=args.iter,
        sampleRate=sampleRate,
    )
    saveSpectrogram(sanityCheck, fileName + checkSuffix + ".png")

    audioSuffix = fileSuffix("Output", fft=args.fft, iter=args.iter)
    saveAudioFile(audio, fileName + audioSuffix + ".wav", sampleRate)
Example #5
0
def handleAudio(filePath, args):
    """Turn an audio file into a spectrogram image, then hand it to handleImage.

    The spectrogram is saved under the input's base name plus a suffix built
    by ``fileSuffix``. The function waits for the user via ``console.wait``
    before passing the saved image on to ``handleImage``.

    Args:
        filePath: Path of the audio file to read.
        args: Object providing ``fft`` and ``iter`` attributes.
    """
    console.h1("Creating Spectrogram")
    sourceName = basename(filePath)

    console.info("Attempting to read from " + filePath)
    audio, sampleRate = loadAudioFile(filePath)
    console.info("Max of audio file is " + str(np.max(audio)))

    spectrogram, phase = audioFileToSpectrogram(audio, fftWindowSize=args.fft)
    suffix = fileSuffix(
        "Input Spectrogram",
        fft=args.fft,
        iter=args.iter,
        sampleRate=sampleRate,
    )
    spectrogramName = sourceName + suffix + ".png"
    saveSpectrogram(spectrogram, spectrogramName)

    # Blank lines around the prompt keep it visually separate in the console.
    print()
    console.wait("Saved Spectrogram; press Enter to continue...")
    print()

    handleImage(spectrogramName, args, phase)
def handle_audio(file_path, args):
    """Convert an audio file to a spectrogram image and pass it to handle_image.

    The image is saved under the input's base name plus a suffix built by
    ``file_suffix``. After saving, the function waits for the user via
    ``console.wait`` and then calls ``handle_image`` on the saved image.

    Args:
        file_path: Path of the audio file to read.
        args: Object providing ``fft`` and ``iter`` attributes.
    """
    console.h1("Creating Spectrogram")
    source_name = basename(file_path)

    console.info("Attempting to read from " + file_path)
    audio, sample_rate = load_audio_file(file_path)
    console.info("Max of audio file is " + str(np.max(audio)))

    spectrogram = audio_file_to_spectrogram(audio, fft_window_size=args.fft)
    suffix = file_suffix(
        "Input Spectrogram",
        fft=args.fft,
        iter=args.iter,
        sample_rate=sample_rate,
    )
    image_name = source_name + suffix + ".png"
    save_spectrogram(spectrogram, image_name)

    # Blank lines around the prompt keep it visually separate in the console.
    print()
    console.wait("Saved Spectrogram; press Enter to continue...")
    print()

    handle_image(image_name, args)
def handle_image(filename, args):
    """Reconstruct audio from a spectrogram image and save the results.

    The spectrogram loaded from ``filename`` is converted to audio. That audio
    is converted back into a spectrogram, which is saved as a PNG for visual
    comparison, and the audio itself is saved as a WAV file. Both output names
    are ``filename`` plus a suffix built by ``file_suffix``.

    Args:
        filename: Path of the spectrogram image to load.
        args: Object providing ``fft`` and ``iter`` attributes.
    """
    console.h1("Reconstructing Audio from Spectrogram")

    spectrogram, sample_rate = load_spectrogram(filename)
    audio = spectrogram_to_audio_file(
        spectrogram, fft_window_size=args.fft, phase_iterations=args.iter)

    # Round-trip the audio so the reconstruction can be inspected as an image.
    sanity_check = audio_file_to_spectrogram(audio, fft_window_size=args.fft)
    spectrogram_outname = filename \
        + file_suffix("Output Spectrogram",
                      fft=args.fft,
                      iter=args.iter,
                      sample_rate=sample_rate) \
        + ".png"
    save_spectrogram(sanity_check, spectrogram_outname)

    # The output path and the sample rate are separate arguments. Previously
    # a trailing ", sample_rate" turned the path into a (path, sample_rate)
    # tuple that was passed as the file name.
    audio_outname = filename \
        + file_suffix("Output", fft=args.fft, iter=args.iter) \
        + ".wav"
    save_audio_file(audio, audio_outname, sample_rate)
Example #8
0
def handle_image(file_name, args, phase=None):
    """Rebuild audio from a spectrogram image and save it with a sanity check.

    The spectrogram loaded from ``file_name`` is converted to audio, the audio
    is converted back into a spectrogram that is saved as a PNG, and the audio
    is saved as a WAV file. Both output names are ``file_name`` plus a suffix
    built by ``file_suffix``.

    Args:
        file_name: Path of the spectrogram image to load.
        args: Object providing ``fft`` and ``iter`` attributes.
        phase: Accepted but never read; it is overwritten by the phase
            returned from ``audio_to_spectrogram``.
    """
    console.h1("Reconstructing Audio from Spectrogram")

    spectrogram, sample_rate = load_spectrogram(file_name)
    audio = spectrogram_to_audio(
        spectrogram,
        fft_window_size=args.fft,
        phase_iterations=args.iter,
    )

    # Round-trip the audio so the reconstruction can be inspected as an image.
    sanity_check, phase = audio_to_spectrogram(
        audio,
        fft_window_size=args.fft,
        sr=sample_rate,
    )
    check_suffix = file_suffix(
        "Output Spectrogram",
        fft=args.fft,
        iter=args.iter,
        sample_rate=sample_rate,
    )
    save_spectrogram(sanity_check, file_name + check_suffix + ".png")

    audio_suffix = file_suffix("Output", fft=args.fft, iter=args.iter)
    save_audio(audio, file_name + audio_suffix + ".wav", sample_rate)
Example #9
0
def main():
    """Command-line entry point: train the post-processor or denoise files.

    Behaviour depends on the parsed arguments:

    * positional ``files`` given: load the weights and call
      ``denoise_from_file`` on each file;
    * no files but ``--train`` given: optionally load existing weights
      (``--load``), train for ``--epochs`` epochs, then save the weights;
    * neither: report an error through ``console.error``.
    """
    # (flag, add_argument keyword options), in the order they are registered.
    option_specs = (
        ("--train", dict(default=None, type=str,
                         help="path containing training data")),
        ("--valid", dict(default=None, type=str,
                         help="path containing validation data")),
        ("--epochs", dict(default=10, type=int,
                          help="number of epochs to train")),
        ("--name", dict(default=None, type=str,
                        help="name of experiment")),
        ("--weights", dict(default="weights.h5", type=str,
                           help="h5 file to read/write weights to")),
        ("--batch_size", dict(default=16, type=int,
                              help="batch size for training")),
        ("--load", dict(action="store_true",
                        help="Load previous weights file before starting")),
        ("--test", dict(default=None, type=str,
                        help="Test file to infer on every epoch")),
        ("files", dict(nargs="*", default=[])),
    )
    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    args = parser.parse_args()
    post_processor = PostProcessor(args.name)

    if args.files:
        # Inference mode: any positional file takes precedence over --train.
        console.h1("preparing to process", args.files, "...")
        post_processor.load_weights(args.weights)
        for path in args.files:
            post_processor.denoise_from_file(path)
        return

    if not args.train:
        console.error("please provide data to train on (--train) or files to process")
        return

    # Training mode.
    console.h1("preparing to train on {}".format(args.train))
    if args.load:
        console.log("loading weights from {}".format(args.weights))
        post_processor.load_weights(args.weights)
    console.log("loading data")
    train = DataGenerator(args.train, args.batch_size)
    valid = None
    if args.valid:
        valid = DataGenerator(args.valid, args.batch_size)
    console.h1("training")
    post_processor.train(train, args.epochs, validation_data=valid, test_file=args.test)
    post_processor.save_weights(args.weights)
# Download the PDF of every paper in the pickled database into
# Config.pdf_dir, skipping files whose base name is already in `have`.
num_ok = 0  # papers that are present on disk (already there or just fetched)
num_total = 0  # papers attempted so far

# NOTE(review): pickle.load executes arbitrary code from the file; this is
# only safe while Config.db_path points at a trusted, locally produced file.
with open(Config.db_path, 'rb') as db_file:
    db = pickle.load(db_file)

for pid, j in db.items():
    pdfs = [x['href'] for x in j['links'] if x['type'] == 'application/pdf']
    assert len(pdfs) == 1
    pdf_url = pdfs[0] + '.pdf'
    pdf_url = pdf_url.replace("http:", "https:")  # ??
    basename = pdf_url.split('/')[-1]
    fname = os.path.join(Config.pdf_dir, basename)

    # try retrieve the pdf
    num_total += 1
    try:
        if basename not in have:
            console.log('fetching %s into %s' % (pdf_url, fname))
            req = urlopen(pdf_url, None, timeout_secs)
            try:
                with open(fname, 'wb') as fp:
                    shutil.copyfileobj(req, fp)
            finally:
                # Always release the HTTP connection, even if the copy fails.
                req.close()
            # Small randomised pause between downloads.
            time.sleep(0.05 + random.uniform(0, 0.1))
        else:
            console.info('%s exists, skipping' % (fname, ))
        # Reached only when no exception was raised, so this counts both
        # fresh downloads and files that were already present. Previously
        # only the "exists" branch was counted.
        num_ok += 1
    except Exception as e:
        # Best effort: report the failure and continue with the next paper.
        console.warn('error downloading: ', pdf_url)
        console.log(e)

    console.info('%d/%d of %d downloaded ok.' % (num_ok, num_total, len(db)))

console.h1('Final number of papers downloaded okay: %d/%d' % (num_ok, len(db)))
    files = sys.argv[1:]
    config_str = str(config)
    print(config_str)

    vocal_isolation = VocalIsolation(config)

    if len(files) == 0 and config.data:
        console.log("No files provided; attempting to train on " +
                    config.data + "...")
        if config.batch_generator.startswith("random") \
                and config.epoch_steps == 0:
            console.error("EPOCH_STEPS is not set,"
                          " but cannot be determined from data.")
            exit(1)
        if config.load:
            console.h1("Loading Weights")
            vocal_isolation.load_weights(config.weights)
        console.h1("Loading Data")
        data = Data()
        console.h1("Training Model")
        signal.signal(signal.SIGINT, get_signal_handler(vocal_isolation))
        vocal_isolation.run(data)
    elif len(files) > 0:
        console.log("Weights provided; performing inference on " + str(files) +
                    "...")
        console.h1("Loading weights")
        vocal_isolation.load_weights(config.weights)
        for f in files:
            vocal_isolation.infer(f, config.fft, config.phase_iterations,
                                  config.learn_phase, config.get_channels())
    else:
Example #12
0
                        mashup = np.maximum(acapella, instrumental)
                        # chop into slices so everything's the same size in a batch
                        dim = SLICE_SIZE
                        mashup_slices = chop(mashup, dim)
                        acapella_slices = chop(acapella, dim)
                        count += 1
                        self.x.extend(mashup_slices)
                        self.y.extend(acapella_slices)
                console.info("Created", count, "mashups for key", k, "with",
                             len(self.x), "total slices so far")
            # Add a "channels" channel to please the network
            self.x = np.array(self.x)[:, :, :, np.newaxis]
            self.y = np.array(self.y)[:, :, :, np.newaxis]
            # Save to file if asked
            if as_h5:
                h5f = h5py.File(h5_path, "w")
                h5f.create_dataset("x", data=self.x)
                h5f.create_dataset("y", data=self.y)
                h5f.close()


if __name__ == "__main__":
    # Developer smoke test: build a Data set from the path given as the first
    # command-line argument and write out its first sample as two spectrogram
    # images plus one reconstructed audio file.
    fft_window_size = 1536

    console.h1("Loading Data")
    d = Data(sys.argv[1], fft_window_size)

    console.h1("Writing Sample Data")
    x_sample = d.x[0]
    conversion.save_spectrogram(x_sample, "x_sample_0.png")
    conversion.save_spectrogram(d.y[0], "y_sample_0.png")

    # Convert the first x sample back to audio so it can be listened to.
    audio = conversion.spectrogram_to_audio(x_sample, fft_window_size)
    conversion.save_audio(audio, "x_sample.wav", 22050)