def main(_):
    # REVIEW josephz: This paradigm was copied from inference-hack.py
    # initialize_globals()
    sample_dir = "sample"
    sample_names = ["rolling_in_the_deep"]
    # sample_names = ["new_test"]
    # sample_names = ["perfect_features"]
    # sample_names = ["rolling_in_the_one_more_time"]
    post_processor = PostProcessor()
    post_processor.load_weights("weights.h5")
    for sample_name in sample_names:
        console.h1("Processing %s" % sample_name)
        console.time("total processing for " + sample_name)
        sample_path = sample_dir + "/" + sample_name
        style_path = sample_path + "/style.mp3"
        content_path = sample_path + "/content.mp3"
        stylized_img_path = sample_path + "/stylized.png"
        stylized_img_raw_path = sample_path + "/stylized_raw.png"
        stylized_audio_path = sample_path + "/stylized.mp3"
        stylized_audio_raw_path = sample_path + "/stylized_raw.mp3"

        # Read style audio to spectrograms.
        style_audio, style_sample_rate = conversion.file_to_audio(style_path)
        style_img, style_phase = conversion.audio_to_spectrogram(
            style_audio, fft_window_size=1536)

        # Read content audio to spectrograms.
        content_audio, content_sample_rate = conversion.file_to_audio(
            content_path)
        content_img, content_phase = conversion.audio_to_spectrogram(
            content_audio, fft_window_size=1536)

        stylized_img_raw, stylized_img = stylize(content_img, style_img,
                                                 content_phase, style_phase,
                                                 content_path, style_path,
                                                 post_processor)

        # Save raw stylized spectrogram and audio.
        stylized_audio_raw = conversion.amplitude_to_audio(
            stylized_img_raw,
            fft_window_size=1536,
            phase_iterations=15,
            phase=content_phase)
        conversion.image_to_file(stylized_img_raw, stylized_img_raw_path)
        conversion.audio_to_file(stylized_audio_raw, stylized_audio_raw_path)

        # Save post-processed stylized spectrogram and audio.
        stylized_audio = conversion.amplitude_to_audio(
            stylized_img,
            fft_window_size=1536,
            phase_iterations=15,
            phase=content_phase)
        # np.save("stylized_img.npy", stylized_img)
        # np.save("content_phase.npy", content_phase)
        conversion.image_to_file(stylized_img, stylized_img_path)
        conversion.audio_to_file(stylized_audio, stylized_audio_path)

        console.timeEnd("total processing for " + sample_name)
        console.info("Finished processing %s; saved to %s" %
                     (sample_name, stylized_audio_path))
def main(_):
    initialize_globals()
    # Scrape data from the raw-data folder.
    RAW_DATA_DIR = "../data/studio_acapellas"
    PROCESSED_DATA_DIR = "../data/processed"
    # For each mp3, generate the data arrays using sst methods and save them.
    # TODO: rewrite all this using pathlib.
    for file_name in os.listdir(RAW_DATA_DIR):
        file_path = os.path.join(RAW_DATA_DIR, file_name)
        if file_path.endswith(".mp3"):
            processed_file_name = os.path.splitext(file_name)[0] + ".npy"
            processed_file_path_x = PROCESSED_DATA_DIR + "/x/" + processed_file_name
            processed_file_path_y = PROCESSED_DATA_DIR + "/y/" + processed_file_name
            processed_file_path_style = PROCESSED_DATA_DIR + "/style/" + processed_file_name
            console.h1("Processing", file_path)
            x_arr, y_arr, style_arr = generate_data_arrs(file_path)
            console.stats(x_arr, "x_arr")
            console.stats(y_arr, "y_arr")
            console.stats(style_arr, "style_arr")
            # For debugging, also save normalized spectrograms as images.
            io.imsave(processed_file_path_x + ".jpg", x_arr / x_arr.max())
            io.imsave(processed_file_path_y + ".jpg", y_arr / y_arr.max())
            np.save(processed_file_path_x, x_arr)
            np.save(processed_file_path_y, y_arr)
            np.save(processed_file_path_style, style_arr)
        else:
            console.info("Skipping", file_path)
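# For reference, the layout this produces (the x/y semantics are an assumption
# based on the variable names; the .jpg files are the normalized debug images
# saved above):
#   ../data/processed/x/<name>.npy       input spectrogram array
#   ../data/processed/x/<name>.npy.jpg   debug image of x
#   ../data/processed/y/<name>.npy       target spectrogram array
#   ../data/processed/y/<name>.npy.jpg   debug image of y
#   ../data/processed/style/<name>.npy   style feature array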
def compute_nnf_multiscale(content_features, style_features, iterations=16):
    console.h1(iterations, "Starting multi-scale pass")
    factors = [8, 4, 2, 1]
    assert factors[-1] == 1  # Make sure the output NNF is full-resolution.
    iterations_per_scale = max(iterations // len(factors), 1)
    nnf = None
    num_features, num_content_timesteps = content_features.shape
    num_features, num_style_timesteps = style_features.shape
    for d, downscale_factor in enumerate(factors):
        content_shape_downscaled = (num_features,
                                    num_content_timesteps // downscale_factor)
        if nnf is not None:
            # Make the NNF into an image-shaped array and undo that after
            # resizing. Since offsets are (for now) stored in absolute
            # coordinates, the NNF values must be rescaled on each resize
            # for the *relative* NNF vectors to be preserved.
            old_style_max = num_style_timesteps // factors[d - 1]
            new_style_max = num_style_timesteps // downscale_factor
            nnf = (resize(nnf[:, np.newaxis, np.newaxis] / old_style_max,
                          (num_content_timesteps // downscale_factor, 1))[:, 0, 0]
                   * new_style_max).astype(np.int64)
        content_features_downscaled = resize(content_features,
                                             content_shape_downscaled)
        style_features_downscaled = resize(
            style_features,
            (num_features, num_style_timesteps // downscale_factor))
        # console.debug("shape of downscaled content features is",
        #               content_features_downscaled.shape)
        nnf = compute_nnf(content_features_downscaled,
                          style_features_downscaled,
                          iterations_per_scale,
                          seed_nnf=nnf)
    return nnf
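# A minimal sketch (illustration only, not called by the pipeline) of the
# normalize-then-rescale step above: absolute style offsets are mapped into
# [0, 1] at the old scale, then back to absolute coordinates at the new
# scale, so the *relative* correspondences survive the resize. The toy
# numbers here are hypothetical.
def _rescale_offsets_demo():
    old_style_max, new_style_max = 8, 16
    coarse_nnf = np.array([0, 3, 6, 7])     # absolute offsets at the coarse scale
    fine_nnf = (coarse_nnf / old_style_max  # relative positions in [0, 1)
                * new_style_max).astype(np.int64)
    assert list(fine_nnf) == [0, 6, 12, 14]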
def handleImage(fileName, args, phase=None):
    console.h1("Reconstructing Audio from Spectrogram")
    spectrogram, sampleRate = loadSpectrogram(fileName)
    audio = spectrogramToAudioFile(spectrogram,
                                   fftWindowSize=args.fft,
                                   phaseIterations=args.iter)
    sanityCheck, phase = audioFileToSpectrogram(audio, fftWindowSize=args.fft)
    saveSpectrogram(sanityCheck,
                    fileName + fileSuffix("Output Spectrogram",
                                          fft=args.fft,
                                          iter=args.iter,
                                          sampleRate=sampleRate) + ".png")
    saveAudioFile(audio,
                  fileName + fileSuffix("Output",
                                        fft=args.fft,
                                        iter=args.iter) + ".wav",
                  sampleRate)
def handleAudio(filePath, args):
    console.h1("Creating Spectrogram")
    INPUT_FILE = filePath
    INPUT_FILENAME = basename(INPUT_FILE)
    console.info("Attempting to read from " + INPUT_FILE)
    audio, sampleRate = loadAudioFile(INPUT_FILE)
    console.info("Max of audio file is " + str(np.max(audio)))
    spectrogram, phase = audioFileToSpectrogram(audio, fftWindowSize=args.fft)
    SPECTROGRAM_FILENAME = INPUT_FILENAME + fileSuffix(
        "Input Spectrogram",
        fft=args.fft,
        iter=args.iter,
        sampleRate=sampleRate) + ".png"
    saveSpectrogram(spectrogram, SPECTROGRAM_FILENAME)
    print()
    console.wait("Saved Spectrogram; press Enter to continue...")
    print()
    handleImage(SPECTROGRAM_FILENAME, args, phase)
def handle_audio(file_path, args):
    console.h1("Creating Spectrogram")
    input_filename = basename(file_path)
    console.info("Attempting to read from " + file_path)
    audio, sample_rate = load_audio_file(file_path)
    console.info("Max of audio file is " + str(np.max(audio)))
    spectrogram = audio_file_to_spectrogram(audio, fft_window_size=args.fft)
    spectrogram_filename = input_filename + \
        file_suffix("Input Spectrogram",
                    fft=args.fft,
                    iter=args.iter,
                    sample_rate=sample_rate) + ".png"
    save_spectrogram(spectrogram, spectrogram_filename)
    print()
    console.wait("Saved Spectrogram; press Enter to continue...")
    print()
    handle_image(spectrogram_filename, args)
def handle_image(filename, args):
    console.h1("Reconstructing Audio from Spectrogram")
    spectrogram, sample_rate = load_spectrogram(filename)
    audio = spectrogram_to_audio_file(
        spectrogram, fft_window_size=args.fft, phase_iterations=args.iter)
    sanity_check = audio_file_to_spectrogram(audio, fft_window_size=args.fft)
    outname = filename \
        + file_suffix("Output Spectrogram",
                      fft=args.fft, iter=args.iter, sample_rate=sample_rate) \
        + ".png"
    save_spectrogram(sanity_check, outname)
    outname = filename \
        + file_suffix("Output", fft=args.fft, iter=args.iter) \
        + ".wav"
    save_audio_file(audio, outname, sample_rate)
def handle_image(file_name, args, phase=None):
    console.h1("Reconstructing Audio from Spectrogram")
    spectrogram, sample_rate = load_spectrogram(file_name)
    audio = spectrogram_to_audio(spectrogram,
                                 fft_window_size=args.fft,
                                 phase_iterations=args.iter)
    sanity_check, phase = audio_to_spectrogram(audio,
                                               fft_window_size=args.fft,
                                               sr=sample_rate)
    save_spectrogram(
        sanity_check,
        file_name + file_suffix("Output Spectrogram",
                                fft=args.fft,
                                iter=args.iter,
                                sample_rate=sample_rate) + ".png")
    save_audio(
        audio,
        file_name + file_suffix("Output",
                                fft=args.fft,
                                iter=args.iter) + ".wav",
        sample_rate)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", default=None, type=str,
                        help="path containing training data")
    parser.add_argument("--valid", default=None, type=str,
                        help="path containing validation data")
    parser.add_argument("--epochs", default=10, type=int,
                        help="number of epochs to train")
    parser.add_argument("--name", default=None, type=str,
                        help="name of experiment")
    parser.add_argument("--weights", default="weights.h5", type=str,
                        help="h5 file to read/write weights to")
    parser.add_argument("--batch_size", default=16, type=int,
                        help="batch size for training")
    parser.add_argument("--load", action="store_true",
                        help="load previous weights file before starting")
    parser.add_argument("--test", default=None, type=str,
                        help="test file to infer on every epoch")
    parser.add_argument("files", nargs="*", default=[])
    args = parser.parse_args()

    post_processor = PostProcessor(args.name)
    if len(args.files) == 0 and args.train:
        console.h1("preparing to train on {}".format(args.train))
        if args.load:
            console.log("loading weights from {}".format(args.weights))
            post_processor.load_weights(args.weights)
        console.log("loading data")
        train = DataGenerator(args.train, args.batch_size)
        valid = DataGenerator(args.valid, args.batch_size) if args.valid else None
        console.h1("training")
        post_processor.train(train, args.epochs,
                             validation_data=valid, test_file=args.test)
        post_processor.save_weights(args.weights)
    elif len(args.files) > 0:
        console.h1("preparing to process", args.files, "...")
        post_processor.load_weights(args.weights)
        for f in args.files:
            post_processor.denoise_from_file(f)
    else:
        console.error("please provide data to train on (--train) "
                      "or files to process")
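# Example invocations (the script name and file paths are hypothetical; the
# flags match the parser defined above):
#
#   python train_postprocessor.py --train ../data/train --valid ../data/valid \
#       --epochs 20 --batch_size 16 --weights weights.h5 --load
#   python train_postprocessor.py --weights weights.h5 noisy_input.mp3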
num_ok = 0
num_total = 0
db = pickle.load(open(Config.db_path, "rb"))
for pid, j in db.items():
    pdfs = [x["href"] for x in j["links"] if x["type"] == "application/pdf"]
    assert len(pdfs) == 1
    pdf_url = pdfs[0] + ".pdf"
    pdf_url = pdf_url.replace("http:", "https:")  # force https
    basename = pdf_url.split("/")[-1]
    fname = os.path.join(Config.pdf_dir, basename)

    # Try to retrieve the pdf.
    num_total += 1
    try:
        if basename not in have:
            console.log("fetching %s into %s" % (pdf_url, fname))
            req = urlopen(pdf_url, None, timeout_secs)
            with open(fname, "wb") as fp:
                shutil.copyfileobj(req, fp)
            # Be polite to the server: sleep a small random amount per fetch.
            time.sleep(0.05 + random.uniform(0, 0.1))
        else:
            console.info("%s exists, skipping" % (fname,))
        num_ok += 1
    except Exception as e:
        console.warn("error downloading: ", pdf_url)
        console.log(e)

console.info("%d/%d of %d downloaded ok." % (num_ok, num_total, len(db)))
console.h1("Final number of papers downloaded okay: %d/%d" % (num_ok, len(db)))
files = sys.argv[1:]
config_str = str(config)
print(config_str)
vocal_isolation = VocalIsolation(config)
if len(files) == 0 and config.data:
    console.log("No files provided; attempting to train on "
                + config.data + "...")
    if config.batch_generator.startswith("random") \
            and config.epoch_steps == 0:
        console.error("EPOCH_STEPS is not set,"
                      " but cannot be determined from data.")
        sys.exit(1)
    if config.load:
        console.h1("Loading Weights")
        vocal_isolation.load_weights(config.weights)
    console.h1("Loading Data")
    data = Data()
    console.h1("Training Model")
    signal.signal(signal.SIGINT, get_signal_handler(vocal_isolation))
    vocal_isolation.run(data)
elif len(files) > 0:
    console.log("Files provided; performing inference on "
                + str(files) + "...")
    console.h1("Loading weights")
    vocal_isolation.load_weights(config.weights)
    for f in files:
        vocal_isolation.infer(f, config.fft, config.phase_iterations,
                              config.learn_phase, config.get_channels())
else:
    # Assumed fallback (body elided in the original): neither training data
    # nor input files were provided.
    console.error("Please provide training data (config.data) "
                  "or files to process.")
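# Example invocations (hypothetical script name; all settings other than the
# input files come from the Config object above):
#
#   python main.py              # no files given: trains on config.data
#   python main.py mixture.mp3  # files given: infers with config.weights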
            mashup = np.maximum(acapella, instrumental)
            # Chop into slices so everything's the same size in a batch.
            dim = SLICE_SIZE
            mashup_slices = chop(mashup, dim)
            acapella_slices = chop(acapella, dim)
            count += 1
            self.x.extend(mashup_slices)
            self.y.extend(acapella_slices)
            console.info("Created", count, "mashups for key", k,
                         "with", len(self.x), "total slices so far")
        # Add a "channels" dimension to please the network.
        self.x = np.array(self.x)[:, :, :, np.newaxis]
        self.y = np.array(self.y)[:, :, :, np.newaxis]
        # Save to file if asked.
        if as_h5:
            h5f = h5py.File(h5_path, "w")
            h5f.create_dataset("x", data=self.x)
            h5f.create_dataset("y", data=self.y)
            h5f.close()


if __name__ == "__main__":
    # Simple testing code to use while developing.
    console.h1("Loading Data")
    d = Data(sys.argv[1], 1536)
    console.h1("Writing Sample Data")
    conversion.save_spectrogram(d.x[0], "x_sample_0.png")
    conversion.save_spectrogram(d.y[0], "y_sample_0.png")
    audio = conversion.spectrogram_to_audio(d.x[0], 1536)
    conversion.save_audio(audio, "x_sample.wav", 22050)
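# For reference, a minimal sketch of the `chop` helper used in Data above
# (the real implementation lives elsewhere in this repo; the exact edge
# handling here is an assumption): tile the spectrogram into dim x dim
# squares, dropping any ragged edge, so every slice in a batch has the
# same shape.
def _chop_sketch(matrix, dim):
    slices = []
    for t in range(matrix.shape[1] // dim):
        for f in range(matrix.shape[0] // dim):
            slices.append(matrix[f * dim:(f + 1) * dim,
                                 t * dim:(t + 1) * dim])
    return slices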