def load_1(self, *raw_args):
    """Spawn a samplernn_worker subprocess and wait for its init handshake."""
    # Replace any worker that is already running.
    if self._proc is not None:
        self.unload_1()
    parsed = self._load_parser.parse_args(map(str, raw_args))
    # Fall back to the current interpreter when no python was given.
    interpreter = parsed.python or sys.executable
    script_path = os.path.join(script_dir, "samplernn_worker.py")
    print_err("starting samplernn_worker process, this may take a while")
    cmd = (
        str(interpreter),
        script_path,
        "--canvas_dir",
        self._canvas_dir,
        *parsed.worker_args,
    )
    print_err("worker_command =", cmd)
    self._proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=self._canvas_dir,
    )
    # Forward the worker's stderr in the background.
    self._stderr_printer = threading.Thread(target=self._keep_printing_stderr)
    self._stderr_printer.start()
    self._read_tag(protocol.OUT_TAG_INIT)
    self._outlet(1, ["worker", "on"])
def load_1(self, ckpt_dir, batch_size=1):
    """Spawn a gansynth_worker subprocess for the given checkpoint dir."""
    if self._proc is not None:
        self.unload_1()
    interpreter = sys.executable
    script_path = sopimagenta_path("gansynth_worker")
    # Checkpoint paths are resolved relative to the patch's canvas directory.
    checkpoint = os.path.join(self._canvas_dir, str(ckpt_dir))
    print_err("starting gansynth_worker process, this may take a while")
    self._proc = subprocess.Popen(
        (interpreter, script_path, checkpoint, str(batch_size)),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Forward the worker's stderr in the background.
    self._stderr_printer = threading.Thread(target=self._keep_printing_stderr)
    self._stderr_printer.start()
    # Handshake: init tag, then an info message with audio params.
    self._read_tag(protocol.OUT_TAG_INIT)
    info_msg = self._proc.stdout.read(protocol.init_struct.size)
    audio_length, sample_rate = protocol.from_info_msg(info_msg)
    print_err("gansynth_worker is ready")
    self._outlet(1, ["worker", "on", audio_length, sample_rate])
def unload_1(self):
    """Terminate the worker subprocess, if any, and signal worker-off."""
    if not self._proc:
        print_err("no gansynth_worker process is running")
    else:
        self._proc.terminate()
        self._proc = None
        self._stderr_printer = None
    self._outlet(1, ["worker", "off"])
def save_z_buf(z_name, path):
    """Read the latent vector from pyext buffer `z_name` and save it to `path`.

    If `path` has no extension, ".npy" is appended before saving.
    Raises whatever `pyext.Buffer` / `from_z32` raise for a missing or
    malformed buffer.
    """
    z_buf = pyext.Buffer(z_name)
    z = from_z32(z_buf)
    base, ext = os.path.splitext(path)
    if not ext:
        ext = ".npy"
    path_fixed = base + ext
    print_err("save: " + path_fixed)
    # Bug fix: the original computed and logged the target path but never
    # wrote the file. Save via a file object so np.save cannot append a
    # second ".npy" when the caller supplied a different extension.
    with open(path_fixed, "wb") as fp:
        np.save(fp, z)
def handle_gen_audio(model, stdin, stdout, state):
    """Worker-side handler: read (pitch, z) pairs from stdin, synthesize
    audio with the GANSynth model (optionally offset by GANSpace edits),
    and write the resulting audio messages back to stdout."""
    # Number of notes requested.
    count_msg = read_msg(stdin, protocol.count_struct.size)
    count = protocol.from_count_msg(count_msg)
    pitches = []
    zs = []
    # One fixed-size (pitch, z) message per requested note.
    for i in range(count):
        gen_msg = read_msg(stdin, protocol.gen_audio_struct.size)
        pitch, z = protocol.from_gen_msg(gen_msg)
        pitches.append(pitch)
        zs.append(z)
    layer_offsets = {}
    if 'ganspace_component_amplitudes' in state:
        # Build a per-layer offset as a linear combination of GANSpace
        # principal components scaled by amplitude * per-component stdev.
        components = state['ganspace_components']['comp']
        std_devs = state['ganspace_components']['stdev']
        edits = state['ganspace_component_amplitudes']
        amounts = np.zeros(components.shape[:1], dtype=np.float32)
        # Only the first len(edits) components get a non-zero amplitude;
        # the rest stay zero.
        amounts[:len(list(map(float, edits)))] = edits * std_devs
        scaled_directions = amounts.reshape(-1, 1, 1, 1) * components
        linear_combination = np.sum(scaled_directions, axis=0)
        # NOTE(review): the repeat count 8 looks like a hard-coded batch
        # size -- confirm it matches the model's batch_size_schedule.
        linear_combination_batch = np.repeat(linear_combination.reshape(
            1, *linear_combination.shape), 8, axis=0)
        layer_offsets[state['ganspace_components']
                      ['layer']] = linear_combination_batch
    z_arr = np.array(zs)
    try:
        audios = model.generate_samples_from_z(z_arr, pitches,
                                               layer_offsets=layer_offsets)
    except KeyError as e:
        # The model can only synthesize pitches it was trained on.
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []
    # Reply: tag, count, then a (byte size, samples) pair per note.
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))
    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
def handle_load_ganspace_components(model, stdin, stdout, state):
    """Worker-side handler: unpickle the GANSpace components file named on
    stdin and acknowledge with the number of components loaded."""
    # The request is a length-prefixed UTF-8 path.
    size_msg = read_msg(stdin, protocol.int_struct.size)
    path_len = protocol.from_int_msg(size_msg)
    path = read_msg(stdin, path_len).decode('utf-8')
    print_err("Opening components file '{}'".format(path))
    # NOTE: pickle.load can execute arbitrary code from a hostile file;
    # the path originates from the local Pd patch, i.e. trusted input.
    with open(path, "rb") as handle:
        state['ganspace_components'] = pickle.load(handle)
    print_err("Components file loaded.")
    n_components = len(state['ganspace_components']["comp"])
    state['ganspace_component_count'] = n_components
    # Acknowledge: tag followed by the component count.
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_LOAD_COMPONENTS))
    stdout.write(protocol.to_count_msg(n_components))
    stdout.flush()
def load_1(self, python):
    """Spawn a ddsp_worker subprocess using the given Python interpreter."""
    if self._proc is not None:
        self.unload_1()
    script_path = os.path.join(script_dir, "ddsp_worker.py")
    print_err("starting ddsp_worker process, this may take a while")
    cmd = (str(python), script_path)
    self._proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Forward the worker's stderr in the background.
    self._stderr_printer = threading.Thread(
        target=self._keep_printing_stderr)
    self._stderr_printer.start()
    self._read_tag(protocol.OUT_TAG_INIT)
    self._outlet(1, ["worker", "on"])
def load_ganspace_components_1(self, ganspace_components_file):
    """Ask the worker to load a GANSpace components file and record the
    resulting component count in self._component_count.

    The path is resolved relative to the patch's canvas directory.
    """
    ganspace_components_file = os.path.join(
        self._canvas_dir, str(ganspace_components_file)
    )
    # Consistency: use print_err like the rest of the file instead of
    # print(..., file=sys.stderr).
    print_err("Loading GANSpace components...")
    components_msg = ganspace_components_file.encode('utf-8')
    # Bug fix: send the UTF-8 *byte* length, not the character count --
    # they differ for non-ASCII paths, and the worker reads exactly
    # `size` bytes before decoding.
    size_msg = protocol.to_int_msg(len(components_msg))
    self._write_msg(protocol.IN_TAG_LOAD_COMPONENTS, size_msg,
                    components_msg)
    # The worker replies with a tag and the number of components loaded.
    self._read_tag(protocol.OUT_TAG_LOAD_COMPONENTS)
    count_msg = self._proc.stdout.read(protocol.count_struct.size)
    self._component_count = protocol.from_count_msg(count_msg)
    print_err("_component_count =", self._component_count)
    # Opening the buffer validates that the edits array exists in the patch.
    # TODO(review): resizing it to the component count is still disabled.
    buf = pyext.Buffer(self._edits_buf_name)
    #buf.resize(component_count)
    #buf.dirty()
    print_err("GANSpace components loaded!")
def handle_synthesize_noz(model, stdin, stdout, state):
    """Worker-side handler: synthesize notes directly from the stored
    GANSpace component amplitudes (no explicit z vectors), one note per
    requested pitch."""
    print_err("handle_synthesize_noz")
    # Number of notes requested.
    count_msg = read_msg(stdin, protocol.count_struct.size)
    count = protocol.from_count_msg(count_msg)
    pitches = []
    for i in range(count):
        gen_msg = read_msg(stdin, protocol.synthesize_noz_struct.size)
        pitch = protocol.from_synthesize_noz_msg(gen_msg)
        pitches.append(pitch)
    # Build one edited layer from the stored PCA data and amplitudes, then
    # tile it so every pitch uses the same layer values.
    pca = state["ganspace_components"]
    stdevs = pca["stdev"]
    # Match the dtype of the model's layer data (presumably float32 --
    # taken from the stored stdev array rather than hard-coded).
    layer_dtype = stdevs.dtype
    edits = np.array(state["ganspace_component_amplitudes"],
                     dtype=layer_dtype)
    layer = make_layer(pca, edits)
    layers = np.repeat([layer], len(pitches), axis=0)
    try:
        audios = model.generate_samples_from_layers({pca["layer"]: layers},
                                                    pitches)
    except KeyError as e:
        # The model can only synthesize pitches it was trained on.
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []
    # Reply: tag, count, then a (byte size, samples) pair per note.
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))
    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
def handle_hallucinate(model, stdin, stdout, state):
    """Worker-side handler: generate random notes, interpolate between
    them, and reply with one combined audio clip."""
    max_note_length = model.config['audio_length']
    sample_rate = model.config['sample_rate']
    request = read_msg(stdin, protocol.hallucinate_struct.size)
    params = protocol.from_hallucinate_msg(request)
    (note_count, interpolation_steps, spacing, start_trim,
     attack, sustain, release) = params
    print_err(
        "note_count = {} interpolation_steps = {}, spacing = {}s, start_trim = {}s, attack = {}s, sustain = {}s, release = {}s"
        .format(*params))
    # Random latent vectors, all pinned to pitch 32.
    seed_notes = model.generate_z(note_count)
    seed_pitches = np.array(
        [32] * len(seed_notes)
    )  # np.floor(30 + np.random.rand(len(initial_notes)) * 30)
    notes, pitches = interpolate_notes(seed_notes, seed_pitches,
                                       interpolation_steps)
    audios = synthesize(model, notes, pitches)
    combined = combine_notes(audios,
                             spacing=spacing,
                             start_trim=start_trim,
                             attack=attack,
                             sustain=sustain,
                             release=release,
                             max_note_length=max_note_length,
                             sr=sample_rate)
    combined = combined.astype('float32')
    # Reply: tag, byte size, samples.
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(
        protocol.to_audio_size_msg(combined.size * combined.itemsize))
    stdout.write(protocol.to_audio_msg(combined))
    stdout.flush()
def hallucinate_noz_1(self, audio_buf_name):
    """Pd-side: ask the worker to hallucinate audio from the stored steps'
    GANSpace edits and copy the result into the named Pd array."""
    if not self._proc:
        raise Exception("can't hallucinate - load a checkpoint first")
    if not self._steps:
        raise Exception("can't hallucinate - no steps added")
    # Refresh self._steps' edits from the Pd-side edit buffers.
    self._read_edits()
    step_count = len(self._steps)
    print_err("step_count =", step_count)
    print_err("steps =", self._steps)
    # NOTE(review): edit_count is taken from the first step only -- this
    # assumes every step has the same number of edits; confirm.
    edit_count = len(self._steps[0]["edits"])
    # Flatten all edits, step by step, into one list of floats.
    edit_list = []
    for step in self._steps:
        for edit in step["edits"]:
            edit_list.append(edit)
    print_err("len(edit_list) =", len(edit_list))
    # Request: tag, hallucinate params, per-step edit count, then one
    # f64 message per edit value.
    self._write_msg(
        protocol.IN_TAG_HALLUCINATE_NOZ,
        protocol.to_hallucinate_msg(
            step_count,
            self._interp_steps,
            self._sample_spacing,
            self._start_trim,
            self._attack,
            self._sustain,
            self._release
        ),
        protocol.to_count_msg(edit_count),
        *map(protocol.to_f64_msg, edit_list)
    )
    # Reply: tag, byte size, samples.
    self._read_tag(protocol.OUT_TAG_AUDIO)
    audio_size_msg = self._proc.stdout.read(protocol.audio_size_struct.size)
    audio_size = protocol.from_audio_size_msg(audio_size_msg)
    audio_msg = self._proc.stdout.read(audio_size)
    audio = protocol.from_audio_msg(audio_msg)
    # Copy into the Pd array, resizing if the lengths differ.
    audio_buf = pyext.Buffer(audio_buf_name)
    if len(audio_buf) != len(audio):
        audio_buf.resize(len(audio))
    audio_buf[:] = audio
    audio_buf.dirty()
    self._outlet(1, ["hallucinated", len(audio)])
def handle_timbre_transfer(stdin, stdout):
    """Worker-side handler: read a timbre-transfer request (fixed-size
    parameter header, then checkpoint dir and input audio payloads), run
    DDSP timbre transfer, and write the transferred audio to stdout."""
    transfer_msg = read_msg(stdin, protocol.timbre_transfer_struct.size)
    h = protocol.from_timbre_transfer_msg(transfer_msg)
    print_err(repr(h))
    # The header carries the byte lengths of the two variable-size
    # payloads that follow it.
    in_sample_rate, out_sample_rate, f0_octave_shift, f0_confidence_threshold, loudness_db_shift, adjust, quiet, autotune, ckpt_dir_len, in_audio_len = h
    print_err("ckpt_dir_len =", ckpt_dir_len)
    print_err("in_audio_len =", in_audio_len)
    # Payload 1: checkpoint directory path.
    ckpt_dir_msg = stdin.read(ckpt_dir_len)
    ckpt_dir = protocol.from_str_msg(ckpt_dir_msg)
    print_err("ckpt_dir =", ckpt_dir)
    # Payload 2: input audio samples.
    in_audio_msg = stdin.read(in_audio_len)
    print_err("len(in_audio_msg) =", len(in_audio_msg))
    in_audio = protocol.from_audio_msg(in_audio_msg)
    print_err("in_audio.size =", in_audio.size)
    out_audio = timbre_transfer(
        ckpt_dir=ckpt_dir,
        audio=in_audio,
        in_sample_rate=in_sample_rate,
        out_sample_rate=out_sample_rate,
        f0_octave_shift=f0_octave_shift,
        f0_confidence_threshold=f0_confidence_threshold,
        loudness_db_shift=loudness_db_shift,
        adjust=adjust,
        quiet=quiet,
        autotune=autotune,
        log=print_err)
    # The model returns a tensor; flatten to a 1-D array for transport.
    out_audio = out_audio.numpy().ravel()
    out_audio_len = out_audio.size * out_audio.itemsize
    print_err("out_audio.shape =", out_audio.shape)
    print_err("out_audio_len =", out_audio_len)
    # Reply: tag, byte size, samples.
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_TIMBRE_TRANSFERRED))
    print_err("wrote tag_timbre_transferred")
    stdout.write(protocol.to_timbre_transferred_msg(out_audio_len))
    print_err("wrote size")
    bytez = protocol.to_audio_msg(out_audio)
    print_err("len(bytez) =", len(bytez))
    stdout.write(bytez)
    print_err("wrote out_audio")
    stdout.flush()
# CLI options for standalone timbre transfer (parser is created above).
parser.add_argument("--sample_rate", type=int, default=16000)
parser.add_argument("--f0_octave_shift", type=int, default=0)
parser.add_argument("--f0_confidence_threshold", type=float, default=0.0)
parser.add_argument("--loudness_db_shift", type=float, default=0.0)
# --adjust / --no-adjust are an on/off pair; on by default.
parser.add_argument("--adjust", dest="adjust", action="store_true")
parser.add_argument("--no-adjust", dest="adjust", action="store_false")
parser.set_defaults(adjust=True)
parser.add_argument("--quiet", type=float, default=20.0)
parser.add_argument("--autotune", type=float, default=0.0)

args = parser.parse_args()

# sr=None keeps the input file's native sample rate.
audio, in_sample_rate = librosa.load(args.in_file, sr=None)

out_audio = timbre_transfer(
    ckpt_dir=args.ckpt_dir,
    audio=audio,
    in_sample_rate=in_sample_rate,
    out_sample_rate=args.sample_rate,
    f0_octave_shift=args.f0_octave_shift,
    f0_confidence_threshold=args.f0_confidence_threshold,
    loudness_db_shift=args.loudness_db_shift,
    adjust=args.adjust,
    quiet=args.quiet,
    autotune=args.autotune,
    log=print_err)

print_err("saving generated audio to {}".format(args.out_file))
# Write the transferred audio as a WAV at the requested sample rate.
scipy.io.wavfile.write(args.out_file, args.sample_rate, out_audio.numpy().T)
def _outlet(self, *args):
    """Fallback outlet stub: log the outlet call instead of sending it."""
    message = "_outlet{}".format(args)
    print_err(message)
def _anything_1(self, *args):
    """Catch-all for inlet messages that have no dedicated handler."""
    unhandled = args
    print_err("unhandled input:", unhandled)
def state_1(self):
    """Dump the current note list and start time to stderr for debugging."""
    for label, value in (("notes =", self.notes), ("t0 =", self.t0)):
        print_err(label, value)
import time

import magenta
from magenta.models.melody_rnn import melody_rnn_config_flags, melody_rnn_model, melody_rnn_sequence_generator
from magenta.models.shared.sequence_generator_bundle import read_bundle_file
from magenta.music.protobuf import generator_pb2, music_pb2
import monotonic
import pretty_midi

from sopilib.utils import print_err

# pyext is only available when running inside Pure Data; outside Pd, fall
# back to a stub base class so the module can still be imported.
try:
    import pyext
    ext_class = pyext._class
except:
    print_err("failed to load pyext module")
    class ext_class(object):
        def _outlet(self, *args):
            # Log outlet calls instead of sending them to a Pd outlet.
            print_err("_outlet{}".format(args))

# Sample input: (time, pitch, velocity) triples; velocity 0 is a note-off.
test_notes = [(0.5, 32, 100), (0.6, 33, 50), (0.7, 33, 0), (0.8, 32, 0)]

def notes_to_midi(notes, t0=0.0):
    # NOTE(review): this function's body appears to continue beyond this
    # chunk of the file.
    midi = pretty_midi.PrettyMIDI()
    inst = pretty_midi.Instrument(0)
    # add notes to instrument
    # a note is added only after a note-on and a corresponding note-off
def handle_generate(state):
    """Worker-side handler: read a generate request (optional seed audio
    plus one temperature string per output) and reply with the generated
    audio clips."""
    stdin = state.stdin
    stdout = state.stdout
    model = state.model
    config = state.config
    num_seqs = state.num_seqs
    generate_msg = read_msg(stdin, protocol.generate_struct.size)
    seed_sr, out_sr, num_outs, dur, seed_len = protocol.from_generate_msg(
        generate_msg)
    print_err("seed_sr =", seed_sr)
    print_err("out_sr =", out_sr)
    print_err("num_outs =", num_outs)
    print_err("dur =", dur)
    print_err("seed_len =", seed_len)
    if seed_len > 0:
        # NOTE(review): the client side appears to send seed_len in BYTES
        # (size * itemsize) while this multiplies by the f32 size again --
        # confirm which unit the protocol intends before changing either.
        seed_msg = read_msg(stdin, seed_len * protocol.f32_struct.size)
        seed_audio = protocol.from_audio_msg(seed_msg)
    else:
        seed_audio = np.array([], dtype=np.float32)
    print_err("seed_audio size*itemsize =",
              seed_audio.size * seed_audio.itemsize)
    # One length-prefixed temperature string per requested output.
    temps = []
    for i in range(num_outs):
        temp_len_msg = read_msg(stdin, protocol.size_struct.size)
        temp_len = protocol.from_size_msg(temp_len_msg)
        temp_str_msg = read_msg(stdin, temp_len)
        temp_str = protocol.from_str_msg(temp_str_msg)
        temp = gen.check_temperature(temp_str)
        temps.append(temp)
    print_err("temps =", temps)
    # out_audios = [np.random.uniform(0.0, 1.0, out_len).astype(np.float32)]*num_outs
    out_audios = list(generate(state, out_sr, dur, temps, seed_audio))
    print_err("generated")
    # Reply: tag, header (sr / clip count / clip length), then raw samples.
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_GENERATED))
    stdout.write(
        protocol.to_generated_msg(out_sr, len(out_audios),
                                  out_audios[0].size))
    for audio in out_audios:
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
from __future__ import print_function

import os
import sys

import numpy as np

import sopilib.ddsp_protocol as protocol
from sopilib.utils import print_err, read_msg
from handlers import handlers

print_err("hello :)")

# open standard input/output handles (binary, for the message protocol)
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer

# write init message so the client knows the worker is ready
stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_INIT))
stdout.flush()

print_err("it begins @_@")

# Main dispatch loop: read a tag, look up its handler.
# NOTE(review): the loop body continues beyond this chunk of the file.
while True:
    in_tag_msg = read_msg(stdin, protocol.tag_struct.size)
    in_tag = protocol.from_tag_msg(in_tag_msg)
    if in_tag not in handlers:
def generate_1(self, *raw_args):
    """Pd-side: request audio generation from the samplernn worker and
    copy the generated clips into the named output buffers."""
    if not self._proc:
        raise Exception("no samplernn_worker process is running")
    args = self._generate_parser.parse_args(map(str, raw_args))
    print_err("args =", args)
    outs = args.out if args.out != None else []
    temps = args.temp if args.temp != None else []
    num_outs = len(outs)
    if num_outs < 1:
        # NOTE(review): this only logs and falls through -- the request is
        # still sent with zero outputs; confirm that is intended.
        print_err("no outputs specified")
    # Pad temperatures by repeating the last one so each output has one.
    # NOTE(review): raises IndexError when outputs exist but no --temp
    # was given at all.
    while len(temps) < num_outs:
        temps.append(temps[-1])
    if args.seed != None:
        seed_buf = pyext.Buffer(args.seed)
        seed_audio = np.array(seed_buf, dtype=np.float32)
    else:
        seed_audio = np.array([], dtype=np.float32)
    print_err("seed_audio size*itemsize =",
              seed_audio.size * seed_audio.itemsize)
    # seed_len is computed in bytes (size * itemsize) here.
    seed_len = seed_audio.size * seed_audio.itemsize
    generate_msg = protocol.to_generate_msg(args.seed_sr, args.out_sr,
                                            num_outs, args.dur, seed_len)
    seed_audio_msg = protocol.to_audio_msg(seed_audio)
    # Each temperature is sent as a length-prefixed string message.
    temp_msgs = map(protocol.to_str_msg, temps)
    temp_msgs = map(lambda bs: (protocol.to_size_msg(len(bs)), bs),
                    temp_msgs)
    temp_msgs = (x for pair in temp_msgs for x in pair)
    self._write_msg(protocol.IN_TAG_GENERATE, generate_msg, seed_audio_msg,
                    *temp_msgs)
    print_err("wrote")
    # Reply: tag, header (sr / clip count / clip length), then raw samples.
    self._read_tag(protocol.OUT_TAG_GENERATED)
    generated_msg = self._proc.stdout.read(protocol.generated_struct.size)
    g_out_sr, g_num_outs, g_out_len = protocol.from_generated_msg(
        generated_msg)
    print_err("g_out_sr =", g_out_sr)
    print_err("g_num_outs =", g_num_outs)
    print_err("g_out_len =", g_out_len)
    out_audios = []
    for i in range(g_num_outs):
        out_audio_msg = self._proc.stdout.read(
            g_out_len * protocol.f32_struct.size)
        out_audios.append(protocol.from_audio_msg(out_audio_msg))
    print_err("len(out_audios) =", len(out_audios))
    assert len(outs) == len(out_audios)
    # Copy each generated clip into its Pd array, resizing as needed, and
    # announce each one on the outlet.
    for buf_name, audio in zip(outs, out_audios):
        buf = pyext.Buffer(buf_name)
        if len(buf) != len(audio):
            buf.resize(len(audio))
        buf[:] = audio
        buf.dirty()
        self._outlet(1, ["generated", buf_name, g_out_sr, g_out_len])
def synthesize_noz_1(self, *args):
    """Pd-side: send the current GANSpace component amplitudes plus
    (buffer, pitch) pairs to the worker, then copy each synthesized note
    into its Pd array.

    Args alternate: audio1 pitch1 [audio2 pitch2 ...].
    """
    if not self._proc:
        raise Exception("can't synthesize - no gansynth_worker process is running")
    arg_count = len(args)
    if arg_count == 0 or arg_count % 2 != 0:
        raise ValueError("invalid number of arguments ({}), should be a multiple of 2: synthesize_noz audio1 pitch1 [audio2 pitch2 ...]".format(arg_count))
    # Read current amplitudes from the edits buffer, zero-padding up to
    # the component count previously reported by the worker.
    component_buff = pyext.Buffer(self._edits_buf_name)
    components = np.array(component_buff, dtype=np.float64)
    component_msgs = []
    for value in components:
        component_msgs.append(protocol.to_float_msg(value))
    for i in range(self._component_count - len(components)):
        component_msgs.append(protocol.to_float_msg(0.0))
    self._write_msg(protocol.IN_TAG_SET_COMPONENT_AMPLITUDES,
                    *component_msgs)
    # Build one synthesize request per (buffer, pitch) pair.
    gen_msgs = []
    audio_buf_names = []
    for i in range(0, arg_count, 2):
        audio_buf_name, pitch = args[i:i+2]
        gen_msgs.append(protocol.to_synthesize_noz_msg(pitch))
        audio_buf_names.append(audio_buf_name)
    in_count = len(gen_msgs)
    in_count_msg = protocol.to_count_msg(in_count)
    self._write_msg(protocol.IN_TAG_SYNTHESIZE_NOZ, in_count_msg, *gen_msgs)
    # Reply: tag, count, then a (byte size, samples) pair per note.
    self._read_tag(protocol.OUT_TAG_AUDIO)
    out_count_msg = self._proc.stdout.read(protocol.count_struct.size)
    out_count = protocol.from_count_msg(out_count_msg)
    print_err("out_count =", out_count)
    if out_count == 0:
        # Worker could not synthesize anything (e.g. unsupported pitch).
        print_err("no audio was synthesized!")
        return
    assert out_count == in_count
    # Copy each note into its Pd array, resizing as needed.
    for audio_buf_name in audio_buf_names:
        audio_size_msg = self._proc.stdout.read(
            protocol.audio_size_struct.size)
        audio_size = protocol.from_audio_size_msg(audio_size_msg)
        audio_msg = self._proc.stdout.read(audio_size)
        audio_note = protocol.from_audio_msg(audio_msg)
        audio_buf = pyext.Buffer(audio_buf_name)
        if len(audio_buf) != len(audio_note):
            audio_buf.resize(len(audio_note))
        audio_buf[:] = audio_note
        audio_buf.dirty()
    self._outlet(1, "synthesized")
args = parser.parse_args()

ckpt_path = gen.find_checkpoint_path(args.ckpt_dir)
config = gen.find_config(ckpt_path, args.config)

# Pick the checkpoint with the highest step number in the directory.
best = None
for entry in os.listdir(args.ckpt_dir):
    match = re.match(r"^(model\.ckpt-(\d+))\.index$", entry)
    if not match:
        continue
    step = int(match.group(2))
    if best is None or best[1] < step:
        best = (match.group(1), step)

if best is None:
    print_err("no model.ckpt-#.index files found in checkpoint dir")
    sys.exit(1)

ckpt_path = os.path.join(args.ckpt_dir, best[0])
print_err("ckpt_path:", ckpt_path)

model = gen.create_inference_model(ckpt_path, args.num_seqs, config)

print_err("hello :)")

# open standard input/output handles
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
def timbre_transfer(ckpt_dir, audio, in_sample_rate, out_sample_rate,
                    f0_octave_shift, f0_confidence_threshold,
                    loudness_db_shift, log=print):
    """Run DDSP timbre transfer on `audio` using the model in `ckpt_dir`.

    Returns the generated audio tensor from the autoencoder.

    NOTE(review): callers elsewhere pass adjust/quiet/autotune keyword
    arguments that this signature does not accept -- this may be an older
    version of the function or a different file; confirm before use.
    """
    print_err("args:", (ckpt_dir, "<audio>", in_sample_rate,
                        out_sample_rate, f0_octave_shift,
                        f0_confidence_threshold, loudness_db_shift))
    # Convert to mono at the model's sample rate, shaped (1, n_samples).
    log("converting audio...")
    start_time = time.time()
    audio = librosa.to_mono(audio)
    audio = librosa.resample(audio, in_sample_rate, out_sample_rate)
    audio = audio[np.newaxis, :]
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))
    # Setup the session.
    ddsp.spectral_ops.reset_crepe()
    # Compute features (f0, loudness, ...) from the input audio.
    log("computing audio features...")
    start_time = time.time()
    audio_features = ddsp.training.eval_util.compute_audio_features(audio)
    audio_features['loudness_db'] = audio_features['loudness_db'].astype(
        np.float32)
    audio_features_mod = None
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))
    model_dir = ckpt_dir
    gin_file = os.path.join(model_dir, 'operative_config-0.gin')
    # Parse gin config,
    with gin.unlock_config():
        gin.parse_config_file(gin_file, skip_unknown=True)
    # Assumes only one checkpoint in the folder, 'ckpt-[iter]`.
    ckpt_files = [f for f in tf.io.gfile.listdir(model_dir) if 'ckpt' in f]
    ckpt_name = ckpt_files[0].split('.')[0]
    ckpt = os.path.join(model_dir, ckpt_name)
    # Ensure dimensions and sampling rates are equal
    time_steps_train = gin.query_parameter('DefaultPreprocessor.time_steps')
    n_samples_train = gin.query_parameter('Additive.n_samples')
    hop_size = int(n_samples_train / time_steps_train)
    time_steps = int(audio.shape[1] / hop_size)
    n_samples = time_steps * hop_size
    # Override the trained lengths so the model matches this input's size.
    gin_params = [
        'Additive.n_samples = {}'.format(n_samples),
        'FilteredNoise.n_samples = {}'.format(n_samples),
        'DefaultPreprocessor.time_steps = {}'.format(time_steps),
    ]
    with gin.unlock_config():
        gin.parse_config(gin_params)
    # Trim all input vectors to correct lengths
    for key in ['f0_hz', 'f0_confidence', 'loudness_db']:
        audio_features[key] = audio_features[key][:time_steps]
    audio_features['audio'] = audio_features['audio'][:, :n_samples]
    # Set up the model just to predict audio given new conditioning
    log("restoring model...")
    start_time = time.time()
    model = ddsp.training.models.Autoencoder()
    model.restore(ckpt)
    # Build model by running a batch through it.
    _ = model(audio_features, training=False)
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))
    # Apply the requested loudness/pitch/confidence modifications to a
    # copy of the features.
    audio_features_mod = {k: v.copy() for k, v in audio_features.items()}
    audio_features_mod = shift_ld(audio_features_mod, loudness_db_shift)
    audio_features_mod = shift_f0(audio_features_mod, f0_octave_shift)
    audio_features_mod = mask_by_confidence(audio_features_mod,
                                            f0_confidence_threshold)
    af = audio_features if audio_features_mod is None else audio_features_mod
    # Run a batch of predictions.
    log("predicting...")
    start_time = time.time()
    audio_gen = model(af, training=False)
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))
    return audio_gen
def run_1(self, gin_file, in_arr, out_arr, f0_octave_shift=0,
          f0_confidence_threshold=0.0, loudness_db_shift=0.0,
          in_sample_rate=44100, out_sample_rate=16000, adjust=True,
          quiet=20.0, autotune=0.0):
    """Pd-side: run DDSP timbre transfer on the audio in `in_arr` via the
    worker process and write the normalized result into `out_arr`.

    `gin_file` is resolved relative to the script directory and its
    containing directory is used as the checkpoint directory.
    """
    if not self._proc:
        raise Exception("no ddsp_worker process is running")
    # get buffers
    in_buf = pyext.Buffer(in_arr)
    out_buf = pyext.Buffer(out_arr)
    in_audio = np.array(in_buf, dtype=np.float32)
    print_err("in_audio.size =", in_audio.size)
    print_err("in_audio.itemsize =", in_audio.itemsize)
    # make timbre transfer message: fixed-size parameter header followed
    # by the checkpoint-dir string and the raw input samples
    gin_path = os.path.join(script_dir, str(gin_file))
    ckpt_dir = os.path.dirname(gin_path)
    ckpt_msg = protocol.to_str_msg(ckpt_dir)
    print_err("len(ckpt_msg) = ", len(ckpt_msg))
    transfer_msg = protocol.to_timbre_transfer_msg(
        in_sample_rate, out_sample_rate, f0_octave_shift,
        f0_confidence_threshold, loudness_db_shift, bool(adjust), quiet,
        autotune, len(ckpt_msg), in_audio.size * in_audio.itemsize)
    print_err("len(transfer_msg) = ", len(transfer_msg))
    in_audio_msg = protocol.to_audio_msg(in_audio)
    print_err("len(in_audio_msg) = ", len(in_audio_msg))
    # write timbre transfer message
    self._write_msg(protocol.IN_TAG_TIMBRE_TRANSFER, transfer_msg,
                    ckpt_msg, in_audio_msg)
    print_err("wrote")
    # read timbre transferred message: tag, byte size, then samples
    self._read_tag(protocol.OUT_TAG_TIMBRE_TRANSFERRED)
    print_err("read")
    transferred_msg = self._proc.stdout.read(
        protocol.timbre_transferred_struct.size)
    print_err("len(transferred_msg) =", len(transferred_msg))
    out_audio_len = protocol.from_timbre_transferred_msg(transferred_msg)
    print_err("out_audio_len =", out_audio_len)
    out_audio_msg = self._proc.stdout.read(out_audio_len)
    print_err("len(out_audio_msg)", len(out_audio_msg))
    out_audio = protocol.from_audio_msg(out_audio_msg)
    # resize output buffer if needed
    if len(out_audio) != len(out_buf):
        print_err("resizing")
        out_buf.resize(len(out_audio))
        print_err("resized")
    else:
        print_err("no resize")
    # write output (normalized) into the Pd array and mark it dirty
    out_buf[:] = normalize(out_audio)
    print_err("wrote out_audio")
    out_buf.dirty()
    self._outlet(1, ["transferred", len(out_audio)])
from magenta.models.gansynth.lib import flags as lib_flags
from magenta.models.gansynth.lib import generate_util as gu
from magenta.models.gansynth.lib import model as lib_model
from magenta.models.gansynth.lib import util
import tensorflow as tf

import sopilib.gansynth_protocol as gss
from sopilib.utils import print_err, read_msg
from handlers import handlers

# Parse command-line arguments: checkpoint directory and batch size.
try:
    ckpt_dir = sys.argv[1]
    batch_size = int(sys.argv[2])
except (IndexError, ValueError):
    # Robustness fix: a non-integer batch_size previously raised an
    # uncaught ValueError; now both missing and malformed arguments
    # print the usage message and exit.
    print_err("usage: {} checkpoint_dir batch_size".format(
        os.path.basename(__file__)))
    sys.exit(1)

# Load the GANSynth model once at startup.
flags = lib_flags.Flags({"batch_size_schedule": [batch_size]})
model = lib_model.Model.load_from_path(ckpt_dir, flags)

# Unbuffered binary stdin/stdout for the message protocol.
stdin = os.fdopen(sys.stdin.fileno(), "rb", 0)
stdout = os.fdopen(sys.stdout.fileno(), "wb", 0)

# Handshake: init tag followed by the audio_length/sample_rate info.
stdout.write(gss.to_tag_msg(gss.OUT_TAG_INIT))
audio_length = model.config['audio_length']
sample_rate = model.config['sample_rate']
info_msg = gss.to_info_msg(audio_length=audio_length,
                           sample_rate=sample_rate)
stdout.write(info_msg)
stdout.flush()