def handle_hallucinate_noz(model, stdin, stdout, state):
    """Build a hallucinated clip from streamed GANSpace edit vectors (no z).

    Reads ``step_count`` edit vectors (each ``edit_count`` f64 values) from
    stdin, interpolates between them, synthesizes one note per interpolated
    step at the model's lowest pitch, and writes the combined audio back.
    """
    note_len_max = model.config['audio_length']
    sr = model.config['sample_rate']
    pca = state["ganspace_components"]
    layer_dtype = pca["stdev"].dtype

    msg = read_msg(stdin, protocol.hallucinate_struct.size)
    (step_count, interpolation_steps, spacing, start_trim,
     attack, sustain, release) = protocol.from_hallucinate_msg(msg)
    edit_count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size))

    def _read_edit():
        # One f64 edit amplitude per message.
        return protocol.from_f64_msg(read_msg(stdin, protocol.f64_struct.size))

    raw_steps = [
        np.array([_read_edit() for _ in range(edit_count)], dtype=layer_dtype)
        for _ in range(step_count)
    ]
    steps = list(interpolate_edits(raw_steps, interpolation_steps))

    # Every step is rendered at the model's lowest trained pitch.
    pitch = min(model.pitch_counts.keys())
    layer_steps = np.array([make_layer(pca, e) for e in steps],
                           dtype=layer_dtype)
    pitch_steps = np.repeat([pitch], len(steps))
    audios = model.generate_samples_from_layers({pca["layer"]: layer_steps},
                                                pitch_steps)

    final_audio = combine_notes(audios,
                                spacing=spacing,
                                start_trim=start_trim,
                                attack=attack,
                                sustain=sustain,
                                release=release,
                                max_note_length=note_len_max,
                                sr=sr)
    final_audio = final_audio.astype("float32")

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(
        protocol.to_audio_size_msg(final_audio.size * final_audio.itemsize))
    stdout.write(protocol.to_audio_msg(final_audio))
    stdout.flush()
def handle_gen_audio(model, stdin, stdout, state):
    """Synthesize audio for a batch of (pitch, z) pairs read from stdin.

    If GANSpace component amplitudes have been set in ``state``, a linear
    combination of the components (each scaled by its stdev and the chosen
    amplitude) is applied as a per-layer offset during generation.  Writes
    an audio tag, a count, and one sized audio buffer per result to stdout.
    """
    count_msg = read_msg(stdin, protocol.count_struct.size)
    count = protocol.from_count_msg(count_msg)
    pitches = []
    zs = []
    for _ in range(count):
        gen_msg = read_msg(stdin, protocol.gen_audio_struct.size)
        pitch, z = protocol.from_gen_msg(gen_msg)
        pitches.append(pitch)
        zs.append(z)

    layer_offsets = {}
    if 'ganspace_component_amplitudes' in state:
        components = state['ganspace_components']['comp']
        std_devs = state['ganspace_components']['stdev']
        edits = state['ganspace_component_amplitudes']
        amounts = np.zeros(components.shape[:1], dtype=np.float32)
        # Only the first len(edits) components are driven; the rest stay 0.
        # (was: len(list(map(float, edits))) — a redundant copy that equals
        # len(edits))
        amounts[:len(edits)] = edits * std_devs
        scaled_directions = amounts.reshape(-1, 1, 1, 1) * components
        linear_combination = np.sum(scaled_directions, axis=0)
        # NOTE(review): batch size of 8 is hard-coded here — presumably the
        # model's fixed generation batch size; confirm against model config.
        linear_combination_batch = np.repeat(
            linear_combination.reshape(1, *linear_combination.shape), 8,
            axis=0)
        layer_offsets[state['ganspace_components']
                      ['layer']] = linear_combination_batch

    z_arr = np.array(zs)
    try:
        audios = model.generate_samples_from_z(z_arr, pitches,
                                               layer_offsets=layer_offsets)
    except KeyError as e:
        # Model raises KeyError for pitches it was not trained on; report
        # the failure and answer with an empty batch rather than crashing.
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))
    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
def handle_generate(state):
    """Handle a generate request on the wrapped stdin/stdout in ``state``.

    Reads sample rates, output count, duration, optional seed audio, and one
    temperature string per output; runs ``generate`` and streams the
    resulting audio buffers back.
    """
    stdin = state.stdin
    stdout = state.stdout
    # (removed unused locals: model, config, num_seqs — `generate` takes the
    # whole state object; also removed dead commented-out debug code)

    generate_msg = read_msg(stdin, protocol.generate_struct.size)
    seed_sr, out_sr, num_outs, dur, seed_len = protocol.from_generate_msg(
        generate_msg)
    print_err("seed_sr =", seed_sr)
    print_err("out_sr =", out_sr)
    print_err("num_outs =", num_outs)
    print_err("dur =", dur)
    print_err("seed_len =", seed_len)

    if seed_len > 0:
        seed_msg = read_msg(stdin, seed_len * protocol.f32_struct.size)
        seed_audio = protocol.from_audio_msg(seed_msg)
    else:
        seed_audio = np.array([], dtype=np.float32)
    print_err("seed_audio size*itemsize =",
              seed_audio.size * seed_audio.itemsize)

    temps = []
    for _ in range(num_outs):
        # Each temperature arrives as a length-prefixed string.
        temp_len_msg = read_msg(stdin, protocol.size_struct.size)
        temp_len = protocol.from_size_msg(temp_len_msg)
        temp_str_msg = read_msg(stdin, temp_len)
        temp_str = protocol.from_str_msg(temp_str_msg)
        temps.append(gen.check_temperature(temp_str))
    print_err("temps =", temps)

    out_audios = list(generate(state, out_sr, dur, temps, seed_audio))
    print_err("generated")

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_GENERATED))
    # NOTE(review): assumes at least one output (num_outs >= 1) and that all
    # outputs share out_audios[0].size — confirm against the protocol spec.
    stdout.write(
        protocol.to_generated_msg(out_sr, len(out_audios),
                                  out_audios[0].size))
    for audio in out_audios:
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
def handle_load_ganspace_components(model, stdin, stdout, state):
    """Load pickled GANSpace components from a path received on stdin.

    Stores the loaded dict and its component count in ``state`` and replies
    with a load-components tag followed by the count.
    """
    size_msg = read_msg(stdin, protocol.int_struct.size)
    path_len = protocol.from_int_msg(size_msg)
    path = read_msg(stdin, path_len).decode('utf-8')
    print_err("Opening components file '{}'".format(path))
    # NOTE(review): pickle.load runs arbitrary code — the path/file must come
    # from a trusted controller process.
    with open(path, "rb") as fp:
        state['ganspace_components'] = pickle.load(fp)
    print_err("Components file loaded.")
    n_components = len(state['ganspace_components']["comp"])
    state['ganspace_component_count'] = n_components
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_LOAD_COMPONENTS))
    stdout.write(protocol.to_count_msg(n_components))
    stdout.flush()
def handle_set_component_amplitudes(model, stdin, stdout, state):
    """Read one f64 amplitude per GANSpace component and store the list in
    ``state['ganspace_component_amplitudes']``.  Sends no reply.
    """
    # Idiom: comprehension instead of append-loop; range(n) not range(0, n).
    state['ganspace_component_amplitudes'] = [
        protocol.from_float_msg(read_msg(stdin, protocol.f64_struct.size))
        for _ in range(state['ganspace_component_count'])
    ]
def handle_slerp_z(model, stdin, stdout, state):
    """Spherically interpolate between two z vectors and reply with the
    result as a single z message."""
    msg = read_msg(stdin, protocol.slerp_z_struct.size)
    z_from, z_to, amount = protocol.from_slerp_z_msg(msg)
    z_mid = gu.slerp(z_from, z_to, amount)
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_Z))
    stdout.write(protocol.to_count_msg(1))
    stdout.write(protocol.to_z_msg(z_mid))
    stdout.flush()
def handle_synthesize_noz(model, stdin, stdout, state):
    """Synthesize one note per requested pitch using the current GANSpace
    component amplitudes instead of explicit z vectors."""
    print_err("handle_synthesize_noz")
    count = protocol.from_count_msg(read_msg(stdin, protocol.count_struct.size))
    pitches = [
        protocol.from_synthesize_noz_msg(
            read_msg(stdin, protocol.synthesize_noz_struct.size))
        for _ in range(count)
    ]

    pca = state["ganspace_components"]
    layer_dtype = pca["stdev"].dtype
    edits = np.array(state["ganspace_component_amplitudes"],
                     dtype=layer_dtype)
    # Same edited layer for every pitch in the batch.
    layers = np.repeat([make_layer(pca, edits)], len(pitches), axis=0)
    try:
        audios = model.generate_samples_from_layers({pca["layer"]: layers},
                                                    pitches)
    except KeyError as e:
        # Untrained pitch: report and reply with an empty batch.
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))
    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
def handle_rand_z(model, stdin, stdout, state):
    """Sample a requested number of fresh z vectors and send them back."""
    count_msg = read_msg(stdin, protocol.count_struct.size)
    requested = protocol.from_count_msg(count_msg)
    latents = model.generate_z(requested)
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_Z))
    stdout.write(protocol.to_count_msg(len(latents)))
    for latent in latents:
        stdout.write(protocol.to_z_msg(latent))
    stdout.flush()
def handle_timbre_transfer(stdin, stdout):
    """Run timbre transfer on audio received over stdin.

    Reads the fixed-size parameter struct, then the variable-length
    checkpoint path and input audio, calls ``timbre_transfer``, and writes
    the transferred audio (tag, size, payload) back to stdout.
    """
    transfer_msg = read_msg(stdin, protocol.timbre_transfer_struct.size)
    h = protocol.from_timbre_transfer_msg(transfer_msg)
    print_err(repr(h))
    (in_sample_rate, out_sample_rate, f0_octave_shift,
     f0_confidence_threshold, loudness_db_shift, adjust, quiet, autotune,
     ckpt_dir_len, in_audio_len) = h
    print_err("ckpt_dir_len =", ckpt_dir_len)
    print_err("in_audio_len =", in_audio_len)
    # Fix: use read_msg (as every other handler does) rather than a bare
    # stdin.read(n) — a raw read may return fewer than n bytes, which would
    # desynchronize the message framing.
    ckpt_dir_msg = read_msg(stdin, ckpt_dir_len)
    ckpt_dir = protocol.from_str_msg(ckpt_dir_msg)
    print_err("ckpt_dir =", ckpt_dir)
    in_audio_msg = read_msg(stdin, in_audio_len)
    print_err("len(in_audio_msg) =", len(in_audio_msg))
    in_audio = protocol.from_audio_msg(in_audio_msg)
    print_err("in_audio.size =", in_audio.size)
    out_audio = timbre_transfer(
        ckpt_dir=ckpt_dir,
        audio=in_audio,
        in_sample_rate=in_sample_rate,
        out_sample_rate=out_sample_rate,
        f0_octave_shift=f0_octave_shift,
        f0_confidence_threshold=f0_confidence_threshold,
        loudness_db_shift=loudness_db_shift,
        adjust=adjust,
        quiet=quiet,
        autotune=autotune,
        log=print_err)
    # timbre_transfer returns a tensor; flatten it to a 1-D numpy array.
    out_audio = out_audio.numpy().ravel()
    out_audio_len = out_audio.size * out_audio.itemsize
    print_err("out_audio.shape =", out_audio.shape)
    print_err("out_audio_len =", out_audio_len)
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_TIMBRE_TRANSFERRED))
    print_err("wrote tag_timbre_transferred")
    stdout.write(protocol.to_timbre_transferred_msg(out_audio_len))
    print_err("wrote size")
    bytez = protocol.to_audio_msg(out_audio)
    print_err("len(bytez) =", len(bytez))
    stdout.write(bytez)
    print_err("wrote out_audio")
    stdout.flush()
def handle_hallucinate(model, stdin, stdout, state):
    """Generate random seed notes, interpolate between them, and combine the
    synthesized notes into a single hallucinated clip written to stdout."""
    max_note_length = model.config['audio_length']
    sample_rate = model.config['sample_rate']
    hallucinate_msg = read_msg(stdin, protocol.hallucinate_struct.size)
    args = protocol.from_hallucinate_msg(hallucinate_msg)
    (note_count, interpolation_steps, spacing, start_trim,
     attack, sustain, release) = args
    print_err(
        "note_count = {} interpolation_steps = {}, spacing = {}s, start_trim = {}s, attack = {}s, sustain = {}s, release = {}s"
        .format(*args))

    seed_notes = model.generate_z(note_count)
    # All seed notes use a fixed pitch of 32.
    seed_pitches = np.array([32] * len(seed_notes))
    notes, pitches = interpolate_notes(seed_notes, seed_pitches,
                                       interpolation_steps)
    audios = synthesize(model, notes, pitches)

    clip = combine_notes(audios,
                         spacing=spacing,
                         start_trim=start_trim,
                         attack=attack,
                         sustain=sustain,
                         release=release,
                         max_note_length=max_note_length,
                         sr=sample_rate)
    clip = clip.astype('float32')

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_audio_size_msg(clip.size * clip.itemsize))
    stdout.write(protocol.to_audio_msg(clip))
    stdout.flush()
# Script entry: parse CLI arguments, load the GANSynth model, then serve the
# binary message loop over stdin/stdout.
# NOTE(review): this span began mid-`try` in the mangled source; the `try:`
# opener is reconstructed here (an `except` cannot exist without it).
try:
    ckpt_dir = sys.argv[1]
    batch_size = int(sys.argv[2])
except IndexError:
    print_err("usage: {} checkpoint_dir batch_size".format(
        os.path.basename(__file__)))
    sys.exit(1)

flags = lib_flags.Flags({"batch_size_schedule": [batch_size]})
model = lib_model.Model.load_from_path(ckpt_dir, flags)

# Unbuffered binary handles over the standard streams.
stdin = os.fdopen(sys.stdin.fileno(), "rb", 0)
stdout = os.fdopen(sys.stdout.fileno(), "wb", 0)

# Announce readiness, then send audio_length/sample_rate model info.
stdout.write(gss.to_tag_msg(gss.OUT_TAG_INIT))
audio_length = model.config['audio_length']
sample_rate = model.config['sample_rate']
info_msg = gss.to_info_msg(audio_length=audio_length, sample_rate=sample_rate)
stdout.write(info_msg)
stdout.flush()

state = {}
# Message loop: read a tag, dispatch to its handler; unknown tags are fatal.
while True:
    in_tag_msg = read_msg(stdin, gss.tag_struct.size)
    in_tag = gss.from_tag_msg(in_tag_msg)
    if in_tag not in handlers:
        raise ValueError("unknown input message tag: {}".format(in_tag))
    handlers[in_tag](model, stdin, stdout, state)
# Script entry: build the inference model, then serve the binary message
# loop over stdin/stdout.
model = gen.create_inference_model(ckpt_path, args.num_seqs, config)
print_err("hello :)")

# open standard input/output handles
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer

# write init message
stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_INIT))
stdout.flush()
print_err("it begins @_@")

# Bundle everything the handlers need into one state object; handlers here
# take only (state), unlike the GANSynth-style handlers above.
state = SimpleNamespace(stdin=stdin,
                        stdout=stdout,
                        config=config,
                        num_seqs=args.num_seqs,
                        model=model)

# Message loop: read a tag, dispatch to its handler; unknown tags are fatal.
while True:
    in_tag_msg = read_msg(stdin, protocol.tag_struct.size)
    in_tag = protocol.from_tag_msg(in_tag_msg)
    if in_tag not in handlers:
        raise ValueError("unknown input message tag: {}".format(in_tag))
    handlers[in_tag](state)