def randomize_z_1(self, *buf_names):
    """Ask the worker for random z vectors and store one in each named buffer.

    Sends an ``IN_TAG_RAND_Z`` request carrying the number of buffers, reads
    back an ``OUT_TAG_Z`` response, and copies each returned z (converted to
    float32) into the ``pyext.Buffer`` with the corresponding name, resizing
    the buffer when its length differs. Emits ``"randomized"`` on outlet 1
    when done.

    Args:
        *buf_names: names of the buffers to fill, one z vector per name.

    Raises:
        Exception: if no worker process is running.
        ValueError: if no buffer name was given.
        AssertionError: if the worker answers with a different number of
            z vectors than was requested.
    """
    if not self._proc:
        raise Exception("can't randomize z - no gansynth_worker process is running")

    in_count = len(buf_names)
    if in_count == 0:
        raise ValueError("no buffer name(s) specified")

    in_count_msg = protocol.to_count_msg(in_count)
    self._write_msg(protocol.IN_TAG_RAND_Z, in_count_msg)

    self._read_tag(protocol.OUT_TAG_Z)
    out_count_msg = self._proc.stdout.read(protocol.count_struct.size)
    out_count = protocol.from_count_msg(out_count_msg)

    # Explicit check instead of a bare `assert`: assert statements are
    # removed under `python -O`, which would let a mismatched response be
    # read as if it were valid. AssertionError is kept as the exception type
    # so existing behaviour on mismatch is unchanged.
    if out_count != in_count:
        raise AssertionError(
            "worker returned {} z vector(s), expected {}".format(out_count, in_count)
        )

    for buf_name in buf_names:
        z_msg = self._proc.stdout.read(protocol.z_struct.size)
        z = protocol.from_z_msg(z_msg)
        z32 = z.astype(np.float32)

        buf = pyext.Buffer(buf_name)
        if len(buf) != len(z32):
            buf.resize(len(z32))
        buf[:] = z32
        buf.dirty()

    self._outlet(1, "randomized")
def hallucinate_noz_1(self, audio_buf_name):
    """Request a hallucination from the worker and store the audio in a buffer.

    Refreshes the edits via ``self._read_edits()``, flattens the edits of all
    steps into one list, and sends an ``IN_TAG_HALLUCINATE_NOZ`` request made
    of the hallucinate parameters, the per-step edit count (taken from the
    first step) and every edit encoded as f64. The ``OUT_TAG_AUDIO`` response
    is copied into the buffer named ``audio_buf_name`` (resized if needed) and
    ``["hallucinated", <sample count>]`` is emitted on outlet 1.

    Args:
        audio_buf_name: name of the buffer that receives the audio.

    Raises:
        Exception: if no worker process is running or no steps were added.
    """
    if not self._proc:
        raise Exception("can't hallucinate - load a checkpoint first")
    if not self._steps:
        raise Exception("can't hallucinate - no steps added")

    self._read_edits()

    step_count = len(self._steps)
    print_err("step_count =", step_count)
    print_err("steps =", self._steps)

    # The number of edits per step is read from the first step only.
    edit_count = len(self._steps[0]["edits"])

    # Flatten the edits step by step, preserving order.
    edit_list = [edit for step in self._steps for edit in step["edits"]]
    print_err("len(edit_list) =", len(edit_list))

    params_msg = protocol.to_hallucinate_msg(
        step_count,
        self._interp_steps,
        self._sample_spacing,
        self._start_trim,
        self._attack,
        self._sustain,
        self._release
    )
    edit_count_msg = protocol.to_count_msg(edit_count)
    edit_msgs = [protocol.to_f64_msg(edit) for edit in edit_list]
    self._write_msg(
        protocol.IN_TAG_HALLUCINATE_NOZ,
        params_msg,
        edit_count_msg,
        *edit_msgs
    )

    self._read_tag(protocol.OUT_TAG_AUDIO)
    worker_out = self._proc.stdout
    audio_size = protocol.from_audio_size_msg(
        worker_out.read(protocol.audio_size_struct.size)
    )
    audio = protocol.from_audio_msg(worker_out.read(audio_size))

    audio_buf = pyext.Buffer(audio_buf_name)
    sample_count = len(audio)
    if len(audio_buf) != sample_count:
        audio_buf.resize(sample_count)
    audio_buf[:] = audio
    audio_buf.dirty()

    self._outlet(1, ["hallucinated", len(audio)])
def handle_slerp_z(model, stdin, stdout, state):
    """Handle a slerp request: interpolate between two z vectors.

    Reads one slerp message from ``stdin``, decodes it into two z vectors and
    an amount, interpolates with ``gu.slerp`` and replies on ``stdout`` with an
    ``OUT_TAG_Z`` tag, a count of 1 and the resulting z. ``model`` and
    ``state`` are not used by this handler.
    """
    request = read_msg(stdin, protocol.slerp_z_struct.size)
    z0, z1, amount = protocol.from_slerp_z_msg(request)

    interpolated = gu.slerp(z0, z1, amount)

    # Response layout: tag, count (always a single z), then the z payload.
    send = stdout.write
    send(protocol.to_tag_msg(protocol.OUT_TAG_Z))
    send(protocol.to_count_msg(1))
    send(protocol.to_z_msg(interpolated))
    stdout.flush()
def handle_gen_audio(model, stdin, stdout, state):
    """Handle an audio generation request for a list of (pitch, z) pairs.

    Reads a count followed by that many gen-audio messages from ``stdin``.
    When ``state`` holds component amplitudes, a layer offset is built from
    the loaded components scaled by amplitude and standard deviation, and
    passed to the model alongside the z vectors. The reply on ``stdout`` is an
    ``OUT_TAG_AUDIO`` tag, the number of audios, then a size + payload pair
    per audio. If the model raises ``KeyError`` the error is logged and a
    count of zero is sent.
    """
    request_count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size)
    )

    pitches = []
    zs = []
    for _ in range(request_count):
        gen_msg = read_msg(stdin, protocol.gen_audio_struct.size)
        pitch, z = protocol.from_gen_msg(gen_msg)
        pitches.append(pitch)
        zs.append(z)

    layer_offsets = {}
    if 'ganspace_component_amplitudes' in state:
        pca = state['ganspace_components']
        components = pca['comp']
        std_devs = pca['stdev']
        edits = state['ganspace_component_amplitudes']

        # One amount per component; components without an edit stay at zero.
        amounts = np.zeros(components.shape[:1], dtype=np.float32)
        scaled_edits = edits * std_devs
        edit_count = len(list(map(float, edits)))
        amounts[:edit_count] = scaled_edits

        # Weight every component by its amount and sum them into one offset.
        scaled_directions = amounts.reshape(-1, 1, 1, 1) * components
        linear_combination = np.sum(scaled_directions, axis=0)

        # The offset is repeated 8 times along a new leading axis.
        # NOTE(review): 8 presumably matches the model's batch size - confirm.
        batched_shape = (1,) + tuple(linear_combination.shape)
        linear_combination_batch = np.repeat(
            linear_combination.reshape(batched_shape), 8, axis=0
        )
        layer_offsets[pca['layer']] = linear_combination_batch

    z_arr = np.array(zs)

    try:
        audios = model.generate_samples_from_z(
            z_arr, pitches, layer_offsets=layer_offsets
        )
    except KeyError as e:
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    send = stdout.write
    send(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    send(protocol.to_count_msg(len(audios)))
    for audio in audios:
        # Size is announced in bytes before the payload itself.
        send(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        send(protocol.to_audio_msg(audio))
    stdout.flush()
def synthesize_1(self, *args):
    """Synthesize one note per (z buffer, audio buffer, pitch) triple.

    Each z buffer is read as float64 and sent with its pitch in a single
    ``IN_TAG_GEN_AUDIO`` request. The ``OUT_TAG_AUDIO`` response is copied
    into the named audio buffers in request order (resizing where needed) and
    ``"synthesized"`` is emitted on outlet 1. If the worker reports zero
    audios the method returns without touching any buffer or the outlet.

    Args:
        *args: flat sequence ``z1 audio1 pitch1 [z2 audio2 pitch2 ...]``.

    Raises:
        Exception: if no worker process is running.
        ValueError: if the number of arguments is zero or not a multiple of 3.
        AssertionError: if the worker returns a non-zero number of audios
            that differs from the number requested.
    """
    if not self._proc:
        raise Exception("can't synthesize - no gansynth_worker process is running")

    arg_count = len(args)
    if arg_count == 0 or arg_count % 3 != 0:
        raise ValueError("invalid number of arguments ({}), should be a multiple of 3: synthesize z1 audio1 pitch1 [z2 audio2 pitch2 ...]".format(arg_count))

    gen_audio_msgs = []
    audio_buf_names = []
    for i in range(0, arg_count, 3):
        z_buf_name, audio_buf_name, pitch = args[i:i+3]

        z32_buf = pyext.Buffer(z_buf_name)
        z = np.array(z32_buf, dtype=np.float64)

        gen_audio_msgs.append(protocol.to_gen_audio_msg(pitch, z))
        audio_buf_names.append(audio_buf_name)

    in_count = len(gen_audio_msgs)
    in_count_msg = protocol.to_count_msg(in_count)
    self._write_msg(protocol.IN_TAG_GEN_AUDIO, in_count_msg, *gen_audio_msgs)

    self._read_tag(protocol.OUT_TAG_AUDIO)
    out_count_msg = self._proc.stdout.read(protocol.count_struct.size)
    out_count = protocol.from_count_msg(out_count_msg)

    if out_count == 0:
        return

    # Explicit check instead of a bare `assert`: assert statements are
    # removed under `python -O`, which would let a mismatched response be
    # read as if it were valid. AssertionError is kept as the exception type
    # so existing behaviour on mismatch is unchanged.
    if out_count != in_count:
        raise AssertionError(
            "worker returned {} audio(s), expected {}".format(out_count, in_count)
        )

    for audio_buf_name in audio_buf_names:
        audio_size_msg = self._proc.stdout.read(protocol.audio_size_struct.size)
        audio_size = protocol.from_audio_size_msg(audio_size_msg)

        audio_msg = self._proc.stdout.read(audio_size)
        audio_note = protocol.from_audio_msg(audio_msg)

        audio_buf = pyext.Buffer(audio_buf_name)
        if len(audio_buf) != len(audio_note):
            audio_buf.resize(len(audio_note))

        audio_buf[:] = audio_note
        audio_buf.dirty()

    self._outlet(1, "synthesized")
def handle_load_ganspace_components(model, stdin, stdout, state):
    """Handle a request to load a pickled components file into ``state``.

    Reads a length-prefixed UTF-8 path from ``stdin``, unpickles that file
    into ``state['ganspace_components']``, records the number of entries under
    its ``"comp"`` key as ``state['ganspace_component_count']`` and replies on
    ``stdout`` with an ``OUT_TAG_LOAD_COMPONENTS`` tag followed by that count.
    ``model`` is not used by this handler.
    """
    path_size = protocol.from_int_msg(read_msg(stdin, protocol.int_struct.size))
    path = read_msg(stdin, path_size).decode('utf-8')

    print_err("Opening components file '{}'".format(path))
    # NOTE(review): pickle.load executes arbitrary code embedded in the file,
    # and the path comes from the request stream - only load trusted files.
    with open(path, "rb") as fp:
        loaded = pickle.load(fp)
    state['ganspace_components'] = loaded
    print_err("Components file loaded.")

    component_count = len(state['ganspace_components']["comp"])
    state['ganspace_component_count'] = component_count

    send = stdout.write
    send(protocol.to_tag_msg(protocol.OUT_TAG_LOAD_COMPONENTS))
    send(protocol.to_count_msg(component_count))
    stdout.flush()
def handle_rand_z(model, stdin, stdout, state):
    """
    Generates a given number of new Z coordinates.

    Reads the requested count from ``stdin``, asks the model for that many z
    vectors and replies on ``stdout`` with an ``OUT_TAG_Z`` tag, the number of
    vectors produced, and each vector in turn. ``state`` is not used.
    """
    requested = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size)
    )

    generated = model.generate_z(requested)

    send = stdout.write
    send(protocol.to_tag_msg(protocol.OUT_TAG_Z))
    # The count sent back is the number actually generated.
    send(protocol.to_count_msg(len(generated)))
    for z in generated:
        send(protocol.to_z_msg(z))
    stdout.flush()
def handle_synthesize_noz(model, stdin, stdout, state):
    """Handle a synthesis request driven by component amplitudes, not z.

    Reads a count followed by that many pitch messages from ``stdin``. One
    layer is built with ``make_layer`` from the loaded components and the
    current amplitudes (cast to the dtype of the stored standard deviations),
    repeated once per pitch, and handed to the model. The reply on ``stdout``
    is an ``OUT_TAG_AUDIO`` tag, the number of audios, then a size + payload
    pair per audio. If the model raises ``KeyError`` the error is logged and a
    count of zero is sent.
    """
    print_err("handle_synthesize_noz")

    pitch_count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size)
    )

    pitches = []
    for _ in range(pitch_count):
        pitch_msg = read_msg(stdin, protocol.synthesize_noz_struct.size)
        pitches.append(protocol.from_synthesize_noz_msg(pitch_msg))

    pca = state["ganspace_components"]
    stdevs = pca["stdev"]
    edits = np.array(state["ganspace_component_amplitudes"], dtype=stdevs.dtype)

    # The same layer is used for every requested pitch.
    layer = make_layer(pca, edits)
    layers = np.repeat([layer], len(pitches), axis=0)

    try:
        audios = model.generate_samples_from_layers(
            {pca["layer"]: layers}, pitches
        )
    except KeyError as e:
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    send = stdout.write
    send(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    send(protocol.to_count_msg(len(audios)))
    for audio in audios:
        # Size is announced in bytes before the payload itself.
        send(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        send(protocol.to_audio_msg(audio))
    stdout.flush()
def synthesize_noz_1(self, *args):
    """Synthesize one note per (audio buffer, pitch) pair from the edits buffer.

    First sends the contents of the edits buffer as component amplitudes
    (``IN_TAG_SET_COMPONENT_AMPLITUDES``), padded with zeros when the buffer
    holds fewer than ``self._component_count`` values. Then sends an
    ``IN_TAG_SYNTHESIZE_NOZ`` request with every pitch, copies the
    ``OUT_TAG_AUDIO`` response into the named audio buffers in request order
    (resizing where needed) and emits ``"synthesized"`` on outlet 1. If the
    worker reports zero audios, a message is logged and the method returns
    without touching any buffer or the outlet.

    Args:
        *args: flat sequence ``audio1 pitch1 [audio2 pitch2 ...]``.

    Raises:
        Exception: if no worker process is running.
        ValueError: if the number of arguments is zero or not a multiple of 2.
        AssertionError: if the worker returns a non-zero number of audios
            that differs from the number requested.
    """
    if not self._proc:
        raise Exception("can't synthesize - no gansynth_worker process is running")

    arg_count = len(args)
    if arg_count == 0 or arg_count % 2 != 0:
        raise ValueError("invalid number of arguments ({}), should be a multiple of 2: synthesize_noz audio1 pitch1 [audio2 pitch2 ...]".format(arg_count))

    component_buff = pyext.Buffer(self._edits_buf_name)
    components = np.array(component_buff, dtype=np.float64)

    component_msgs = [protocol.to_float_msg(value) for value in components]
    # Pad with zeros up to the component count; a non-positive difference
    # yields an empty range, so nothing is added in that case.
    missing = self._component_count - len(components)
    component_msgs.extend(protocol.to_float_msg(0.0) for _ in range(missing))

    self._write_msg(protocol.IN_TAG_SET_COMPONENT_AMPLITUDES, *component_msgs)

    gen_msgs = []
    audio_buf_names = []
    for i in range(0, arg_count, 2):
        audio_buf_name, pitch = args[i:i+2]
        gen_msgs.append(protocol.to_synthesize_noz_msg(pitch))
        audio_buf_names.append(audio_buf_name)

    in_count = len(gen_msgs)
    in_count_msg = protocol.to_count_msg(in_count)
    self._write_msg(protocol.IN_TAG_SYNTHESIZE_NOZ, in_count_msg, *gen_msgs)

    self._read_tag(protocol.OUT_TAG_AUDIO)
    out_count_msg = self._proc.stdout.read(protocol.count_struct.size)
    out_count = protocol.from_count_msg(out_count_msg)

    print_err("out_count =", out_count)

    if out_count == 0:
        print_err("no audio was synthesized!")
        return

    # Explicit check instead of a bare `assert`: assert statements are
    # removed under `python -O`, which would let a mismatched response be
    # read as if it were valid. AssertionError is kept as the exception type
    # so existing behaviour on mismatch is unchanged.
    if out_count != in_count:
        raise AssertionError(
            "worker returned {} audio(s), expected {}".format(out_count, in_count)
        )

    for audio_buf_name in audio_buf_names:
        audio_size_msg = self._proc.stdout.read(protocol.audio_size_struct.size)
        audio_size = protocol.from_audio_size_msg(audio_size_msg)

        audio_msg = self._proc.stdout.read(audio_size)
        audio_note = protocol.from_audio_msg(audio_msg)

        audio_buf = pyext.Buffer(audio_buf_name)
        if len(audio_buf) != len(audio_note):
            audio_buf.resize(len(audio_note))

        audio_buf[:] = audio_note
        audio_buf.dirty()

    self._outlet(1, "synthesized")