def slerp_z_1(self, z0_name, z1_name, z_dst_name, amount):
    """Ask the worker to slerp between two z buffers and store the result.

    The contents of the buffers named ``z0_name`` and ``z1_name`` are
    converted to float64 and sent, together with ``amount``, in an
    ``IN_TAG_SLERP_Z`` message.  Exactly one z is expected in the reply; it
    is converted to float32 and written into the buffer named
    ``z_dst_name``, which is resized first if its length differs.

    Finishes by calling ``self._outlet(1, "slerped")``.

    Raises:
        Exception: if ``self._proc`` is falsy (no worker process).
        AssertionError: if the worker reports a z count other than 1.
    """
    if not self._proc:
        raise Exception("can't slerp - no gansynth_worker process is running")

    # Bind all three buffers up front, in the same order as the arguments.
    src_a, src_b, dst = (
        pyext.Buffer(name) for name in (z0_name, z1_name, z_dst_name)
    )

    # The request carries float64 copies of both endpoints.
    endpoint_a = np.array(src_a, dtype=np.float64)
    endpoint_b = np.array(src_b, dtype=np.float64)
    request = protocol.to_slerp_z_msg(endpoint_a, endpoint_b, amount)
    self._write_msg(protocol.IN_TAG_SLERP_Z, request)

    # Reply layout: tag, count, then one z payload.
    self._read_tag(protocol.OUT_TAG_Z)
    reply_count = protocol.from_count_msg(
        self._proc.stdout.read(protocol.count_struct.size)
    )
    assert reply_count == 1

    payload = self._proc.stdout.read(protocol.z_struct.size)
    result = protocol.from_z_msg(payload).astype(np.float32)

    if len(dst) != len(result):
        dst.resize(len(result))
    dst[:] = result
    dst.dirty()

    self._outlet(1, "slerped")
def randomize_z_1(self, *buf_names):
    """Fill each named buffer with a z requested from the worker.

    Sends an ``IN_TAG_RAND_Z`` message asking for one z per name in
    ``buf_names``, then reads the replies in order.  Each z is converted to
    float32 and written into the matching buffer, which is resized first if
    its length differs, and then marked dirty.

    Finishes by calling ``self._outlet(1, "randomized")``.

    Raises:
        Exception: if ``self._proc`` is falsy (no worker process).
        ValueError: if no buffer names were given.
        AssertionError: if the worker's reply count differs from the
            number of buffers requested.
    """
    if not self._proc:
        raise Exception("can't randomize z - no gansynth_worker process is running")

    requested = len(buf_names)
    if not requested:
        raise ValueError("no buffer name(s) specified")

    self._write_msg(protocol.IN_TAG_RAND_Z, protocol.to_count_msg(requested))

    # Reply layout: tag, count, then one z payload per requested buffer.
    self._read_tag(protocol.OUT_TAG_Z)
    received = protocol.from_count_msg(
        self._proc.stdout.read(protocol.count_struct.size)
    )
    assert received == requested

    for name in buf_names:
        payload = self._proc.stdout.read(protocol.z_struct.size)
        latent = protocol.from_z_msg(payload).astype(np.float32)

        target = pyext.Buffer(name)
        if len(target) != len(latent):
            target.resize(len(latent))
        target[:] = latent
        target.dirty()

    self._outlet(1, "randomized")
def handle_gen_audio(model, stdin, stdout, state):
    """Handle a generate-audio request: read (pitch, z) pairs, reply with audio.

    Reads a count from ``stdin`` followed by that many gen-audio messages,
    synthesizes them with ``model.generate_samples_from_z`` and writes an
    ``OUT_TAG_AUDIO`` reply to ``stdout``: the tag, the number of audio
    clips, then a (size, payload) pair per clip.

    When ``state`` contains ``'ganspace_component_amplitudes'``, a layer
    offset built from ``state['ganspace_components']`` is passed to the
    model as ``layer_offsets``.

    If the model raises ``KeyError`` the error is reported through
    ``print_err`` and an empty reply (count 0) is written.
    """
    request_count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size)
    )

    pitches, zs = [], []
    for _ in range(request_count):
        raw_request = read_msg(stdin, protocol.gen_audio_struct.size)
        pitch, z = protocol.from_gen_msg(raw_request)
        pitches.append(pitch)
        zs.append(z)

    layer_offsets = {}
    if 'ganspace_component_amplitudes' in state:
        pca = state['ganspace_components']
        components = pca['comp']
        std_devs = pca['stdev']
        edits = state['ganspace_component_amplitudes']

        # One weight per component; components without an edit keep weight 0.
        weights = np.zeros(components.shape[:1], dtype=np.float32)
        # NOTE(review): this product only broadcasts when edits and std_devs
        # have compatible shapes - confirm callers always send a full vector.
        scaled_edits = edits * std_devs
        # float() is applied to every edit, so a non-numeric entry raises here.
        edit_count = len([float(value) for value in edits])
        weights[:edit_count] = scaled_edits

        # Weighted sum of the component directions (reduces the first axis).
        weighted = weights.reshape(-1, 1, 1, 1) * components
        offset = np.sum(weighted, axis=0)

        # NOTE(review): the leading dimension is hard-coded to 8 - presumably
        # the model's batch size; confirm against the model configuration.
        offset_batch = np.repeat(offset.reshape(1, *offset.shape), 8, axis=0)
        layer_offsets[pca['layer']] = offset_batch

    z_arr = np.array(zs)
    try:
        audios = model.generate_samples_from_z(
            z_arr, pitches, layer_offsets=layer_offsets
        )
    except KeyError as err:
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                err.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))
    for clip in audios:
        # Size prefix is the payload length in bytes.
        stdout.write(protocol.to_audio_size_msg(clip.size * clip.itemsize))
        stdout.write(protocol.to_audio_msg(clip))
    stdout.flush()
def handle_hallucinate_noz(model, stdin, stdout, state):
    """Handle a hallucinate request driven by GANSpace edits instead of z's.

    Reads a hallucinate message (step count, interpolation steps, spacing,
    start trim, attack, sustain, release) and an edit count from ``stdin``,
    followed by ``step_count * edit_count`` f64 edit values.  The edit
    vectors are expanded with ``interpolate_edits``, turned into layers with
    ``make_layer``, synthesized at the lowest key of ``model.pitch_counts``
    and merged into a single clip by ``combine_notes``.

    The reply written to ``stdout`` is an ``OUT_TAG_AUDIO`` tag followed by
    one (size, payload) pair holding the combined float32 audio.
    """
    max_note_length = model.config['audio_length']
    sample_rate = model.config['sample_rate']

    pca = state["ganspace_components"]
    # Edits and layers are built with the same dtype as the stored stdevs.
    layer_dtype = pca["stdev"].dtype

    header = read_msg(stdin, protocol.hallucinate_struct.size)
    (step_count, interpolation_steps, spacing, start_trim,
     attack, sustain, release) = protocol.from_hallucinate_msg(header)

    edit_count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size)
    )

    pitch = min(model.pitch_counts.keys())

    # One edit vector per step, each made of edit_count f64 values.
    steps = [
        np.array(
            [
                protocol.from_f64_msg(read_msg(stdin, protocol.f64_struct.size))
                for _ in range(edit_count)
            ],
            dtype=layer_dtype,
        )
        for _ in range(step_count)
    ]
    steps = list(interpolate_edits(steps, interpolation_steps))

    layer_steps = np.array(
        [make_layer(pca, step_edits) for step_edits in steps],
        dtype=layer_dtype,
    )
    pitch_steps = np.repeat([pitch], len(steps))

    audios = model.generate_samples_from_layers(
        {pca["layer"]: layer_steps}, pitch_steps
    )

    combined = combine_notes(
        audios,
        spacing=spacing,
        start_trim=start_trim,
        attack=attack,
        sustain=sustain,
        release=release,
        max_note_length=max_note_length,
        sr=sample_rate,
    )
    combined = combined.astype("float32")

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    # Size prefix is the payload length in bytes.
    stdout.write(protocol.to_audio_size_msg(combined.size * combined.itemsize))
    stdout.write(protocol.to_audio_msg(combined))
    stdout.flush()
def synthesize_1(self, *args):
    """Synthesize audio for one or more (z buffer, audio buffer, pitch) triples.

    ``args`` is a flat sequence ``z1 audio1 pitch1 [z2 audio2 pitch2 ...]``.
    Each z buffer is read as float64 and sent to the worker in a single
    ``IN_TAG_GEN_AUDIO`` message.  The returned clips are written, in order,
    into the named audio buffers, which are resized first if their length
    differs, and marked dirty.

    If the worker reports zero clips the method returns without touching
    any buffer and without emitting on the outlet; otherwise it finishes by
    calling ``self._outlet(1, "synthesized")``.

    Raises:
        Exception: if ``self._proc`` is falsy (no worker process).
        ValueError: if the argument count is zero or not a multiple of 3.
        AssertionError: if the worker's non-zero reply count differs from
            the number of requests.
    """
    if not self._proc:
        raise Exception("can't synthesize - no gansynth_worker process is running")

    arg_count = len(args)
    if not args or arg_count % 3:
        raise ValueError("invalid number of arguments ({}), should be a multiple of 3: synthesize z1 audio1 pitch1 [z2 audio2 pitch2 ...]".format(arg_count))

    requests = []
    targets = []
    # Walk the flat argument list three items at a time.
    for z_name, audio_name, pitch in zip(args[0::3], args[1::3], args[2::3]):
        latent = np.array(pyext.Buffer(z_name), dtype=np.float64)
        requests.append(protocol.to_gen_audio_msg(pitch, latent))
        targets.append(audio_name)

    sent = len(requests)
    self._write_msg(
        protocol.IN_TAG_GEN_AUDIO, protocol.to_count_msg(sent), *requests
    )

    # Reply layout: tag, count, then a (size, payload) pair per clip.
    self._read_tag(protocol.OUT_TAG_AUDIO)
    received = protocol.from_count_msg(
        self._proc.stdout.read(protocol.count_struct.size)
    )
    if received == 0:
        return
    assert received == sent

    for audio_name in targets:
        size_bytes = protocol.from_audio_size_msg(
            self._proc.stdout.read(protocol.audio_size_struct.size)
        )
        note = protocol.from_audio_msg(self._proc.stdout.read(size_bytes))

        out_buf = pyext.Buffer(audio_name)
        if len(out_buf) != len(note):
            out_buf.resize(len(note))
        out_buf[:] = note
        out_buf.dirty()

    self._outlet(1, "synthesized")
def handle_rand_z(model, stdin, stdout, state):
    """
    Reply to a rand-z request with freshly generated Z coordinates.

    Reads the requested count from ``stdin``, obtains the Z's from
    ``model.generate_z`` and writes an ``OUT_TAG_Z`` reply to ``stdout``:
    the tag, the number of Z's, then one z message per Z.  ``state`` is
    not used.
    """
    requested = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size)
    )
    latents = model.generate_z(requested)

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_Z))
    # Report how many Z's were actually produced, not how many were asked for.
    stdout.write(protocol.to_count_msg(len(latents)))
    for latent in latents:
        stdout.write(protocol.to_z_msg(latent))
    stdout.flush()
def load_ganspace_components_1(self, ganspace_components_file):
    """Tell the worker to load a GANSpace components file.

    ``ganspace_components_file`` is converted to ``str`` and joined onto
    ``self._canvas_dir``.  The resulting path is sent UTF-8 encoded in an
    ``IN_TAG_LOAD_COMPONENTS`` message, preceded by its size.  The component
    count from the worker's reply is stored in ``self._component_count``.

    Raises:
        Exception: if ``self._proc`` is falsy (no worker process).
    """
    # Same guard as the other worker-backed methods: fail with a clear
    # message instead of an error from deep inside the pipe handling.
    if not self._proc:
        raise Exception("can't load GANSpace components - no gansynth_worker process is running")

    ganspace_components_file = os.path.join(
        self._canvas_dir, str(ganspace_components_file)
    )

    print("Loading GANSpace components...", file=sys.stderr)

    # Encode first so the size prefix counts the bytes actually sent.  A
    # character count is too small for any path containing non-ASCII text.
    components_msg = ganspace_components_file.encode('utf-8')
    size_msg = protocol.to_int_msg(len(components_msg))

    self._write_msg(protocol.IN_TAG_LOAD_COMPONENTS, size_msg, components_msg)
    self._read_tag(protocol.OUT_TAG_LOAD_COMPONENTS)

    count_msg = self._proc.stdout.read(protocol.count_struct.size)
    self._component_count = protocol.from_count_msg(count_msg)
    print_err("_component_count =", self._component_count)

    # NOTE(review): the result is unused; the call is kept only in case
    # constructing the buffer has side effects - confirm and drop if not.
    pyext.Buffer(self._edits_buf_name)

    print_err("GANSpace components loaded!")
def handle_synthesize_noz(model, stdin, stdout, state):
    """Handle a synthesize request that uses GANSpace edits instead of z's.

    Reads a count and that many pitch messages from ``stdin``.  A single
    layer is built with ``make_layer`` from ``state["ganspace_components"]``
    and ``state["ganspace_component_amplitudes"]``, repeated once per pitch
    and passed to ``model.generate_samples_from_layers``.

    The reply written to ``stdout`` is an ``OUT_TAG_AUDIO`` tag, the clip
    count, then a (size, payload) pair per clip.  If the model raises
    ``KeyError`` the error is reported through ``print_err`` and an empty
    reply (count 0) is written.
    """
    print_err("handle_synthesize_noz")

    request_count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size)
    )
    pitches = [
        protocol.from_synthesize_noz_msg(
            read_msg(stdin, protocol.synthesize_noz_struct.size)
        )
        for _ in range(request_count)
    ]

    pca = state["ganspace_components"]
    # The amplitudes are cast to the same dtype as the stored stdevs.
    layer_dtype = pca["stdev"].dtype
    edits = np.array(state["ganspace_component_amplitudes"], dtype=layer_dtype)

    # Every requested pitch shares the same edited layer.
    layer = make_layer(pca, edits)
    layers = np.repeat([layer], len(pitches), axis=0)

    try:
        audios = model.generate_samples_from_layers(
            {pca["layer"]: layers}, pitches
        )
    except KeyError as err:
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                err.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))
    for clip in audios:
        # Size prefix is the payload length in bytes.
        stdout.write(protocol.to_audio_size_msg(clip.size * clip.itemsize))
        stdout.write(protocol.to_audio_msg(clip))
    stdout.flush()
def synthesize_noz_1(self, *args):
    """Synthesize audio from the current GANSpace edits for (audio, pitch) pairs.

    ``args`` is a flat sequence ``audio1 pitch1 [audio2 pitch2 ...]``.  The
    contents of the buffer named ``self._edits_buf_name`` are first sent to
    the worker as component amplitudes, padded with zeros up to
    ``self._component_count`` values.  Then an ``IN_TAG_SYNTHESIZE_NOZ``
    request is sent with one message per pitch.  The returned clips are
    written, in order, into the named audio buffers, which are resized
    first if their length differs, and marked dirty.

    If the worker reports zero clips the method logs that and returns
    without emitting on the outlet; otherwise it finishes by calling
    ``self._outlet(1, "synthesized")``.

    Raises:
        Exception: if ``self._proc`` is falsy (no worker process).
        ValueError: if the argument count is zero or not a multiple of 2.
        AssertionError: if the worker's non-zero reply count differs from
            the number of requests.
    """
    if not self._proc:
        raise Exception("can't synthesize - no gansynth_worker process is running")

    arg_count = len(args)
    if not args or arg_count % 2:
        raise ValueError("invalid number of arguments ({}), should be a multiple of 2: synthesize_noz audio1 pitch1 [audio2 pitch2 ...]".format(arg_count))

    # Push the current edit amplitudes to the worker before synthesizing.
    amplitudes = np.array(pyext.Buffer(self._edits_buf_name), dtype=np.float64)
    amplitude_msgs = [protocol.to_float_msg(value) for value in amplitudes]
    # Pad with zeros when the buffer holds fewer values than there are
    # components; a longer buffer is sent as-is.
    missing = self._component_count - len(amplitudes)
    amplitude_msgs.extend(protocol.to_float_msg(0.0) for _ in range(missing))
    self._write_msg(protocol.IN_TAG_SET_COMPONENT_AMPLITUDES, *amplitude_msgs)

    requests = []
    targets = []
    # Walk the flat argument list two items at a time.
    for audio_name, pitch in zip(args[0::2], args[1::2]):
        requests.append(protocol.to_synthesize_noz_msg(pitch))
        targets.append(audio_name)

    sent = len(requests)
    self._write_msg(
        protocol.IN_TAG_SYNTHESIZE_NOZ, protocol.to_count_msg(sent), *requests
    )

    # Reply layout: tag, count, then a (size, payload) pair per clip.
    self._read_tag(protocol.OUT_TAG_AUDIO)
    received = protocol.from_count_msg(
        self._proc.stdout.read(protocol.count_struct.size)
    )
    print_err("out_count =", received)

    if received == 0:
        print_err("no audio was synthesized!")
        return
    assert received == sent

    for audio_name in targets:
        size_bytes = protocol.from_audio_size_msg(
            self._proc.stdout.read(protocol.audio_size_struct.size)
        )
        note = protocol.from_audio_msg(self._proc.stdout.read(size_bytes))

        out_buf = pyext.Buffer(audio_name)
        if len(out_buf) != len(note):
            out_buf.resize(len(note))
        out_buf[:] = note
        out_buf.dirty()

    self._outlet(1, "synthesized")