def handle_hallucinate_noz(model, stdin, stdout, state):
    """Hallucinate audio from explicit GANSpace edit vectors read off stdin.

    Reads a hallucinate header, then step_count * edit_count f64 edit
    amplitudes, synthesizes one note per (interpolated) step at the model's
    lowest trained pitch, and writes the combined audio back on stdout.
    """
    note_len = model.config['audio_length']
    sr = model.config['sample_rate']

    pca = state["ganspace_components"]
    dtype = pca["stdev"].dtype

    header = protocol.from_hallucinate_msg(
        read_msg(stdin, protocol.hallucinate_struct.size))
    step_count, interpolation_steps, spacing, start_trim, attack, sustain, release = header

    edit_count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size))

    # Every note is rendered at the lowest pitch the model was trained on.
    pitch = min(model.pitch_counts.keys())

    def read_edit():
        return protocol.from_f64_msg(read_msg(stdin, protocol.f64_struct.size))

    # One edit vector per requested step, consumed in wire order.
    steps = [
        np.array([read_edit() for _ in range(edit_count)], dtype=dtype)
        for _ in range(step_count)
    ]
    steps = list(interpolate_edits(steps, interpolation_steps))

    layer_steps = np.array([make_layer(pca, e) for e in steps], dtype=dtype)
    pitch_steps = np.repeat([pitch], len(steps))

    audios = model.generate_samples_from_layers({pca["layer"]: layer_steps},
                                                pitch_steps)

    final_audio = combine_notes(audios,
                                spacing=spacing,
                                start_trim=start_trim,
                                attack=attack,
                                sustain=sustain,
                                release=release,
                                max_note_length=note_len,
                                sr=sr).astype("float32")

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(
        protocol.to_audio_size_msg(final_audio.size * final_audio.itemsize))
    stdout.write(protocol.to_audio_msg(final_audio))
    stdout.flush()
Exemple #2
0
def handle_gen_audio(model, stdin, stdout, state):
    """Generate audio for `count` (pitch, z) pairs read from stdin.

    Applies any previously-set GANSpace component amplitudes as a layer
    offset, synthesizes each note, and streams the audio buffers back as
    TAG + count + (size, audio) per note.
    """
    count_msg = read_msg(stdin, protocol.count_struct.size)
    count = protocol.from_count_msg(count_msg)

    pitches = []
    zs = []
    for _ in range(count):
        gen_msg = read_msg(stdin, protocol.gen_audio_struct.size)
        pitch, z = protocol.from_gen_msg(gen_msg)
        pitches.append(pitch)
        zs.append(z)

    layer_offsets = {}
    if 'ganspace_component_amplitudes' in state:
        components = state['ganspace_components']['comp']
        std_devs = state['ganspace_components']['stdev']
        edits = state['ganspace_component_amplitudes']

        # Scale each edited component direction by amplitude * stdev;
        # components beyond the edited ones stay at amplitude 0.
        amounts = np.zeros(components.shape[:1], dtype=np.float32)
        # (was `len(list(map(float, edits)))` — that builds and discards a
        # list only to measure it; it is exactly len(edits))
        amounts[:len(edits)] = edits * std_devs

        scaled_directions = amounts.reshape(-1, 1, 1, 1) * components
        linear_combination = np.sum(scaled_directions, axis=0)

        # NOTE(review): the repeat count 8 looks like the model's fixed
        # batch size — confirm against the training flags before changing.
        linear_combination_batch = np.repeat(linear_combination.reshape(
            1, *linear_combination.shape),
                                             8,
                                             axis=0)

        layer_offsets[state['ganspace_components']
                      ['layer']] = linear_combination_batch

    z_arr = np.array(zs)
    try:
        audios = model.generate_samples_from_z(z_arr,
                                               pitches,
                                               layer_offsets=layer_offsets)
    except KeyError as e:
        # The model raises KeyError for a pitch it was not trained on;
        # report it and reply with an empty batch instead of crashing.
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))

    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))

    stdout.flush()
Exemple #3
0
def handle_generate(state):
    """Read a generate request from stdin, synthesize, and reply on stdout.

    Wire format: a generate header, then (if seed_len > 0) seed audio of
    seed_len f32 samples, then one length-prefixed temperature string per
    requested output.  The print_err calls are protocol debug tracing.
    """
    stdin = state.stdin
    stdout = state.stdout

    generate_msg = read_msg(stdin, protocol.generate_struct.size)
    seed_sr, out_sr, num_outs, dur, seed_len = protocol.from_generate_msg(
        generate_msg)

    print_err("seed_sr =", seed_sr)
    print_err("out_sr =", out_sr)
    print_err("num_outs =", num_outs)
    print_err("dur =", dur)
    print_err("seed_len =", seed_len)

    if seed_len > 0:
        seed_msg = read_msg(stdin, seed_len * protocol.f32_struct.size)
        seed_audio = protocol.from_audio_msg(seed_msg)
    else:
        # No seed supplied: generate from scratch.
        seed_audio = np.array([], dtype=np.float32)

    print_err("seed_audio size*itemsize =",
              seed_audio.size * seed_audio.itemsize)

    temps = []
    for _ in range(num_outs):
        temp_len_msg = read_msg(stdin, protocol.size_struct.size)
        temp_len = protocol.from_size_msg(temp_len_msg)

        temp_str_msg = read_msg(stdin, temp_len)
        temp_str = protocol.from_str_msg(temp_str_msg)

        temps.append(gen.check_temperature(temp_str))

    print_err("temps =", temps)

    out_audios = list(generate(state, out_sr, dur, temps, seed_audio))

    print_err("generated")

    # Guard against an empty batch (num_outs == 0) so out_audios[0] does
    # not raise IndexError; the header then advertises size 0.
    out_size = out_audios[0].size if out_audios else 0

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_GENERATED))
    stdout.write(
        protocol.to_generated_msg(out_sr, len(out_audios), out_size))
    for audio in out_audios:
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
Exemple #4
0
def handle_load_ganspace_components(model, stdin, stdout, state):
    """Load a pickled GANSpace components file named on stdin into `state`.

    Replies with a LOAD_COMPONENTS tag followed by the component count.
    """
    path_len = protocol.from_int_msg(read_msg(stdin, protocol.int_struct.size))
    path = read_msg(stdin, path_len).decode('utf-8')

    print_err("Opening components file '{}'".format(path))
    # NOTE: pickle.load executes arbitrary code — only load trusted files.
    with open(path, "rb") as fp:
        components = pickle.load(fp)
    state['ganspace_components'] = components
    print_err("Components file loaded.")

    count = len(components["comp"])
    state['ganspace_component_count'] = count

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_LOAD_COMPONENTS))
    stdout.write(protocol.to_count_msg(count))
    stdout.flush()
Exemple #5
0
def handle_set_component_amplitudes(model, stdin, stdout, state):
    """Read one f64 amplitude per known GANSpace component and stash them.

    Writes no reply; the amplitudes take effect on later synthesis calls.
    """
    # Comprehension preserves wire order: each element is one stdin read.
    state['ganspace_component_amplitudes'] = [
        protocol.from_float_msg(read_msg(stdin, protocol.f64_struct.size))
        for _ in range(state['ganspace_component_count'])
    ]
Exemple #6
0
def handle_slerp_z(model, stdin, stdout, state):
    """Spherically interpolate between two latent vectors and reply with
    the single resulting z."""
    msg = read_msg(stdin, protocol.slerp_z_struct.size)
    z0, z1, amount = protocol.from_slerp_z_msg(msg)

    result = gu.slerp(z0, z1, amount)

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_Z))
    stdout.write(protocol.to_count_msg(1))
    stdout.write(protocol.to_z_msg(result))
    stdout.flush()
Exemple #7
0
def handle_synthesize_noz(model, stdin, stdout, state):
    """Synthesize one note per requested pitch using the current GANSpace
    component amplitudes (no explicit z vectors on the wire)."""
    print_err("handle_synthesize_noz")

    count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size))

    pitches = [
        protocol.from_synthesize_noz_msg(
            read_msg(stdin, protocol.synthesize_noz_struct.size))
        for _ in range(count)
    ]

    pca = state["ganspace_components"]
    dtype = pca["stdev"].dtype
    edits = np.array(state["ganspace_component_amplitudes"], dtype=dtype)

    # The same edit layer is used for every note, repeated on the batch axis.
    layer = make_layer(pca, edits)
    layers = np.repeat([layer], len(pitches), axis=0)

    try:
        audios = model.generate_samples_from_layers({pca["layer"]: layers},
                                                    pitches)
    except KeyError as e:
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))
    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
Exemple #8
0
def handle_rand_z(model, stdin, stdout, state):
    """Generate `count` fresh random latent vectors and write them back."""
    count = protocol.from_count_msg(
        read_msg(stdin, protocol.count_struct.size))

    zs = model.generate_z(count)

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_Z))
    stdout.write(protocol.to_count_msg(len(zs)))
    for z in zs:
        stdout.write(protocol.to_z_msg(z))
    stdout.flush()
def handle_timbre_transfer(stdin, stdout):
    """Run timbre transfer on audio received over stdin, reply on stdout.

    Wire format: a fixed-size parameter struct, then a checkpoint-directory
    path of ckpt_dir_len bytes, then in_audio_len bytes of input audio.
    The result is written back as TAG + size + audio.  The many print_err
    calls are deliberate debug tracing of each protocol step.
    """
    transfer_msg = read_msg(stdin, protocol.timbre_transfer_struct.size)
    h = protocol.from_timbre_transfer_msg(transfer_msg)
    print_err(repr(h))
    in_sample_rate, out_sample_rate, f0_octave_shift, f0_confidence_threshold, loudness_db_shift, adjust, quiet, autotune, ckpt_dir_len, in_audio_len = h

    print_err("ckpt_dir_len =", ckpt_dir_len)
    print_err("in_audio_len =", in_audio_len)
    # NOTE(review): plain stdin.read(n) may return fewer than n bytes on a
    # pipe; other handlers use read_msg for this — confirm this is safe here.
    ckpt_dir_msg = stdin.read(ckpt_dir_len)
    ckpt_dir = protocol.from_str_msg(ckpt_dir_msg)
    print_err("ckpt_dir =", ckpt_dir)

    in_audio_msg = stdin.read(in_audio_len)
    print_err("len(in_audio_msg) =", len(in_audio_msg))
    in_audio = protocol.from_audio_msg(in_audio_msg)
    print_err("in_audio.size =", in_audio.size)

    out_audio = timbre_transfer(
        ckpt_dir=ckpt_dir,
        audio=in_audio,
        in_sample_rate=in_sample_rate,
        out_sample_rate=out_sample_rate,
        f0_octave_shift=f0_octave_shift,
        f0_confidence_threshold=f0_confidence_threshold,
        loudness_db_shift=loudness_db_shift,
        adjust=adjust,
        quiet=quiet,
        autotune=autotune,
        log=print_err)
    # timbre_transfer returns a tensor-like object (it has .numpy());
    # flatten it to a 1-D array for the wire.
    out_audio = out_audio.numpy().ravel()

    out_audio_len = out_audio.size * out_audio.itemsize

    print_err("out_audio.shape =", out_audio.shape)
    print_err("out_audio_len =", out_audio_len)

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_TIMBRE_TRANSFERRED))
    print_err("wrote tag_timbre_transferred")

    stdout.write(protocol.to_timbre_transferred_msg(out_audio_len))
    print_err("wrote size")
    bytez = protocol.to_audio_msg(out_audio)
    print_err("len(bytez) =", len(bytez))
    stdout.write(bytez)
    print_err("wrote out_audio")
    stdout.flush()
def handle_hallucinate(model, stdin, stdout, state):
    """Hallucinate a sequence of random notes, interpolate between them,
    and reply with the combined rendered audio."""
    max_note_length = model.config['audio_length']
    sample_rate = model.config['sample_rate']

    hallucinate_msg = read_msg(stdin, protocol.hallucinate_struct.size)
    args = protocol.from_hallucinate_msg(hallucinate_msg)
    note_count, interpolation_steps, spacing, start_trim, attack, sustain, release = args

    print_err(
        "note_count = {} interpolation_steps = {}, spacing = {}s, start_trim = {}s, attack = {}s, sustain = {}s, release = {}s"
        .format(*args))

    # Seed notes are random latents, all at fixed pitch 32.
    initial_notes = model.generate_z(note_count)
    initial_pitches = np.array([32] * len(initial_notes))

    final_notes, final_pitches = interpolate_notes(initial_notes,
                                                   initial_pitches,
                                                   interpolation_steps)

    audios = synthesize(model, final_notes, final_pitches)
    final_audio = combine_notes(audios,
                                spacing=spacing,
                                start_trim=start_trim,
                                attack=attack,
                                sustain=sustain,
                                release=release,
                                max_note_length=max_note_length,
                                sr=sample_rate).astype('float32')

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(
        protocol.to_audio_size_msg(final_audio.size * final_audio.itemsize))
    stdout.write(protocol.to_audio_msg(final_audio))
    stdout.flush()
Exemple #11
0
    ckpt_dir = sys.argv[1]
    batch_size = int(sys.argv[2])
except IndexError:
    print_err("usage: {} checkpoint_dir batch_size".format(
        os.path.basename(__file__)))
    sys.exit(1)

# --- Script top level: load the GANSynth model and serve the protocol. ---
# Batch size comes from the command line (parsed above this span).
flags = lib_flags.Flags({"batch_size_schedule": [batch_size]})
model = lib_model.Model.load_from_path(ckpt_dir, flags)

# Unbuffered binary handles over the process stdin/stdout: the wire
# protocol is binary, so text modes or buffering would corrupt it.
stdin = os.fdopen(sys.stdin.fileno(), "rb", 0)
stdout = os.fdopen(sys.stdout.fileno(), "wb", 0)
stdout.write(gss.to_tag_msg(gss.OUT_TAG_INIT))

# Advertise the fixed audio geometry to the client up front.
audio_length = model.config['audio_length']
sample_rate = model.config['sample_rate']
info_msg = gss.to_info_msg(audio_length=audio_length, sample_rate=sample_rate)
stdout.write(info_msg)
stdout.flush()

# Mutable per-session state shared by all handlers (GANSpace components,
# amplitudes, etc.).
state = {}

# Main loop: read a tag, dispatch to its handler; unknown tags are fatal.
while True:
    in_tag_msg = read_msg(stdin, gss.tag_struct.size)
    in_tag = gss.from_tag_msg(in_tag_msg)

    if in_tag not in handlers:
        raise ValueError("unknown input message tag: {}".format(in_tag))

    handlers[in_tag](model, stdin, stdout, state)
# --- Script top level: load the inference model and serve the protocol. ---
# (args / config / ckpt_path are parsed above this span.)
model = gen.create_inference_model(ckpt_path, args.num_seqs, config)

print_err("hello :)")

# open standard input/output handles
# Binary buffers: the wire protocol is binary.

stdin = sys.stdin.buffer
stdout = sys.stdout.buffer

# write init message

stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_INIT))
stdout.flush()

print_err("it begins @_@")

# Bundle everything handlers need into one namespace; this server variant
# passes a single `state` object instead of (model, stdin, stdout, state).
state = SimpleNamespace(stdin=stdin,
                        stdout=stdout,
                        config=config,
                        num_seqs=args.num_seqs,
                        model=model)

# Main loop: read a tag, dispatch to its handler; unknown tags are fatal.
while True:
    in_tag_msg = read_msg(stdin, protocol.tag_struct.size)
    in_tag = protocol.from_tag_msg(in_tag_msg)

    if in_tag not in handlers:
        raise ValueError("unknown input message tag: {}".format(in_tag))

    handlers[in_tag](state)