Esempio n. 1
0
    def load_1(self, *raw_args):
        """Start a samplernn_worker subprocess.

        raw_args are parsed by self._load_parser (--python plus pass-through
        worker args). Any already-running worker is unloaded first. Blocks
        until the worker sends its INIT tag, then reports ["worker", "on"].
        """
        # replace any previously running worker
        if self._proc is not None:
            self.unload_1()

        # pyext delivers symbols/atoms; normalize to str for argparse
        args = self._load_parser.parse_args(map(str, raw_args))

        # fall back to the interpreter running this host if --python not given
        python = args.python
        if not python:
            python = sys.executable

        worker_script = os.path.join(script_dir, "samplernn_worker.py")

        print_err("starting samplernn_worker process, this may take a while")
        worker_command = (str(python), worker_script, "--canvas_dir", self._canvas_dir, *args.worker_args)
        print_err("worker_command =", worker_command)

        self._proc = subprocess.Popen(
            worker_command,
            stdin = subprocess.PIPE,
            stdout = subprocess.PIPE,
            stderr = subprocess.PIPE,
            cwd = self._canvas_dir
        )
        # forward the worker's stderr so its log output stays visible
        self._stderr_printer = threading.Thread(target = self._keep_printing_stderr)
        self._stderr_printer.start()

        # block until the worker signals it has finished initializing
        self._read_tag(protocol.OUT_TAG_INIT)

        self._outlet(1, ["worker", "on"])
Esempio n. 2
0
    def load_1(self, ckpt_dir, batch_size=1):
        """Start a gansynth_worker subprocess on the given checkpoint dir.

        ckpt_dir is resolved relative to the patch's canvas directory.
        Blocks until the worker sends its INIT tag and the model's info
        message, then reports ["worker", "on", audio_length, sample_rate].
        """
        # replace any previously running worker
        if self._proc is not None:
            self.unload_1()

        python = sys.executable
        worker_script = sopimagenta_path("gansynth_worker")
        ckpt_dir = os.path.join(self._canvas_dir, str(ckpt_dir))

        print_err("starting gansynth_worker process, this may take a while")

        self._proc = subprocess.Popen(
            (python, worker_script, ckpt_dir, str(batch_size)),
            stdin = subprocess.PIPE,
            stdout = subprocess.PIPE,
            stderr = subprocess.PIPE
        )
        # forward the worker's stderr so its log output stays visible
        self._stderr_printer = threading.Thread(target = self._keep_printing_stderr)
        self._stderr_printer.start()

        self._read_tag(protocol.OUT_TAG_INIT)

        # the worker reports the model's fixed clip length and sample rate
        info_msg = self._proc.stdout.read(protocol.init_struct.size)
        audio_length, sample_rate = protocol.from_info_msg(info_msg)

        print_err("gansynth_worker is ready")
        self._outlet(1, ["worker", "on", audio_length, sample_rate])
Esempio n. 3
0
    def unload_1(self):
        """Terminate the running worker process, if any, and report 'off'."""
        proc = self._proc
        if not proc:
            print_err("no gansynth_worker process is running")
        else:
            proc.terminate()
            # drop our references; the stderr thread ends when the pipe closes
            self._proc = None
            self._stderr_printer = None

        self._outlet(1, ["worker", "off"])
Esempio n. 4
0
def save_z_buf(z_name, path):
    """Read a latent z vector from the Pd buffer named z_name and report the
    extension-normalized destination path.

    NOTE(review): as written this only prints the destination; z is decoded
    but never written to disk — confirm whether the actual save call lives
    elsewhere or was lost.
    """
    z_buf = pyext.Buffer(z_name)
    z = from_z32(z_buf)

    # default to .npy when the caller gave no file extension
    base, ext = os.path.splitext(path)
    if not ext:
        ext = ".npy"
    path_fixed = base + ext

    print_err("save: " + path_fixed)
Esempio n. 5
0
def handle_gen_audio(model, stdin, stdout, state):
    """Read (pitch, z) pairs from stdin, synthesize audio with the model and
    stream the resulting clips back over stdout.

    Protocol: count msg + `count` gen_audio msgs in; AUDIO tag, count msg and
    size-prefixed audio msgs out.
    """
    count_msg = read_msg(stdin, protocol.count_struct.size)
    count = protocol.from_count_msg(count_msg)

    pitches = []
    zs = []
    for _ in range(count):
        gen_msg = read_msg(stdin, protocol.gen_audio_struct.size)
        pitch, z = protocol.from_gen_msg(gen_msg)
        pitches.append(pitch)
        zs.append(z)

    # Optional GANSpace edit: build per-layer offsets from the loaded PCA
    # components scaled by the requested amplitudes.
    layer_offsets = {}
    if 'ganspace_component_amplitudes' in state:
        components = state['ganspace_components']['comp']
        std_devs = state['ganspace_components']['stdev']
        edits = state['ganspace_component_amplitudes']

        # one amplitude per component; components without an edit stay zero
        amounts = np.zeros(components.shape[:1], dtype=np.float32)
        amounts[:len(edits)] = edits * std_devs

        scaled_directions = amounts.reshape(-1, 1, 1, 1) * components
        linear_combination = np.sum(scaled_directions, axis=0)
        # batch dimension of 8 — presumably the worker's fixed batch size;
        # TODO confirm against the model's configured batch size
        linear_combination_batch = np.repeat(
            linear_combination.reshape(1, *linear_combination.shape),
            8,
            axis=0)

        layer_offsets[state['ganspace_components']
                      ['layer']] = linear_combination_batch

    z_arr = np.array(zs)
    try:
        audios = model.generate_samples_from_z(z_arr,
                                               pitches,
                                               layer_offsets=layer_offsets)
    except KeyError as e:
        # model checkpoints only cover certain pitches
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))

    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))

    stdout.flush()
Esempio n. 6
0
def handle_load_ganspace_components(model, stdin, stdout, state):
    """Load a pickled GANSpace PCA components file named over stdin and
    acknowledge with the component count.

    Protocol: int msg (path byte length) + utf-8 path in; LOAD_COMPONENTS
    tag and count msg out.
    """
    size_msg = read_msg(stdin, protocol.int_struct.size)
    size = protocol.from_int_msg(size_msg)
    msg = read_msg(stdin, size)
    file = msg.decode('utf-8')
    print_err("Opening components file '{}'".format(file))
    # SECURITY: pickle.load executes arbitrary code from the file — only
    # load component files from trusted sources.
    with open(file, "rb") as fp:
        state['ganspace_components'] = pickle.load(fp)
    print_err("Components file loaded.")

    component_count = len(state['ganspace_components']["comp"])
    state['ganspace_component_count'] = component_count
    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_LOAD_COMPONENTS))
    stdout.write(protocol.to_count_msg(component_count))
    stdout.flush()
Esempio n. 7
0
    def load_1(self, python):
        """Start a ddsp_worker subprocess using the given python interpreter.

        Any already-running worker is unloaded first. Blocks until the
        worker sends its INIT tag, then reports ["worker", "on"].
        """
        # replace any previously running worker
        if self._proc is not None:
            self.unload_1()

        worker_script = os.path.join(script_dir, "ddsp_worker.py")

        print_err("starting ddsp_worker process, this may take a while")

        self._proc = subprocess.Popen((str(python), worker_script),
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
        # forward the worker's stderr so its log output stays visible
        self._stderr_printer = threading.Thread(
            target=self._keep_printing_stderr)
        self._stderr_printer.start()

        # block until the worker signals it has finished initializing
        self._read_tag(protocol.OUT_TAG_INIT)

        self._outlet(1, ["worker", "on"])
Esempio n. 8
0
    def load_ganspace_components_1(self, ganspace_components_file):
        """Ask the worker to load a GANSpace components file (resolved
        relative to the patch's canvas directory) and remember the component
        count it reports back."""
        ganspace_components_file = os.path.join(
            self._canvas_dir,
            str(ganspace_components_file)
        )

        print("Loading GANSpace components...", file=sys.stderr)

        components_msg = ganspace_components_file.encode('utf-8')
        # Send the byte length of the *encoded* path: the worker reads that
        # many bytes, and for non-ASCII paths the character count (len of the
        # str) would under-count.
        size_msg = protocol.to_int_msg(len(components_msg))

        self._write_msg(protocol.IN_TAG_LOAD_COMPONENTS, size_msg, components_msg)
        self._read_tag(protocol.OUT_TAG_LOAD_COMPONENTS)
        count_msg = self._proc.stdout.read(protocol.count_struct.size)
        self._component_count = protocol.from_count_msg(count_msg)
        print_err("_component_count =", self._component_count)

        buf = pyext.Buffer(self._edits_buf_name)
        #buf.resize(component_count)
        #buf.dirty()

        print_err("GANSpace components loaded!")
Esempio n. 9
0
def handle_synthesize_noz(model, stdin, stdout, state):
    """Synthesize audio for the requested pitches without explicit z vectors,
    applying the current GANSpace edit amplitudes as a layer offset.

    Requires that components and amplitudes have already been stored in
    `state`. Protocol: count msg + per-note pitch msgs in; AUDIO tag, count
    msg and size-prefixed audio msgs out.
    """
    print_err("handle_synthesize_noz")

    count_msg = read_msg(stdin, protocol.count_struct.size)
    count = protocol.from_count_msg(count_msg)

    pitches = []
    for i in range(count):
        gen_msg = read_msg(stdin, protocol.synthesize_noz_struct.size)

        pitch = protocol.from_synthesize_noz_msg(gen_msg)

        pitches.append(pitch)

    # Build one edited layer from the PCA components and amplitudes, then
    # repeat it so every requested note gets the same offset.
    pca = state["ganspace_components"]
    stdevs = pca["stdev"]
    layer_dtype = stdevs.dtype
    edits = np.array(state["ganspace_component_amplitudes"], dtype=layer_dtype)

    layer = make_layer(pca, edits)
    layers = np.repeat([layer], len(pitches), axis=0)

    try:
        audios = model.generate_samples_from_layers({pca["layer"]: layers},
                                                    pitches)
    except KeyError as e:
        # model checkpoints only cover certain pitches
        print_err(
            "can't synthesize - model was not trained on pitch {}".format(
                e.args[0]))
        audios = []

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(protocol.to_count_msg(len(audios)))

    for audio in audios:
        stdout.write(protocol.to_audio_size_msg(audio.size * audio.itemsize))
        stdout.write(protocol.to_audio_msg(audio))

    stdout.flush()
def handle_hallucinate(model, stdin, stdout, state):
    """Generate random notes, interpolate between them, synthesize and
    combine them into one clip, and stream it back over stdout.

    Protocol: hallucinate msg (note count, interpolation steps and envelope
    parameters) in; AUDIO tag, size msg and audio msg out.
    """
    max_note_length = model.config['audio_length']
    sample_rate = model.config['sample_rate']

    hallucinate_msg = read_msg(stdin, protocol.hallucinate_struct.size)
    args = protocol.from_hallucinate_msg(hallucinate_msg)
    note_count, interpolation_steps, spacing, start_trim, attack, sustain, release = args

    print_err(
        "note_count = {} interpolation_steps = {}, spacing = {}s, start_trim = {}s, attack = {}s, sustain = {}s, release = {}s"
        .format(*args))

    initial_notes = model.generate_z(note_count)
    # fixed pitch 32 for every note (random pitches left disabled below)
    initial_pitches = np.array(
        [32] * len(initial_notes)
    )  # np.floor(30 + np.random.rand(len(initial_notes)) * 30)
    final_notes, final_pitches = interpolate_notes(initial_notes,
                                                   initial_pitches,
                                                   interpolation_steps)

    audios = synthesize(model, final_notes, final_pitches)
    final_audio = combine_notes(audios,
                                spacing=spacing,
                                start_trim=start_trim,
                                attack=attack,
                                sustain=sustain,
                                release=release,
                                max_note_length=max_note_length,
                                sr=sample_rate)

    # protocol expects 32-bit float samples
    final_audio = final_audio.astype('float32')

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_AUDIO))
    stdout.write(
        protocol.to_audio_size_msg(final_audio.size * final_audio.itemsize))
    stdout.write(protocol.to_audio_msg(final_audio))
    stdout.flush()
Esempio n. 11
0
    def hallucinate_noz_1(self, audio_buf_name):
        """Request a 'hallucinated' clip built from the queued steps' GANSpace
        edits and write the result into the Pd buffer named audio_buf_name.

        Raises if no worker is running or no steps have been added. Emits
        ["hallucinated", sample_count] when done.
        """
        if not self._proc:
            raise Exception("can't hallucinate - load a checkpoint first")

        if not self._steps:
            raise Exception("can't hallucinate - no steps added")

        # refresh step edits from the Pd-side edits buffer
        self._read_edits()
        
        step_count = len(self._steps)
        print_err("step_count =", step_count)

        print_err("steps =", self._steps)

        # edits per step (all steps are assumed to have the same number)
        edit_count = len(self._steps[0]["edits"])
        
        # flatten all steps' edits into one list, step-major order
        edit_list = []
        for step in self._steps:
            for edit in step["edits"]:
                edit_list.append(edit)

        print_err("len(edit_list) =", len(edit_list))
        
        # NOTE(review): the count msg carries edits-per-step while
        # step_count * edit_count f64 msgs follow — presumably the worker
        # multiplies by step_count; confirm against its handler.
        self._write_msg(
            protocol.IN_TAG_HALLUCINATE_NOZ,
            protocol.to_hallucinate_msg(
                step_count,
                self._interp_steps,
                self._sample_spacing,
                self._start_trim,
                self._attack,
                self._sustain,
                self._release
            ),
            protocol.to_count_msg(edit_count),
            *map(protocol.to_f64_msg, edit_list)
        )
        
        self._read_tag(protocol.OUT_TAG_AUDIO)
        
        # read the size-prefixed audio reply
        audio_size_msg = self._proc.stdout.read(protocol.audio_size_struct.size)
        audio_size = protocol.from_audio_size_msg(audio_size_msg)

        audio_msg = self._proc.stdout.read(audio_size)
        audio = protocol.from_audio_msg(audio_msg)

        # copy into the target Pd buffer, resizing if necessary
        audio_buf = pyext.Buffer(audio_buf_name)
        if len(audio_buf) != len(audio):
            audio_buf.resize(len(audio))

        audio_buf[:] = audio
        audio_buf.dirty()
        
        self._outlet(1, ["hallucinated", len(audio)])
def handle_timbre_transfer(stdin, stdout):
    """Handle one timbre-transfer request: read the parameters, checkpoint
    path and input audio from stdin, run timbre_transfer(), and stream the
    converted audio back over stdout.

    Protocol: timbre_transfer msg + ckpt path bytes + audio bytes in;
    TIMBRE_TRANSFERRED tag, size msg and audio msg out.
    """
    transfer_msg = read_msg(stdin, protocol.timbre_transfer_struct.size)
    h = protocol.from_timbre_transfer_msg(transfer_msg)
    print_err(repr(h))
    in_sample_rate, out_sample_rate, f0_octave_shift, f0_confidence_threshold, loudness_db_shift, adjust, quiet, autotune, ckpt_dir_len, in_audio_len = h

    print_err("ckpt_dir_len =", ckpt_dir_len)
    print_err("in_audio_len =", in_audio_len)
    # variable-length payloads follow the fixed-size header
    ckpt_dir_msg = stdin.read(ckpt_dir_len)
    ckpt_dir = protocol.from_str_msg(ckpt_dir_msg)
    print_err("ckpt_dir =", ckpt_dir)

    in_audio_msg = stdin.read(in_audio_len)
    print_err("len(in_audio_msg) =", len(in_audio_msg))
    in_audio = protocol.from_audio_msg(in_audio_msg)
    print_err("in_audio.size =", in_audio.size)

    out_audio = timbre_transfer(
        ckpt_dir=ckpt_dir,
        audio=in_audio,
        in_sample_rate=in_sample_rate,
        out_sample_rate=out_sample_rate,
        f0_octave_shift=f0_octave_shift,
        f0_confidence_threshold=f0_confidence_threshold,
        loudness_db_shift=loudness_db_shift,
        adjust=adjust,
        quiet=quiet,
        autotune=autotune,
        log=print_err)
    # model returns a tensor; flatten to a 1-D numpy array for the protocol
    out_audio = out_audio.numpy().ravel()

    out_audio_len = out_audio.size * out_audio.itemsize

    print_err("out_audio.shape =", out_audio.shape)
    print_err("out_audio_len =", out_audio_len)

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_TIMBRE_TRANSFERRED))
    print_err("wrote tag_timbre_transferred")

    stdout.write(protocol.to_timbre_transferred_msg(out_audio_len))
    print_err("wrote size")
    bytez = protocol.to_audio_msg(out_audio)
    print_err("len(bytez) =", len(bytez))
    stdout.write(bytez)
    print_err("wrote out_audio")
    stdout.flush()
    # CLI flags mirroring timbre_transfer()'s keyword arguments
    parser.add_argument("--sample_rate", type=int, default=16000)
    parser.add_argument("--f0_octave_shift", type=int, default=0)
    parser.add_argument("--f0_confidence_threshold", type=float, default=0.0)
    parser.add_argument("--loudness_db_shift", type=float, default=0.0)
    # paired flags: --adjust / --no-adjust toggle the same dest (default on)
    parser.add_argument("--adjust", dest="adjust", action="store_true")
    parser.add_argument("--no-adjust", dest="adjust", action="store_false")
    parser.set_defaults(adjust=True)
    parser.add_argument("--quiet", type=float, default=20.0)
    parser.add_argument("--autotune", type=float, default=0.0)

    args = parser.parse_args()

    # load at native sample rate; timbre_transfer resamples internally
    audio, in_sample_rate = librosa.load(args.in_file, sr=None)

    out_audio = timbre_transfer(
        ckpt_dir=args.ckpt_dir,
        audio=audio,
        in_sample_rate=in_sample_rate,
        out_sample_rate=args.sample_rate,
        f0_octave_shift=args.f0_octave_shift,
        f0_confidence_threshold=args.f0_confidence_threshold,
        loudness_db_shift=args.loudness_db_shift,
        adjust=args.adjust,
        quiet=args.quiet,
        autotune=args.autotune,
        log=print_err)

    print_err("saving generated audio to {}".format(args.out_file))
    scipy.io.wavfile.write(args.out_file, args.sample_rate,
                           out_audio.numpy().T)
Esempio n. 14
0
 def _outlet(self, *args):
     """Stub outlet used outside Pure Data: just log the call arguments."""
     print_err("_outlet{}".format(args))
Esempio n. 15
0
 def _anything_1(self, *args):
     """Catch-all inlet handler: log messages no other handler claimed."""
     print_err("unhandled input:", args)
Esempio n. 16
0
 def state_1(self):
     """Debug command: dump the recorded notes and the start timestamp."""
     print_err("notes =", self.notes)
     print_err("t0 =", self.t0)
Esempio n. 17
0
import time

import magenta
from magenta.models.melody_rnn import melody_rnn_config_flags, melody_rnn_model, melody_rnn_sequence_generator
from magenta.models.shared.sequence_generator_bundle import read_bundle_file
from magenta.music.protobuf import generator_pb2, music_pb2
import monotonic
import pretty_midi

from sopilib.utils import print_err

# pyext is only available inside Pure Data; outside it, fall back to a stub
# base class so this module can still be imported (e.g. for testing).
try:
    import pyext
    ext_class = pyext._class
except Exception:
    # was a bare `except:`, which would also swallow SystemExit and
    # KeyboardInterrupt
    print_err("failed to load pyext module")

    class ext_class(object):
        def _outlet(self, *args):
            print_err("_outlet{}".format(args))


test_notes = [(0.5, 32, 100), (0.6, 33, 50), (0.7, 33, 0), (0.8, 32, 0)]


def notes_to_midi(notes, t0=0.0):
    midi = pretty_midi.PrettyMIDI()
    inst = pretty_midi.Instrument(0)

    # add notes to instrument
    # a note is added only after a note-on and a corresponding note-off
Esempio n. 18
0
def handle_generate(state):
    """Handle an IN_TAG_GENERATE request: read generation parameters, an
    optional seed clip and per-output temperature strings from stdin, run
    the model, and stream the generated clips back over stdout."""
    stdin = state.stdin
    stdout = state.stdout
    model = state.model
    config = state.config
    num_seqs = state.num_seqs

    generate_msg = read_msg(stdin, protocol.generate_struct.size)
    seed_sr, out_sr, num_outs, dur, seed_len = protocol.from_generate_msg(
        generate_msg)

    print_err("seed_sr =", seed_sr)
    print_err("out_sr =", out_sr)
    print_err("num_outs =", num_outs)
    print_err("dur =", dur)
    print_err("seed_len =", seed_len)

    # seed_len == 0 means "no seed audio"
    # NOTE(review): the host side computes seed_len as size * itemsize
    # (bytes); multiplying by f32_struct.size again here looks like an
    # over-read — confirm the units of seed_len in the protocol.
    if seed_len > 0:
        seed_msg = read_msg(stdin, seed_len * protocol.f32_struct.size)
        seed_audio = protocol.from_audio_msg(seed_msg)
    else:
        seed_audio = np.array([], dtype=np.float32)

    print_err("seed_audio size*itemsize =",
              seed_audio.size * seed_audio.itemsize)

    # one length-prefixed temperature string per requested output
    temps = []
    for i in range(num_outs):
        temp_len_msg = read_msg(stdin, protocol.size_struct.size)
        temp_len = protocol.from_size_msg(temp_len_msg)

        temp_str_msg = read_msg(stdin, temp_len)
        temp_str = protocol.from_str_msg(temp_str_msg)

        temp = gen.check_temperature(temp_str)

        temps.append(temp)

    print_err("temps =", temps)

    # out_audios = [np.random.uniform(0.0, 1.0, out_len).astype(np.float32)]*num_outs
    out_audios = list(generate(state, out_sr, dur, temps, seed_audio))

    print_err("generated")

    stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_GENERATED))
    stdout.write(
        protocol.to_generated_msg(out_sr, len(out_audios), out_audios[0].size))
    for audio in out_audios:
        stdout.write(protocol.to_audio_msg(audio))
    stdout.flush()
Esempio n. 19
0
from __future__ import print_function

import os
import sys

import numpy as np

import sopilib.ddsp_protocol as protocol
from sopilib.utils import print_err, read_msg

from handlers import handlers

# log to stderr only — stdout is reserved for the binary message protocol
print_err("hello :)")

# open standard input/output handles

stdin = sys.stdin.buffer
stdout = sys.stdout.buffer

# write init message

# tell the host process we are ready to receive commands
stdout.write(protocol.to_tag_msg(protocol.OUT_TAG_INIT))
stdout.flush()

print_err("it begins @_@")

while True:
    in_tag_msg = read_msg(stdin, protocol.tag_struct.size)
    in_tag = protocol.from_tag_msg(in_tag_msg)

    if in_tag not in handlers:
Esempio n. 20
0
    def generate_1(self, *raw_args):
        """Ask the samplernn worker to generate audio into Pd buffers.

        raw_args are parsed by self._generate_parser (--out buffer names,
        --temp temperatures, --seed buffer, sample rates, duration). One
        ["generated", buf_name, sr, len] message is emitted per output.
        """
        if not self._proc:
            raise Exception("no samplernn_worker process is running")

        args = self._generate_parser.parse_args(map(str, raw_args))
        print_err("args =", args)

        outs = args.out if args.out is not None else []
        temps = args.temp if args.temp is not None else []

        num_outs = len(outs)

        if num_outs < 1:
            print_err("no outputs specified")

        # pad temperatures by repeating the last one so each output has one
        # NOTE(review): raises IndexError when outputs are given but no
        # temperatures at all — confirm argparse guarantees at least one
        while len(temps) < num_outs:
            temps.append(temps[-1])

        if args.seed is not None:
            seed_buf = pyext.Buffer(args.seed)
            seed_audio = np.array(seed_buf, dtype=np.float32)
        else:
            seed_audio = np.array([], dtype=np.float32)

        print_err("seed_audio size*itemsize =", seed_audio.size * seed_audio.itemsize)

        seed_len = seed_audio.size * seed_audio.itemsize

        # each temperature string is sent length-prefixed after the seed
        generate_msg = protocol.to_generate_msg(args.seed_sr, args.out_sr, num_outs, args.dur, seed_len)
        seed_audio_msg = protocol.to_audio_msg(seed_audio)
        temp_msgs = map(protocol.to_str_msg, temps)
        temp_msgs = map(lambda bs: (protocol.to_size_msg(len(bs)), bs), temp_msgs)
        temp_msgs = (x for pair in temp_msgs for x in pair)

        self._write_msg(protocol.IN_TAG_GENERATE, generate_msg, seed_audio_msg, *temp_msgs)

        print_err("wrote")

        self._read_tag(protocol.OUT_TAG_GENERATED)

        generated_msg = self._proc.stdout.read(protocol.generated_struct.size)

        g_out_sr, g_num_outs, g_out_len = protocol.from_generated_msg(generated_msg)

        print_err("g_out_sr =", g_out_sr)
        print_err("g_num_outs =", g_num_outs)
        print_err("g_out_len =", g_out_len)

        out_audios = []
        for i in range(g_num_outs):
            out_audio_msg = self._proc.stdout.read(g_out_len * protocol.f32_struct.size)
            out_audios.append(protocol.from_audio_msg(out_audio_msg))

        print_err("len(out_audios) =", len(out_audios))

        assert len(outs) == len(out_audios)

        # copy each generated clip into its Pd buffer, resizing as needed
        for buf_name, audio in zip(outs, out_audios):
            buf = pyext.Buffer(buf_name)
            if len(buf) != len(audio):
                buf.resize(len(audio))

            buf[:] = audio
            buf.dirty()

            self._outlet(1, ["generated", buf_name, g_out_sr, g_out_len])
Esempio n. 21
0
    def synthesize_noz_1(self, *args):
        """Synthesize notes via the worker without explicit z vectors.

        args alternate (audio_buf_name, pitch). Current GANSpace edit
        amplitudes are read from the edits buffer and sent first, then one
        synthesize request per note; each result is written into its buffer.
        """
        if not self._proc:
            raise Exception("can't synthesize - no gansynth_worker process is running")
        
        arg_count = len(args)
        
        if arg_count == 0 or arg_count % 2 != 0:
            raise ValueError("invalid number of arguments ({}), should be a multiple of 2: synthesize_noz audio1 pitch1 [audio2 pitch2 ...]".format(arg_count))

        # read the edit amplitudes the user set in the Pd-side buffer
        component_buff = pyext.Buffer(self._edits_buf_name)
        components = np.array(component_buff, dtype=np.float64)
        component_msgs = []

        for value in components:
            component_msgs.append(protocol.to_float_msg(value))
            
        # pad with zeros so the worker always gets _component_count values
        for i in range(self._component_count - len(components)):
            component_msgs.append(protocol.to_float_msg(0.0))
                  
        self._write_msg(protocol.IN_TAG_SET_COMPONENT_AMPLITUDES, *component_msgs)

        # one synthesize request per (buffer, pitch) pair
        gen_msgs = []
        audio_buf_names = []
        for i in range(0, arg_count, 2):
            audio_buf_name, pitch = args[i:i+2]
            
            gen_msgs.append(protocol.to_synthesize_noz_msg(pitch))
            audio_buf_names.append(audio_buf_name)
            
        in_count = len(gen_msgs)
        in_count_msg = protocol.to_count_msg(in_count)
        self._write_msg(protocol.IN_TAG_SYNTHESIZE_NOZ, in_count_msg, *gen_msgs)
                  
        self._read_tag(protocol.OUT_TAG_AUDIO)

        out_count_msg = self._proc.stdout.read(protocol.count_struct.size)
        out_count = protocol.from_count_msg(out_count_msg)

        print_err("out_count =", out_count)
        
        # worker reports zero results e.g. for untrained pitches
        if out_count == 0:
            print_err("no audio was synthesized!")
            return

        assert out_count == in_count

        # read each size-prefixed clip into its target Pd buffer
        for audio_buf_name in audio_buf_names:
            audio_size_msg = self._proc.stdout.read(protocol.audio_size_struct.size)
            audio_size = protocol.from_audio_size_msg(audio_size_msg)

            audio_msg = self._proc.stdout.read(audio_size)
            audio_note = protocol.from_audio_msg(audio_msg)

            audio_buf = pyext.Buffer(audio_buf_name)
            if len(audio_buf) != len(audio_note):
                audio_buf.resize(len(audio_note))

            audio_buf[:] = audio_note
            audio_buf.dirty()
            
        # NOTE(review): other handlers send a list here — confirm whether a
        # bare string is intentional
        self._outlet(1, "synthesized")
args = parser.parse_args()

ckpt_path = gen.find_checkpoint_path(args.ckpt_dir)
config = gen.find_config(ckpt_path, args.config)

# find the model.ckpt-N checkpoint with the highest step number N
max_ckpt = None
for fn in os.listdir(args.ckpt_dir):
    m = re.match(r"^(model\.ckpt-(\d+))\.index$", fn)
    if m:
        num = int(m.group(2))
        if max_ckpt is None or max_ckpt[1] < num:
            max_ckpt = (m.group(1), num)

if max_ckpt is None:
    print_err("no model.ckpt-#.index files found in checkpoint dir")
    sys.exit(1)

ckpt_path = os.path.join(args.ckpt_dir, max_ckpt[0])

print_err("ckpt_path:", ckpt_path)

model = gen.create_inference_model(ckpt_path, args.num_seqs, config)

print_err("hello :)")

# open standard input/output handles

stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
def timbre_transfer(ckpt_dir,
                    audio,
                    in_sample_rate,
                    out_sample_rate,
                    f0_octave_shift,
                    f0_confidence_threshold,
                    loudness_db_shift,
                    log=print):
    """Run DDSP timbre transfer on `audio` using the model in `ckpt_dir`.

    Resamples the input, extracts f0/loudness features, restores the DDSP
    autoencoder from the checkpoint and re-synthesizes the audio with the
    given f0/loudness modifications. Returns the model's generated audio
    (a TensorFlow tensor; callers use .numpy()).

    NOTE(review): other call sites in this project pass adjust/quiet/autotune
    keywords that this signature does not accept — confirm which revision of
    this function is current.
    """
    print_err("args:",
              (ckpt_dir, "<audio>", in_sample_rate, out_sample_rate,
               f0_octave_shift, f0_confidence_threshold, loudness_db_shift))
    log("converting audio...")
    start_time = time.time()
    # mono, model sample rate, batch dimension of 1
    audio = librosa.to_mono(audio)
    audio = librosa.resample(audio, in_sample_rate, out_sample_rate)
    audio = audio[np.newaxis, :]
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))

    # Setup the session.
    ddsp.spectral_ops.reset_crepe()

    # Compute features.
    log("computing audio features...")
    start_time = time.time()
    audio_features = ddsp.training.eval_util.compute_audio_features(audio)
    audio_features['loudness_db'] = audio_features['loudness_db'].astype(
        np.float32)
    audio_features_mod = None
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))

    model_dir = ckpt_dir
    gin_file = os.path.join(model_dir, 'operative_config-0.gin')

    # Parse gin config,
    with gin.unlock_config():
        gin.parse_config_file(gin_file, skip_unknown=True)

    # Assumes only one checkpoint in the folder, 'ckpt-[iter]`.
    ckpt_files = [f for f in tf.io.gfile.listdir(model_dir) if 'ckpt' in f]
    ckpt_name = ckpt_files[0].split('.')[0]
    ckpt = os.path.join(model_dir, ckpt_name)

    # Ensure dimensions and sampling rates are equal
    time_steps_train = gin.query_parameter('DefaultPreprocessor.time_steps')
    n_samples_train = gin.query_parameter('Additive.n_samples')
    hop_size = int(n_samples_train / time_steps_train)

    time_steps = int(audio.shape[1] / hop_size)
    n_samples = time_steps * hop_size

    # override the trained sizes so the model matches this clip's length
    gin_params = [
        'Additive.n_samples = {}'.format(n_samples),
        'FilteredNoise.n_samples = {}'.format(n_samples),
        'DefaultPreprocessor.time_steps = {}'.format(time_steps),
    ]

    with gin.unlock_config():
        gin.parse_config(gin_params)

    # Trim all input vectors to correct lengths
    for key in ['f0_hz', 'f0_confidence', 'loudness_db']:
        audio_features[key] = audio_features[key][:time_steps]
    audio_features['audio'] = audio_features['audio'][:, :n_samples]

    # Set up the model just to predict audio given new conditioning
    log("restoring model...")
    start_time = time.time()
    model = ddsp.training.models.Autoencoder()
    model.restore(ckpt)

    # Build model by running a batch through it.
    _ = model(audio_features, training=False)
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))

    audio_features_mod = {k: v.copy() for k, v in audio_features.items()}

    # apply the requested loudness/f0 modifications before re-synthesis
    audio_features_mod = shift_ld(audio_features_mod, loudness_db_shift)
    audio_features_mod = shift_f0(audio_features_mod, f0_octave_shift)
    audio_features_mod = mask_by_confidence(audio_features_mod,
                                            f0_confidence_threshold)

    af = audio_features if audio_features_mod is None else audio_features_mod

    # Run a batch of predictions.
    log("predicting...")
    start_time = time.time()
    audio_gen = model(af, training=False)
    duration = time.time() - start_time
    log("done - {:.1f} s".format(duration))

    return audio_gen
Esempio n. 24
0
    def run_1(self,
              gin_file,
              in_arr,
              out_arr,
              f0_octave_shift=0,
              f0_confidence_threshold=0.0,
              loudness_db_shift=0.0,
              in_sample_rate=44100,
              out_sample_rate=16000,
              adjust=True,
              quiet=20.0,
              autotune=0.0):
        """Run timbre transfer through the ddsp_worker process.

        Reads input audio from the Pd buffer in_arr, sends it with the
        checkpoint dir (derived from gin_file's directory) and the transfer
        parameters to the worker, then writes the normalized result into
        out_arr and emits ["transferred", sample_count].
        """
        if not self._proc:
            raise Exception("no ddsp_worker process is running")

        # get buffers

        in_buf = pyext.Buffer(in_arr)
        out_buf = pyext.Buffer(out_arr)

        in_audio = np.array(in_buf, dtype=np.float32)

        print_err("in_audio.size =", in_audio.size)
        print_err("in_audio.itemsize =", in_audio.itemsize)

        # make timbre transfer message

        # the checkpoint dir is wherever the .gin config lives
        gin_path = os.path.join(script_dir, str(gin_file))
        ckpt_dir = os.path.dirname(gin_path)
        ckpt_msg = protocol.to_str_msg(ckpt_dir)
        print_err("len(ckpt_msg) = ", len(ckpt_msg))
        transfer_msg = protocol.to_timbre_transfer_msg(
            in_sample_rate, out_sample_rate,
            f0_octave_shift, f0_confidence_threshold, loudness_db_shift,
            bool(adjust), quiet, autotune, len(ckpt_msg),
            in_audio.size * in_audio.itemsize)
        print_err("len(transfer_msg) = ", len(transfer_msg))
        in_audio_msg = protocol.to_audio_msg(in_audio)
        print_err("len(in_audio_msg) = ", len(in_audio_msg))

        # write timbre transfer message

        self._write_msg(protocol.IN_TAG_TIMBRE_TRANSFER, transfer_msg,
                        ckpt_msg, in_audio_msg)

        print_err("wrote")

        # read timbre transferred message

        self._read_tag(protocol.OUT_TAG_TIMBRE_TRANSFERRED)

        print_err("read")

        transferred_msg = self._proc.stdout.read(
            protocol.timbre_transferred_struct.size)
        print_err("len(transferred_msg) =", len(transferred_msg))
        out_audio_len = protocol.from_timbre_transferred_msg(transferred_msg)
        print_err("out_audio_len =", out_audio_len)
        out_audio_msg = self._proc.stdout.read(out_audio_len)
        print_err("len(out_audio_msg)", len(out_audio_msg))
        out_audio = protocol.from_audio_msg(out_audio_msg)

        # resize output buffer if needed

        if len(out_audio) != len(out_buf):
            print_err("resizing")
            out_buf.resize(len(out_audio))
            print_err("resized")
        else:
            print_err("no resize")

        # write output

        out_buf[:] = normalize(out_audio)
        print_err("wrote out_audio")
        out_buf.dirty()

        self._outlet(1, ["transferred", len(out_audio)])
Esempio n. 25
0
from magenta.models.gansynth.lib import flags as lib_flags
from magenta.models.gansynth.lib import generate_util as gu
from magenta.models.gansynth.lib import model as lib_model
from magenta.models.gansynth.lib import util
import tensorflow as tf

import sopilib.gansynth_protocol as gss
from sopilib.utils import print_err, read_msg

from handlers import handlers

# parse CLI args: checkpoint directory and integer batch size
try:
    ckpt_dir = sys.argv[1]
    batch_size = int(sys.argv[2])
except (IndexError, ValueError):
    # ValueError covers a non-integer batch_size argument, which previously
    # crashed with a bare traceback instead of the usage message
    print_err("usage: {} checkpoint_dir batch_size".format(
        os.path.basename(__file__)))
    sys.exit(1)

# load the GANSynth model once at startup
flags = lib_flags.Flags({"batch_size_schedule": [batch_size]})
model = lib_model.Model.load_from_path(ckpt_dir, flags)

# unbuffered binary stdio for the message protocol
stdin = os.fdopen(sys.stdin.fileno(), "rb", 0)
stdout = os.fdopen(sys.stdout.fileno(), "wb", 0)
stdout.write(gss.to_tag_msg(gss.OUT_TAG_INIT))

# tell the host the model's fixed clip length and sample rate
audio_length = model.config['audio_length']
sample_rate = model.config['sample_rate']
info_msg = gss.to_info_msg(audio_length=audio_length, sample_rate=sample_rate)
stdout.write(info_msg)
stdout.flush()