Example #1
# imports assumed from the become-yukarin package and PyPI (module paths are assumptions)
from pathlib import Path

import librosa
from become_yukarin import AcousticConverter
from become_yukarin.config.config import create_config


def inference(model_itr, bar):
    bar.next()
    model_path = Path('05output/predictor_' + str(model_itr) + '.npz')
    config_path = Path('recipe/config.json')
    config = create_config(config_path)
    acoustic_converter = AcousticConverter(config, model_path, gpu=0)
    wave = acoustic_converter(voice_path=Path('01input02/music0001_80.wav'))
    # note: librosa.output.write_wav was removed in librosa 0.8,
    # so this call requires librosa < 0.8
    librosa.output.write_wav('inference_output_' + str(model_itr) + '.wav',
                             wave.wave,
                             wave.sampling_rate,
                             norm=True)
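
def run_inference_over_checkpoints():
    # minimal driver sketch for inference(), assuming the `progress` package's
    # Bar; the checkpoint iteration numbers below are hypothetical
    from progress.bar import Bar
    iterations = [100000, 200000]
    bar = Bar('inference', max=len(iterations))
    for model_itr in iterations:
        inference(model_itr, bar)  # inference() advances the bar itself
    bar.finish()
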
def process(p: Path, acoustic_converter: AcousticConverter):
    try:
        if p.suffix in ['.npy', '.npz']:
            fn = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
            p = Path(fn)
        wave = acoustic_converter(p)
        librosa.output.write_wav(str(output / p.stem) + '.wav', wave.wave, wave.sampling_rate, norm=True)
    except Exception:
        import traceback
        print('error!', str(p))
        print(traceback.format_exc())


for model_name in args.model_names:
    base_model = model_directory / model_name
    config = create_config(base_model / 'config.json')

    input_paths = list(sorted([Path(p) for p in glob.glob(str(config.dataset.input_glob))]))
    numpy.random.RandomState(config.dataset.seed).shuffle(input_paths)
    path_train = input_paths[0]
    path_test = input_paths[-1]

    if it is not None:
        model_path = base_model / 'predictor_{}.npz'.format(it)
    else:
        model_paths = base_model.glob('predictor_*.npz')
        model_path = list(sorted(model_paths, key=extract_number))[-1]
    print(model_path)
    acoustic_converter = AcousticConverter(config, model_path, gpu=gpu)

    output = Path('./output').absolute() / base_model.name
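    # the original loop body is cut off here; a minimal continuation sketch,
    # assuming only the held-out train/test samples are converted
    output.mkdir(parents=True, exist_ok=True)
    for p in [path_train, path_test]:
        process(p, acoustic_converter)
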
Example #3
            fn = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
            p = Path(fn)
        wave = acoustic_converter(p)
        librosa.output.write_wav(str(output / p.stem) + '.wav',
                                 wave.wave,
                                 wave.sampling_rate,
                                 norm=True)
    except Exception:
        import traceback
        print('error!', str(p))
        print(traceback.format_exc())


for model_name in args.model_names:
    base_model = model_directory / model_name
    config = create_config(base_model / 'config.json')

    input_paths = list(
        sorted([Path(p) for p in glob.glob(str(config.dataset.input_glob))]))
    numpy.random.RandomState(config.dataset.seed).shuffle(input_paths)
    path_train = input_paths[0]
    path_test = input_paths[-1]

    if it is not None:
        model_path = base_model / 'predictor_{}.npz'.format(it)
    else:
        model_paths = base_model.glob('predictor_*.npz')
        model_path = list(sorted(model_paths, key=extract_number))[-1]
    print(model_path)
    acoustic_converter = AcousticConverter(config, model_path, gpu=gpu)
parser.add_argument('--f0_floor1', type=float, default=71)
parser.add_argument('--f0_ceil1', type=float, default=800)
parser.add_argument('--f0_floor2', type=float, default=71)
parser.add_argument('--f0_ceil2', type=float, default=800)
parser.add_argument('--ignore_feature',
                    nargs='+',
                    default=['spectrogram', 'aperiodicity'])
parser.add_argument('--disable_alignment', action='store_true')
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()
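
# Hypothetical invocation (script name illustrative; flags are from the parser above):
#   python extract_acoustic_feature.py --f0_floor1 71 --f0_ceil1 800 \
#       --ignore_feature spectrogram aperiodicity --enable_overwrite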

pprint(dir(arguments))

pre_convert = arguments.pre_converter1_config is not None
if pre_convert:
    config = create_config(arguments.pre_converter1_config)
    pre_converter1 = AcousticConverter(config, arguments.pre_converter1_model)
else:
    pre_converter1 = None


def generate_feature(path1, path2):
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    if out1.exists() and out2.exists() and not arguments.enable_overwrite:
        return

    # load wave and padding
    wave_file_load_process = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
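
The "load wave and padding" step usually means bringing the two waves to a common length before alignment; a generic numpy sketch of that idea (not the repo's own code):

import numpy

def pad_to_common_length(wave1, wave2):
    # zero-pad the shorter wave so both have the same number of samples
    n = max(len(wave1), len(wave2))
    return (numpy.pad(wave1, (0, n - len(wave1))),
            numpy.pad(wave2, (0, n - len(wave2))))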
# imports assumed (module paths for become-yukarin are assumptions)
from pathlib import Path
from typing import NamedTuple

from become_yukarin import AcousticConverter, SuperResolution
from become_yukarin.config.config import create_config
from become_yukarin.config.sr_config import create_sr_config


class AudioConfig(NamedTuple):
    rate: int
    chunk: int
    vocoder_buffer_size: int
    out_norm: float


model_base_path = Path('~/Github/become-yukarin/trained/').expanduser()
test_data_path = Path('tests/test-deep-learning-yuduki-yukari.wav')
test_output_path = Path('output.wav')

print('model loading...', flush=True)

model_path = model_base_path / Path('harvest-innoise03/predictor_1390000.npz')
config_path = model_base_path / Path('harvest-innoise03/config.json')
config = create_config(config_path)
acoustic_converter = AcousticConverter(config, model_path, gpu=0)
print('model 1 loaded!', flush=True)

model_path = model_base_path / Path('sr-noise3/predictor_180000.npz')
config_path = model_base_path / Path('sr-noise3/config.json')
sr_config = create_sr_config(config_path)
super_resolution = SuperResolution(sr_config, model_path, gpu=0)
print('model 2 loaded!', flush=True)

audio_config = AudioConfig(
    rate=config.dataset.param.voice_param.sample_rate,
    chunk=config.dataset.param.voice_param.sample_rate,
    vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16,
    out_norm=4.5,
)
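
The snippet stops after building audio_config and does not show how the two models are chained. As a small grounded check, the test clip can at least be loaded at the configured rate (librosa.load is a stable API; test_data_path and audio_config come from the snippet above):

import librosa

wave, sr = librosa.load(str(test_data_path), sr=audio_config.rate)
print('test clip:', len(wave), 'samples at', sr, 'Hz', flush=True)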
Example #6
# imports assumed from the become-yukarin package and PyPI (module paths are
# assumptions); convert_worker and AudioConfig are defined elsewhere in this script
import queue
import signal
from multiprocessing import Process, Queue
from pathlib import Path

import numpy
import pyaudio
from become_yukarin import AcousticConverter, SuperResolution
from become_yukarin.config.config import create_config
from become_yukarin.config.sr_config import create_sr_config


def main():
    print('model loading...', flush=True)

    queue_input_wave = Queue()
    queue_output_wave = Queue()

    model_path = Path('./trained/harvest-innoise03/predictor_1390000.npz')
    config_path = Path('./trained/harvest-innoise03/config.json')
    config = create_config(config_path)
    acoustic_converter = AcousticConverter(config, model_path, gpu=0)
    print('model 1 loaded!', flush=True)

    model_path = Path('./trained/sr-noise3/predictor_180000.npz')
    config_path = Path('./trained/sr-noise3/config.json')
    sr_config = create_sr_config(config_path)
    super_resolution = SuperResolution(sr_config, model_path, gpu=0)
    print('model 2 loaded!', flush=True)

    audio_instance = pyaudio.PyAudio()
    audio_config = AudioConfig(
        rate=config.dataset.param.voice_param.sample_rate,
        audio_chunk=config.dataset.param.voice_param.sample_rate,
        convert_chunk=config.dataset.param.voice_param.sample_rate,
        vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16,
        out_norm=2.5,
    )

    process_converter = Process(target=convert_worker,
                                kwargs=dict(
                                    config=config,
                                    audio_config=audio_config,
                                    acoustic_converter=acoustic_converter,
                                    super_resolution=super_resolution,
                                    queue_input_wave=queue_input_wave,
                                    queue_output_wave=queue_output_wave,
                                ))
    process_converter.start()

    signal.signal(signal.SIGINT,
                  lambda signum, frame: process_converter.terminate())

    audio_stream = audio_instance.open(
        format=pyaudio.paFloat32,
        channels=1,
        rate=audio_config.rate,
        frames_per_buffer=audio_config.audio_chunk,
        input=True,
        output=True,
    )

    # process_converter.join()

    while True:
        # input audio
        in_data = audio_stream.read(audio_config.audio_chunk)
        wave = numpy.frombuffer(in_data, dtype=numpy.float32)  # fromstring is deprecated for binary data
        print('input', len(wave), flush=True)
        queue_input_wave.put(wave)

        # output
        try:
            wave = queue_output_wave.get_nowait()
        except queue.Empty:
            wave = None

        if wave is not None:
            print('output', len(wave), flush=True)
            wave *= audio_config.out_norm
            b = wave.astype(numpy.float32).tobytes()
            audio_stream.write(b)
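
The snippet is cut off before the entry point; since main() spawns a multiprocessing.Process, the standard guard is needed on spawn-based platforms:

if __name__ == '__main__':
    main()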
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
parser.add_argument('--f0_estimating_method', type=str, default=base_acoustic_feature_param.f0_estimating_method)
parser.add_argument('--f0_floor1', type=float, default=71)
parser.add_argument('--f0_ceil1', type=float, default=800)
parser.add_argument('--f0_floor2', type=float, default=71)
parser.add_argument('--f0_ceil2', type=float, default=800)
parser.add_argument('--ignore_feature', nargs='+', default=['spectrogram', 'aperiodicity'])
parser.add_argument('--disable_alignment', action='store_true')
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()

pprint(dir(arguments))

pre_convert = arguments.pre_converter1_config is not None
if pre_convert:
    config = create_config(arguments.pre_converter1_config)
    pre_converter1 = AcousticConverter(config, arguments.pre_converter1_model)
else:
    pre_converter1 = None


def generate_feature(path1, path2):
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    if out1.exists() and out2.exists() and not arguments.enable_overwrite:
        return

    # load wave and padding
    wave_file_load_process = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
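
A hypothetical driver for generate_feature, pairing files from two directories by sorted order (input1_directory and input2_directory are assumed argument names, not confirmed flags of this script):

for path1, path2 in zip(sorted(Path(arguments.input1_directory).glob('*.wav')),
                        sorted(Path(arguments.input2_directory).glob('*.wav'))):
    generate_feature(path1, path2)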