def setUp(self):
    self.voice_changer: VoiceChanger = AttrDict(
        acoustic_converter=AttrDict(config=AttrDict(dataset=AttrDict(
            acoustic_param=AcousticParam(sampling_rate=16000),
        ))),
        super_resolution=AttrDict(config=AttrDict(dataset=AttrDict(
            param=Param(),
        ))),
        output_sampling_rate=24000,
    )
    self.stream = ConvertStream(voice_changer=self.voice_changer)
    self.stream.in_segment_method._keys = ['f0']
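# The setUp above builds the VoiceChanger test double out of an AttrDict
# helper that is not shown in this excerpt. A minimal sketch of the usual
# pattern (a dict whose keys are also readable as attributes) follows; the
# helper actually used in this repository may differ in detail.
class AttrDict(dict):
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

# Nested AttrDicts let the test read configuration the same way the real
# objects would, e.g.:
#   self.voice_changer.acoustic_converter.config.dataset.acoustic_param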
import json
from pathlib import Path
from typing import Union


def create_from_json(s: Union[str, Path]):
    with open(s) as f:
        d = json.load(f)
    backward_compatible(d)
    return Config(
        dataset=DatasetConfig(
            acoustic_param=AcousticParam(**d['dataset']['acoustic_param']),
            input_glob=Path(d['dataset']['input_glob']),
            target_glob=Path(d['dataset']['target_glob']),
            indexes_glob=Path(d['dataset']['indexes_glob']),
            in_features=d['dataset']['in_features'],
            out_features=d['dataset']['out_features'],
            train_crop_size=d['dataset']['train_crop_size'],
            input_global_noise=d['dataset']['input_global_noise'],
            input_local_noise=d['dataset']['input_local_noise'],
            target_global_noise=d['dataset']['target_global_noise'],
            target_local_noise=d['dataset']['target_local_noise'],
            seed=d['dataset']['seed'],
            num_test=d['dataset']['num_test'],
        ),
        model=ModelConfig(
            in_channels=d['model']['in_channels'],
            out_channels=d['model']['out_channels'],
            generator_base_channels=d['model']['generator_base_channels'],
            generator_extensive_layers=d['model']['generator_extensive_layers'],
            discriminator_base_channels=d['model']['discriminator_base_channels'],
            discriminator_extensive_layers=d['model']['discriminator_extensive_layers'],
            weak_discriminator=d['model']['weak_discriminator'],
            glu_generator=d['model']['glu_generator'],
        ),
        loss=LossConfig(
            mse=d['loss']['mse'],
            adversarial=d['loss']['adversarial'],
        ),
        train=TrainConfig(
            batchsize=d['train']['batchsize'],
            gpu=d['train']['gpu'],
            log_iteration=d['train']['log_iteration'],
            snapshot_iteration=d['train']['snapshot_iteration'],
            stop_iteration=d['train']['stop_iteration'],
            optimizer=d['train']['optimizer'],
            pretrained_model=d['train']['pretrained_model'],
        ),
        project=ProjectConfig(
            name=d['project']['name'],
            tags=d['project']['tags'],
        ),
    )
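# A config file passed to create_from_json must provide every key read above,
# grouped under "dataset", "model", "loss", "train" and "project". A trimmed,
# hypothetical layout (the values are placeholders, not defaults from this
# repository):
#
#     {
#       "dataset": {"acoustic_param": {}, "input_glob": "data/in/*.npy", ...},
#       "model": {"in_channels": 10, "out_channels": 10, ...},
#       "loss": {"mse": 1.0, "adversarial": 1.0},
#       "train": {"batchsize": 16, "gpu": 0, ...},
#       "project": {"name": "example", "tags": []}
#     }
#
# config = create_from_json(Path('config.json'))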
import argparse
import math
import multiprocessing
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy
import tqdm
from sprocket.speech import FeatureExtractor
from yukarin import Wave
from yukarin.param import AcousticParam
from yukarin.utility.sprocket_utility import SpeakerYML
from yukarin.utility.sprocket_utility import low_cut_filter

base_acoustic_param = AcousticParam()

parser = argparse.ArgumentParser()
parser.add_argument('--input_wave_glob', '-i')
parser.add_argument('--candidate_threshold', '-th', nargs='+', type=float)
parser.add_argument('--output_image', '-o', type=Path)
parser.add_argument('--speaker_yml', type=Path)
parser.add_argument('--pad_second', type=float, default=base_acoustic_param.pad_second)
arguments = parser.parse_args()

# read parameters from speaker yml
sconf = SpeakerYML(arguments.speaker_yml)

# construct FeatureExtractor class
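# A sketch of the construction the comment above announces, assuming
# SpeakerYML exposes the analysis settings under the attribute names used in
# sprocket's example scripts (analyzer, wav_fs, wav_fftl, wav_shiftms,
# f0_minf0, f0_maxf0); check against the sprocket version in use.
feat = FeatureExtractor(
    analyzer=sconf.analyzer,
    fs=sconf.wav_fs,
    fftl=sconf.wav_fftl,
    shiftms=sconf.wav_shiftms,
    minf0=sconf.f0_minf0,
    maxf0=sconf.f0_maxf0,
)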