def __init__(self):
    """Build, load and pre-trace the MelGAN and vanilla sub-models.

    Steps, in order:

    1. Instantiate both models from the hyper-parameter modules named by
       ``config_melgan`` / ``config_vanilla`` and merge the pretrained
       weights stored under ``ROOT`` into their state dicts.
    2. Switch both models to eval mode and trace / script the pieces that
       are kept on ``self`` (mel encoder, encoder, decoder, MelGAN decoder,
       log-loudness) using random dummy inputs.
    3. Copy a handful of the vanilla hyper-parameters into the global
       ``config``.
    4. When ``PCA`` is truthy, load the cached PCA from ``ROOT/pca.pth``
       (computing and caching it when it cannot be loaded) and register
       its first three entries as the buffers ``mean``, ``std`` and ``U``.

    Raises:
        Exception: if the PCA cache cannot be loaded while
            ``config.USE_CACHED_PADDING`` is enabled.
    """
    super().__init__()

    # BUILDING MELGAN #################################################
    hparams_melgan = importlib.import_module(config_melgan).config
    hparams_melgan.override(USE_CACHED_PADDING=config.USE_CACHED_PADDING)
    melgan = get_model(hparams_melgan)

    # The MelGAN checkpoint is indexable; only its first entry is merged
    # into the model weights here.
    pretrained_state_dict = torch.load(
        path.join(ROOT, "melgan", "melgan_state.pth"),
        map_location="cpu")[0]
    state_dict = melgan.state_dict()
    state_dict.update(pretrained_state_dict)
    melgan.load_state_dict(state_dict)
    ###################################################################

    # BUILDING VANILLA ################################################
    hparams_vanilla = importlib.import_module(config_vanilla).config
    hparams_vanilla.override(USE_CACHED_PADDING=config.USE_CACHED_PADDING)
    vanilla = get_model(hparams_vanilla)

    pretrained_state_dict = torch.load(
        path.join(ROOT, "vanilla", "vanilla_state.pth"),
        map_location="cpu")
    state_dict = vanilla.state_dict()
    state_dict.update(pretrained_state_dict)
    vanilla.load_state_dict(state_dict)
    ###################################################################

    vanilla.eval()
    melgan.eval()

    # PRETRACE MODELS #################################################
    self.latent_size = int(config.CHANNELS[-1] // 2)
    self.mel_size = int(config.CHANNELS[0])

    # Dummy inputs used only for tracing. The latent gets one extra
    # channel when the vanilla model extracts loudness.
    if config.USE_CACHED_PADDING:
        test_wav = torch.randn(1, config.BUFFER_SIZE)
        test_mel = torch.randn(1, config.INPUT_SIZE, 2)
        if hparams_vanilla.EXTRACT_LOUDNESS:
            test_z = torch.randn(1, self.latent_size + 1, 1)
        else:
            test_z = torch.randn(1, self.latent_size, 1)
    else:
        test_wav = torch.randn(1, 8192)
        test_mel = torch.randn(1, config.INPUT_SIZE, 16)
        if hparams_vanilla.EXTRACT_LOUDNESS:
            test_z = torch.randn(1, self.latent_size + 1, 16)
        else:
            test_z = torch.randn(1, self.latent_size, 16)

    melencoder = TracedMelEncoder(
        vanilla.melencoder,
        BufferSTFT(config.BUFFER_SIZE, config.HOP_LENGTH),
        config.HOP_LENGTH, config.USE_CACHED_PADDING)

    logloudness = LogLoudness(
        int(hparams_vanilla.HOP_LENGTH * np.prod(hparams_vanilla.RATIOS)),
        1e-4)

    self.trace_logloudness = torch.jit.script(logloudness)
    self.trace_melencoder = torch.jit.trace(melencoder,
                                            test_wav,
                                            check_trace=False)
    self.trace_encoder = torch.jit.trace(vanilla.topvae.encoder,
                                         test_mel,
                                         check_trace=False)
    self.trace_decoder = torch.jit.trace(vanilla.topvae.decoder,
                                         test_z,
                                         check_trace=False)
    self.trace_melgan = torch.jit.trace(melgan.decoder,
                                        test_mel,
                                        check_trace=False)

    config.override(SAMPRATE=hparams_vanilla.SAMPRATE,
                    N_SIGNAL=hparams_vanilla.N_SIGNAL,
                    EXTRACT_LOUDNESS=hparams_vanilla.EXTRACT_LOUDNESS,
                    TYPE=hparams_vanilla.TYPE,
                    HOP_LENGTH=hparams_vanilla.HOP_LENGTH,
                    RATIOS=hparams_vanilla.RATIOS,
                    WAV_LOC=hparams_vanilla.WAV_LOC,
                    LMDB_LOC=hparams_vanilla.LMDB_LOC)

    self.pca = None

    if PCA:
        pca_path = path.join(ROOT, "pca.pth")
        try:
            self.pca = torch.load(pca_path)
        except Exception as err:
            # Catch Exception (not everything) so that Ctrl-C and
            # SystemExit still propagate; any load failure is treated as
            # "no cache available".
            if config.USE_CACHED_PADDING:
                raise Exception(
                    "PCA should be first computed in non cache mode"
                ) from err
            print("No precomputed pca found. Computing.")
        else:
            print("Precomputed pca found")

        if self.pca is None:
            self.pca = compute_pca(self, 32)
            torch.save(self.pca, pca_path)

        self.register_buffer("mean", self.pca[0])
        self.register_buffer("std", self.pca[1])
        self.register_buffer("U", self.pca[2])

    self.extract_loudness = config.EXTRACT_LOUDNESS
def _load_model():
    """Load the model with ``get_model`` and store it in the global ``model``.

    Reads the module-level ``model_bucket`` and ``model_filename`` and logs a
    debug message once the global has been rebound.
    """
    global model

    loaded = get_model(model_bucket, model_filename)
    model = loaded

    logger.debug('successfully loaded model')
config.parse_args()

# PREPARE DATA
dataset = Loader(config.AUGMENT)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=config.BATCH,
    shuffle=True,
    drop_last=True,
)

# PREPARE MODELS
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# MELGAN TRAINING
if config.TYPE == "melgan":
    gen, dis = get_model(), Discriminator()

    # Optionally resume: the checkpoint unpacks into exactly two state
    # dicts, generator first and discriminator second.
    if config.CKPT is not None:
        ckptgen, ckptdis = torch.load(config.CKPT, map_location="cpu")
        gen.load_state_dict(ckptgen)
        dis.load_state_dict(ckptdis)

    gen, dis = gen.to(device), dis.to(device)

    # PREPARE OPTIMIZERS
    opt_gen = torch.optim.Adam(
        gen.parameters(),
        lr=config.LR,
        betas=[0.5, 0.9],
    )
    opt_dis = torch.optim.Adam(
        dis.parameters(),
        lr=config.LR,
        betas=[0.5, 0.9],
    )

    model = (gen, dis)
def main(mode=None):
    r"""Start the model in training, testing or evaluation mode.

    Loads the configuration, seeds every random number generator, selects
    the compute device, builds the model returned by ``get_model`` and then
    runs ``train()``, ``test()`` or ``eval()`` depending on ``config.MODE``.

    Args:
        mode (int): 1: train, 2: test, 3: eval, reads from config file if not specified
    """
    # Declared up front because both names are rebound in the test branch
    # below and ``get_model`` is called later in this function.
    global Config, get_model

    config = load_config(mode)

    # initialize random seed
    torch.manual_seed(config.SEED)
    torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(config.SEED)
    random.seed(config.SEED)

    # NOTE(review): these branches test the ``mode`` argument, not
    # ``config.MODE``, so neither runs when the mode comes from the config
    # file -- confirm this is intended.
    if mode == 1:
        # copy network files as a backup when training
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters from being misinterpreted. The copy stays
        # best-effort: a failure is reported but never aborts training.
        import subprocess

        try:
            subprocess.run(["cp", "-r", "./src", "%s/" % config.PATH],
                           check=False)
        except OSError as err:
            print("warning: could not back up ./src: %s" % err)
    elif mode == 2:
        # select source code when testing
        src = importlib.import_module(
            "checkpoints.%s.src" % (os.path.basename(config.PATH)))
        Config = src.Config
        get_model = src.get_model

    # cuda visible devices
    # os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(e) for e in config.GPU)
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible_devices:
        # Split on commas so multi-digit ids such as "10,11" count as two
        # devices rather than four separate digits.
        device_ids = [
            int(entry) for entry in
            (token.strip() for token in visible_devices.split(","))
            if entry.isdigit()
        ]
        config.GPU = list(range(len(device_ids)))

    # init device
    if torch.cuda.is_available():
        config.DEVICE = torch.device("cuda")
        torch.backends.cudnn.benchmark = True  # cudnn auto-tuner
    else:
        config.DEVICE = torch.device("cpu")

    # disable OpenCV's own threading (prevents deadlocks with pytorch dataloader)
    cv2.setNumThreads(0)

    # import inpainting model
    InpaintingModel = get_model(model=config.MODEL)

    # build the model and initialize
    model = InpaintingModel(config)
    model.load()

    # model training
    if config.MODE == 1:
        config.print()
        model.print()
        print('\nstart training...\n')
        model.train()

    # model test
    elif config.MODE == 2:
        print('\nstart testing...\n')
        model.test()

    # eval mode
    else:
        print('\nstart eval...\n')
        model.eval()