def trainer(model_name):
    chkpt_path = None  #@param
    device = xm.xla_device()

    pt_dir = os.path.join('.', config.log['chkpt_dir'], model_name)
    os.makedirs(pt_dir, exist_ok=True)

    log_dir = os.path.join('.', config.log['log_dir'], model_name)
    os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(
                os.path.join(log_dir, '%s-%d.log' % (model_name, time.time()))),
            logging.StreamHandler()
        ])
    logger = logging.getLogger()
    writer = MyWriter(log_dir)

    trainloader = create_dataloader(train=True)
    testloader = create_dataloader(train=False)

    embedder_pt = torch.load('/drive/content/My Drive/ColabDisk/embedder_cpu.pt')
    embedder = SpeechEmbedder().to(device)
    embedder.load_state_dict(embedder_pt)
    embedder.eval()

    model = VoiceFilter().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.train['adam'])
    audio = Audio()

    starting_epoch = 1
    if chkpt_path is not None:
        logger.info("Resuming from checkpoint: %s" % chkpt_path)
        checkpoint_file = torch.load(chkpt_path)
        model.load_state_dict(checkpoint_file['model'])
        optimizer.load_state_dict(checkpoint_file['optimizer'])
        starting_epoch = checkpoint_file['epoch']
    else:
        logger.info("Starting new training run")

    for epoch in range(starting_epoch, config.train['epoch'] + 1):
        para_loader = pl.ParallelLoader(trainloader, [device]).per_device_loader(device)
        train(embedder, model, optimizer, para_loader, writer, logger, epoch, pt_dir, device)
        xm.master_print("Finished training epoch {}".format(epoch))

        logger.info("Starting to validate epoch...")
        para_loader = pl.ParallelLoader(testloader, [device]).per_device_loader(device)
        validate(audio, model, embedder, para_loader, writer, epoch, device)

    model_saver(model, optimizer, pt_dir, config.train['epoch'])
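# A minimal sketch of a model_saver helper compatible with the call above and
# with the resume branch, which reads the keys 'model', 'optimizer' and 'epoch'.
# This is an assumption about the project's helper, not its actual definition;
# on TPU, xm.save can be used in place of torch.save.
def model_saver(model, optimizer, pt_dir, epoch):
    save_path = os.path.join(pt_dir, 'chkpt_%d.pt' % epoch)
    torch.save({
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }, save_path)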
log_dir = os.path.join(hp.log.log_dir, args.name)
os.makedirs(pt_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(
            os.path.join(log_dir, '%s-%d.log' % (args.name, time.time()))),
        logging.StreamHandler()
    ])
logger = logging.getLogger()
writer = MyWriter(hp, log_dir)

assert hp.audio.hop_length == 256, \
    'hp.audio.hop_length must be equal to 256, got %d' % hp.audio.hop_length
assert hp.data.train != '' and hp.data.validation != '', \
    'hp.data.train and hp.data.validation can\'t be empty: please fix %s' % args.config

trainloader = create_dataloader(hp, args, True)
valloader = create_dataloader(hp, args, False)

train(args, pt_dir, args.checkpoint_path, trainloader, valloader, writer, logger, hp, hp_str)


def repl_test():
    chkpt_path = args.checkpoint_path
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(
            os.path.join(log_dir, '%s-%d.log' % (args.model, time.time()))),
        logging.StreamHandler()
    ])
logger = logging.getLogger()

if hp.data.train == '' or hp.data.val == '':
    logger.error("hp.data.train, hp.data.val cannot be empty")
    raise Exception("Please specify directories of train data.")

if hp.model.graph0 == '' or hp.model.graph1 == '' or hp.model.graph2 == '':
    logger.error("hp.model.graph0, graph1, graph2 cannot be empty")
    raise Exception("Please specify random DAG architecture.")

graphs = [
    read_graph(hp.model.graph0),
    read_graph(hp.model.graph1),
    read_graph(hp.model.graph2),
]

writer = MyWriter(log_dir)

trainset = KMNIST_dataloader(hp, args, True)
valset = KMNIST_dataloader(hp, args, False)

train(out_dir, chkpt_path, trainset, valset, writer, logger, hp, hp_str, graphs, in_channels=1)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch Voice Filter')
    parser.add_argument('-b', '--base_dir', type=str, default='.',
                        help="Root directory of run.")
    parser.add_argument('--checkpoint_path', type=str, default=None,
                        help='Path to last checkpoint')
    parser.add_argument('-e', '--embedder_path', type=str, required=True,
                        help="path of embedder model pt file")
    parser.add_argument('-m', '--model', type=str, required=True,
                        help="Name of the model. Used for both logging and saving checkpoints.")
    args = parser.parse_args()

    chkpt_path = args.checkpoint_path

    pt_dir = os.path.join(args.base_dir, config.log['chkpt_dir'], args.model)
    os.makedirs(pt_dir, exist_ok=True)

    log_dir = os.path.join(args.base_dir, config.log['log_dir'], args.model)
    os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(
                os.path.join(log_dir, '%s-%d.log' % (args.model, time.time()))),
            logging.StreamHandler()
        ])
    logger = logging.getLogger()
    writer = MyWriter(log_dir)

    trainloader = create_dataloader(train=True)
    testloader = create_dataloader(train=False)

    embedder_pt = torch.load(args.embedder_path)
    embedder = SpeechEmbedder().cuda()
    embedder.load_state_dict(embedder_pt)
    embedder.eval()

    # the wrapped module must live on the default GPU before DataParallel runs forward
    model = nn.DataParallel(VoiceFilter().cuda())
    optimizer = torch.optim.Adam(model.parameters(), lr=config.train['adam'])
    audio = Audio()

    starting_step = 0
    starting_epoch = 1
    if chkpt_path is not None:
        logger.info("Resuming from checkpoint: %s" % chkpt_path)
        checkpoint_file = torch.load(chkpt_path)
        model.load_state_dict(checkpoint_file['model'])
        starting_epoch = checkpoint_file['epoch']
        starting_step = checkpoint_file['step']
    else:
        logger.info("Starting new training run")

    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    for epoch in range(starting_epoch, config.train['epoch'] + 1):
        train(embedder, model, optimizer, trainloader, writer, logger, epoch, pt_dir, starting_step)
        validate(audio, model, embedder, testloader, writer, epoch)
        scheduler.step()
        starting_step = 0

    model_saver(model, pt_dir, config.train['epoch'], config.train['train_step_pre_epoch'])
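# A minimal sketch of an entry point for main() above (not shown in the original
# snippet); the script name "trainer.py" in the example invocation is an
# assumption, and the paths are placeholders.
if __name__ == '__main__':
    # Example: python trainer.py -b . -e embedder.pt -m voicefilter_baseline
    # Add --checkpoint_path <path-to-.pt> to resume from a saved checkpoint.
    main()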
num_epochs = hp.train.epoch
num_workers = hp.train.num_workers
best_loss = 100000
channels = 3
feature_size = hp.model.feature_size

## dirs
modelsave_path = hp.log.root + '/' + 'chkpt' + '/' + args.version_name
log_dir = hp.log.root + '/' + 'log' + '/' + args.version_name
os.makedirs(modelsave_path, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

## Logger
writer = MyWriter(hp, log_dir)

## Data
SNRs = hp.data.SNR
train_dataset = None
test_dataset = None
if hp.feature == 'MFCC':
    train_dataset = data.dataset(hp.data.root + '/MFCC/train/', SNRs, context=context)
    test_dataset = data.dataset(hp.data.root + '/MFCC/test/', SNRs, context=context)
else:
    raise Exception('feature type is not available')
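# A minimal sketch (not from the original script) of wrapping the MFCC datasets
# above in DataLoaders, mirroring the pattern used in the UNet trainer below;
# hp.train.batch_size is an assumed hyperparameter name.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=hp.train.batch_size,
                                           shuffle=True,
                                           num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=hp.train.batch_size,
                                          shuffle=False,
                                          num_workers=num_workers)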
device=None, requires_grad=False).to(device)
best_loss = 10

modelsave_path = args.modelsave_path + '/' + args.version_name
if not os.path.exists(modelsave_path):
    os.makedirs(modelsave_path)

pickle_path = hp.data.pkl

log_dir = hp.log.root + args.version_name
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

writer = MyWriter(hp, log_dir)

train_dataset = pickleDataset(pickle_path, 'train', hp)
val_dataset = pickleDataset(pickle_path, 'test', hp)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size,
                                           shuffle=True, num_workers=num_workers)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size,
                                         shuffle=False, num_workers=num_workers)

model = UNet().to(device)

if args.chkpt is not None:
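    # A minimal sketch of the resume branch, following the pattern of the other
    # trainers in this file; the 'model' checkpoint key is an assumption
    # borrowed from those scripts, not confirmed for this one.
    checkpoint = torch.load(args.chkpt, map_location=device)
    model.load_state_dict(checkpoint['model'])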
def ga_trainer(args, index_list, f_path, f_name):
    # parser = argparse.ArgumentParser()
    # parser.add_argument('-c', '--config', type=str, required=True,
    #                     help="yaml file for configuration")
    # parser.add_argument('-p', '--checkpoint_path', type=str, default=None, required=False,
    #                     help="path of checkpoint pt file")
    # parser.add_argument('-m', '--model', type=str, required=True,
    #                     help="name of the model. used for logging/saving checkpoints")
    # args = parser.parse_args()

    individual_model_name = args.model + "_{}_{}_{}".format(
        index_list[0], index_list[1], index_list[2])

    hp = HParam(args.config)
    with open(args.config, 'r') as f:
        hp_str = ''.join(f.readlines())

    ## location where PyTorch checkpoints are saved
    pt_path = os.path.join('.', hp.log.chkpt_dir)
    ## save this individual under the model name defined above
    out_dir = os.path.join(pt_path, individual_model_name)
    os.makedirs(out_dir, exist_ok=True)

    log_dir = os.path.join('.', hp.log.log_dir)
    log_dir = os.path.join(log_dir, individual_model_name)
    os.makedirs(log_dir, exist_ok=True)

    if args.checkpoint_path is not None:
        chkpt_path = args.checkpoint_path
    else:
        chkpt_path = None

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(
                os.path.join(log_dir, '%s-%d.log' % (args.model, time.time()))),
            logging.StreamHandler()
        ])
    logger = logging.getLogger()

    if hp.data.train == '' or hp.data.val == '':
        logger.error("hp.data.train, hp.data.val cannot be empty")
        raise Exception("Please specify directories of train data.")

    if hp.model.graph0 == '' or hp.model.graph1 == '' or hp.model.graph2 == '':
        logger.error("hp.model.graph0, graph1, graph2 cannot be empty")
        raise Exception("Please specify random DAG architecture.")

    # graphs = [
    #     read_graph(hp.model.graph0),
    #     read_graph(hp.model.graph1),
    #     read_graph(hp.model.graph2),
    # ]

    ## read the graphs from the newly generated file locations
    # print(os.path.join(f_path, args.model + '_' + str(7) + '.txt'))
    graphs = [
        read_graph(os.path.join(f_path, args.model + '_' + str(idx) + '.txt'))
        for idx in index_list
    ]

    writer = MyWriter(log_dir)

    dataset = hp.data.type
    switcher = {
        'MNIST': MNIST_dataloader,
        'CIFAR10': CIFAR10_dataloader,
        'ImageNet': create_dataloader,
    }
    assert dataset in switcher.keys(), 'Dataset type currently not supported'
    dl_func = switcher[dataset]
    trainset = dl_func(hp, args, True)
    valset = dl_func(hp, args, False)

    val_acc = ga_train(out_dir, chkpt_path, trainset, valset, writer, logger, hp, hp_str, graphs)
    return val_acc
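# Hypothetical usage sketch of ga_trainer (not part of the original file):
# GRAPH_DIR and ga_args are assumed names, and the chosen indices are only an
# example. ga_args is expected to carry .model, .config and .checkpoint_path,
# mirroring the commented-out argparse block above; f_name is not used in the
# body shown above, so None is passed.
GRAPH_DIR = './generated_graphs'
val_acc = ga_trainer(ga_args, index_list=[0, 1, 2], f_path=GRAPH_DIR, f_name=None)
print('validation accuracy of this individual: %.4f' % val_acc)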