def main(epochs=5, learning_rate=1e-3):
    # use GPU
    device = torch.device('cuda')

    # get data loaders
    training = get_dataloader(train=True)
    testing = get_dataloader(train=False)

    # model
    model = CNN().to(device)
    info('Model')
    print(model)

    # cost function
    cost = torch.nn.BCELoss()

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(1, epochs + 1):
        info('Epoch {}'.format(epoch))
        train(model, device, training, cost, optimizer, epoch)
        test(model, device, testing, cost)

    # save model
    info('Saving Model')
    save_model(model, device, 'model.onnx')
    print('Saving PyTorch Model as model.pth')
    torch.save(model.state_dict(), 'model.pth')
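# The save_model helper called above is not defined in these snippets. A minimal
# sketch using torch.onnx.export, assuming a hypothetical (1, 1, 28, 28) input
# shape; the real helper and the model's input shape may differ.
def save_model(model, device, path, input_shape=(1, 1, 28, 28)):
    model.eval()
    dummy_input = torch.randn(*input_shape, device=device)  # assumed input shape
    torch.onnx.export(model, dummy_input, path)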
def run(args):
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logger = get_logger(os.path.join(args.logdir, 'main.log'))
    logger.info(args)

    # data
    source_transform = transforms.Compose([
        # transforms.Grayscale(),
        transforms.ToTensor(),
    ])
    target_transform = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    ])
    source_dataset_train = SVHN(
        './input', 'train', transform=source_transform, download=True)
    target_dataset_train = MNIST(
        './input', train=True, transform=target_transform, download=True)
    target_dataset_test = MNIST(
        './input', train=False, transform=target_transform, download=True)
    source_train_loader = DataLoader(
        source_dataset_train, args.batch_size, shuffle=True,
        drop_last=True, num_workers=args.n_workers)
    target_train_loader = DataLoader(
        target_dataset_train, args.batch_size, shuffle=True,
        drop_last=True, num_workers=args.n_workers)
    target_test_loader = DataLoader(
        target_dataset_test, args.batch_size, shuffle=False,
        num_workers=args.n_workers)

    # train source CNN
    source_cnn = CNN(in_channels=args.in_channels).to(args.device)
    if os.path.isfile(args.trained):
        print("loading trained source model")
        c = torch.load(args.trained)
        source_cnn.load_state_dict(c['model'])
        logger.info('Loaded `{}`'.format(args.trained))
    else:
        print("no trained source model found; training from scratch")

    # train target CNN
    target_cnn = CNN(in_channels=args.in_channels, target=True).to(args.device)
    target_cnn.load_state_dict(source_cnn.state_dict())
    discriminator = Discriminator(args=args).to(args.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        target_cnn.encoder.parameters(),
        lr=args.lr, betas=args.betas, weight_decay=args.weight_decay)
    d_optimizer = optim.Adam(
        discriminator.parameters(),
        lr=args.lr, betas=args.betas, weight_decay=args.weight_decay)
    train_target_cnn(
        source_cnn, target_cnn, discriminator, criterion, optimizer,
        d_optimizer, source_train_loader, target_train_loader,
        target_test_loader, args=args)
def main():
    parser = argparse.ArgumentParser(description="CNN")
    parser.add_argument("--num_epoch", type=int, default=30)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--device", type=str, default="cuda")
    parser.add_argument("--data_root", type=str, default="./data")
    parser.add_argument("--data_name", type=str, default="mnist")
    parser.add_argument("--image_size", type=int, default=32)
    parser.add_argument("--image_channels", type=int, default=1)
    opt = parser.parse_args()

    model = CNN(opt.image_size, opt.image_channels).to(opt.device)
    for epoch in range(opt.num_epoch):
        loss = train(model, opt)
        print("loss: {:.6f}".format(loss))

    os.makedirs("./weights", exist_ok=True)  # ensure the output directory exists
    torch.save(model.state_dict(), "./weights/cnn.pth")
def train():
    transforms = Compose([ToTensor()])
    train_dataset = CaptchaData('./data/train', transform=transforms)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size,
                                   num_workers=0, shuffle=True, drop_last=True)
    test_data = CaptchaData('./data/test', transform=transforms)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=0, shuffle=True, drop_last=True)

    cnn = CNN()
    if torch.cuda.is_available():
        cnn.cuda()
    if restor:
        cnn.load_state_dict(torch.load(model_path))
    # freezing_layers = list(cnn.named_parameters())[:10]
    # for param in freezing_layers:
    #     param[1].requires_grad = False
    #     print('freezing layer:', param[0])

    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)

            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(loss.item())  # store a float, not the graph-attached tensor
        print('train_loss: {:.4}|train_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        with torch.no_grad():
            for img, target in test_data_loader:
                if torch.cuda.is_available():
                    img = img.cuda()
                    target = target.cuda()
                output = cnn(img)
                loss = criterion(output, target)  # test loss for this batch

                acc = calculat_acc(output, target)
                acc_history.append(float(acc))
                loss_history.append(loss.item())
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))

        torch.save(cnn.state_dict(), model_path)
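# calculat_acc is referenced here but not defined in these snippets. A plausible
# sketch for the multi-label captcha setup; the (num_char, num_class) layout and
# the default values are assumptions.
def calculat_acc(output, target, num_char=4, num_class=36):
    output = output.view(-1, num_char, num_class).argmax(dim=2)  # predicted char indices
    target = target.view(-1, num_char, num_class).argmax(dim=2)  # true char indices
    correct = (output == target).all(dim=1).float()  # a captcha counts only if every char matches
    return correct.mean()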
    input = input.view(input.shape[0], -1) if model == 'MLP' else input
    # flatten the input into a 1-D matrix per sample: [batch_size, channel * height * width]

    classification_results = model.forward(input)  # [batch_size, 10]
    # runs the forward pass of the MLP class on the input built above

    l = loss(classification_results, label)
    # feed the forward output and the label to the loss object (cross entropy)
    list_loss.append(l.detach().item())
    # l.detach(): exclude this value from gradient computation; the loss has
    #   already been computed in l = loss(...), so no graph is needed here
    # item(): converts the 0-dim tensor into a Python float

    optim.zero_grad()  # reset every weight's accumulated gradient to zero
    l.backward()       # backpropagate, assigning a gradient to each weight
    optim.step()       # update each weight from its gradient, per the optimizer settings

torch.save(model, '{}.pt'.format(MODEL))
# saves the model's weights and biases (e.g. as mlp.pt); because this also
# stores the model structure, the model definition file (models.py) must be
# present wherever the model is loaded back
torch.save(model.state_dict(), 'MNIST_model_{}.pt'.format(MODEL))
# saves only the weights as a dictionary in the .pt file; in this case the
# weight file does not need to live next to models.py, which is easier to manage

plt.figure()
plt.plot(range(len(list_loss)), list_loss)
plt.show()
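# The two save calls above load back differently. A brief sketch, assuming
# MODEL = 'mlp' and that MLP is the class defined in models.py:
model = torch.load('mlp.pt')  # whole-model file: needs models.py importable
model = MLP()                 # state-dict file: rebuild the architecture first,
model.load_state_dict(torch.load('MNIST_model_mlp.pt'))  # then restore the weights
model.eval()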
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        if torch.cuda.is_available():
            self.model = nn.DataParallel(CNN(self.config)).cuda()
        else:
            self.model = CNN(self.config)

        self.loss_fn = self.config.loss_fn()

        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            if torch.cuda.is_available():
                # unwrap DataParallel so state dicts load without the `module.` prefix
                self.model = self.model.module
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)
        if torch.cuda.is_available():
            torch.save(self.model.module.state_dict(), ckpt_path)
        else:
            torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        print(self.model)
        self.model.load_state_dict(torch.load(ckpt_path))

    def train_once(self):
        loss_history = []
        for batch_i, batch in enumerate(tqdm(self.data_loader)):
            text, label = batch.text, batch.label
            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()
            text.data.t_()

            logit = self.model(text)
            average_batch_loss = self.loss_fn(logit, label)
            loss_history.append(average_batch_loss.item())

            self.optimizer.zero_grad()
            average_batch_loss.backward()
            self.optimizer.step()

        epoch_loss = np.mean(loss_history)
        return epoch_loss

    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []
            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label
                if torch.cuda.is_available():
                    text = text.cuda()
                    label = label.cuda()

                # [batch_size, max_seq_len]
                text.data.t_()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # Tensor -> float

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.4f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir,
                                         f'epoch-{epoch + 1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model on text data"""
        n_total_data = 0
        n_correct = 0
        loss_history = []

        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label
            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()

            # [batch_size, max_seq_len]
            text.data.t_()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())

            # Calculate accuracy
            n_total_data += len(label)
            # [batch_size]
            _, prediction = logit.max(1)
            n_correct += (prediction == label).sum().item()

        epoch_loss = np.mean(loss_history)
        accuracy = n_correct / float(n_total_data)
        print(f'Loss: {epoch_loss:.2f}')
        print(f'Accuracy: {accuracy}')
        return epoch_loss, accuracy

    def inference(self, text):
        text = Variable(torch.LongTensor([text]))
        # [batch_size, 2]
        logit = self.model(text)
        _, prediction = torch.max(logit, dim=1)  # torch.max needs a dim to return (values, indices)
        return prediction

    def train_eval(self):
        # Set this variable to your MLflow server's DNS name
        mlflow_server = '172.23.147.124'

        # Tracking URI
        mlflow_tracking_URI = 'http://' + mlflow_server + ':5000'
        print("MLflow Tracking URI: %s" % (mlflow_tracking_URI))
        mlflow.set_tracking_uri(mlflow_tracking_URI)  # point the client at the tracking server

        with mlflow.start_run():
            for key, value in vars(self.config).items():
                mlflow.log_param(key, value)

            for epoch in tqdm(range(self.config.epochs)):
                # print out the active run
                print("Active Run ID: %s, Epoch: %s \n"
                      % (mlflow.active_run().info.run_id, epoch))

                train_loss = self.train_once()
                mlflow.log_metric('train_loss', train_loss)

                val_loss, val_acc = self.eval()
                mlflow.log_metric('val_loss', val_loss)
                mlflow.log_metric('val_acc', val_acc)

            # Finish run (the `with` block would also end it on exit)
            mlflow.end_run(status='FINISHED')
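# A hypothetical driver for the Solver above; `config`, `train_iter`, and
# `test_iter` come from the surrounding project and their names are assumptions.
solver = Solver(config, train_iter)
solver.build(is_train=True)
solver.train()                   # checkpoints land in config.save_dir
solver.load(epoch=config.epochs)
solver.data_loader = test_iter   # swap in the evaluation split
solver.eval()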
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()
        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            self.model.eval()

    def train(self):
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []
            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                # [batch_size, max_seq_len]
                text.data.t_()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # Tensor -> float

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir,
                                         f'epoch-{epoch + 1}.pkl')
                print('Save parameters at ', ckpt_path)
                torch.save(self.model.state_dict(), ckpt_path)

    def eval(self, epoch=None):
        # Load model parameters
        if not isinstance(epoch, int):
            epoch = self.config.epochs
        ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

        loss_history = []
        for _, batch in tqdm(enumerate(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            # [batch_size, max_seq_len]
            text.data.t_()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())

        epoch_loss = np.mean(loss_history)
        print(f'Loss: {epoch_loss:.2f}')
def train():
    transforms = Compose([Resize((height, width)), ToTensor()])
    train_dataset = CaptchaData(train_data_path, num_class=len(alphabet),
                                num_char=int(numchar), transform=transforms,
                                alphabet=alphabet)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size,
                                   num_workers=num_workers, shuffle=True,
                                   drop_last=True)
    test_data = CaptchaData(test_data_path, num_class=len(alphabet),
                            num_char=int(numchar), transform=transforms,
                            alphabet=alphabet)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=num_workers, shuffle=True,
                                  drop_last=True)

    cnn = CNN(num_class=len(alphabet), num_char=int(numchar),
              width=width, height=height)
    if use_gpu:
        cnn.cuda()

    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            if use_gpu:
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)

            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(loss.item())
        print('epoch:{},train_loss: {:.4}|train_acc: {:.4}'.format(
            epoch,
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        with torch.no_grad():
            for img, target in test_data_loader:
                if use_gpu:
                    img = img.cuda()
                    target = target.cuda()
                output = cnn(img)
                loss = criterion(output, target)  # test loss for this batch

                acc = calculat_acc(output, target)
                acc_history.append(float(acc))
                loss_history.append(loss.item())
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))

        torch.save(cnn.state_dict(),
                   os.path.join(model_path, "model_{}.pth".format(epoch)))
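# A hypothetical decoding helper for this captcha model: map one output row back
# to a string. `alphabet` and `numchar` are the same globals train() uses above.
def decode(output):
    chars = output.view(int(numchar), len(alphabet)).argmax(dim=1)
    return ''.join(alphabet[i] for i in chars)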
def run(args):
    args.logdir = args.logdir + args.mode
    args.trained = args.trained + args.mode + '/best_model.pt'
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logger = get_logger(os.path.join(args.logdir, 'main.log'))
    logger.info(args)

    # data
    batch_size = 128
    if args.mode == 'm2mm':
        source_dataset_name = 'MNIST'
        target_dataset_name = 'mnist_m'
        source_image_root = os.path.join('dataset', source_dataset_name)
        target_image_root = os.path.join('dataset', target_dataset_name)
        image_size = 28

        img_transform_source = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,))
        ])
        img_transform_target = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])

        dataset_source = datasets.MNIST(root='dataset', train=True,
                                        transform=img_transform_source,
                                        download=True)
        train_list = os.path.join(target_image_root, 'mnist_m_train_labels.txt')
        dataset_target_train = GetLoader(
            data_root=os.path.join(target_image_root, 'mnist_m_train'),
            data_list=train_list,
            transform=img_transform_target)
        test_list = os.path.join(target_image_root, 'mnist_m_test_labels.txt')
        dataset_target_test = GetLoader(
            data_root=os.path.join(target_image_root, 'mnist_m_test'),
            data_list=test_list,
            transform=img_transform_target)
    elif args.mode == 's2u':
        source_dataset_name = 'svhn'
        target_dataset_name = 'usps'
        dataset_source = svhn.SVHN(
            './data/svhn/', split='train', download=True,
            transform=transforms.Compose([
                transforms.Resize(28),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
        dataset_target_train = usps.USPS(
            './data/usps/', train=True, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5,), (0.5,))
            ]))
        dataset_target_test = usps.USPS(
            './data/usps/', train=False, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5,), (0.5,))
            ]))

    source_train_loader = torch.utils.data.DataLoader(
        dataset=dataset_source, batch_size=batch_size,
        shuffle=True, num_workers=8)
    target_train_loader = torch.utils.data.DataLoader(
        dataset=dataset_target_train, batch_size=batch_size,
        shuffle=True, num_workers=8)
    target_test_loader = torch.utils.data.DataLoader(
        dataset=dataset_target_test, batch_size=batch_size,
        shuffle=False, num_workers=8)

    # train source CNN
    source_cnn = CNN(in_channels=args.in_channels).to(args.device)
    if os.path.isfile(args.trained):
        print("loading trained source model")
        c = torch.load(args.trained)
        source_cnn.load_state_dict(c['model'])
        logger.info('Loaded `{}`'.format(args.trained))
    else:
        print("no trained source model found; training from scratch")

    # train target CNN
    target_cnn = CNN(in_channels=args.in_channels, target=True).to(args.device)
    target_cnn.load_state_dict(source_cnn.state_dict())
    discriminator = Discriminator(args=args).to(args.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(target_cnn.encoder.parameters(), lr=args.lr)
    # optimizer = optim.Adam(
    #     target_cnn.encoder.parameters(),
    #     lr=args.lr, betas=args.betas, weight_decay=args.weight_decay)
    d_optimizer = optim.Adam(discriminator.parameters(), lr=args.lr)
    # d_optimizer = optim.Adam(
    #     discriminator.parameters(),
    #     lr=args.lr, betas=args.betas, weight_decay=args.weight_decay)
    train_target_cnn(source_cnn, target_cnn, discriminator, criterion,
                     optimizer, d_optimizer, source_train_loader,
                     target_train_loader, target_test_loader, args=args)
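# train_target_cnn is defined elsewhere. A minimal sketch of one ADDA-style
# adversarial step it presumably performs; the flat encoder features, 2-class
# discriminator logits, and the domain-label convention are all assumptions.
def adversarial_step(source_cnn, target_cnn, discriminator, criterion,
                     optimizer, d_optimizer, source_x, target_x):
    # 1) train the discriminator: source features -> label 0, target -> label 1
    source_feat = source_cnn.encoder(source_x).detach()
    target_feat = target_cnn.encoder(target_x).detach()
    feats = torch.cat([source_feat, target_feat], dim=0)
    domains = torch.cat([torch.zeros(len(source_feat), dtype=torch.long),
                         torch.ones(len(target_feat), dtype=torch.long)]).to(feats.device)
    d_loss = criterion(discriminator(feats), domains)
    d_optimizer.zero_grad()
    d_loss.backward()
    d_optimizer.step()

    # 2) train the target encoder to fool the discriminator (claim label 0)
    target_feat = target_cnn.encoder(target_x)
    fool = torch.zeros(len(target_feat), dtype=torch.long, device=target_feat.device)
    g_loss = criterion(discriminator(target_feat), fool)
    optimizer.zero_grad()
    g_loss.backward()
    optimizer.step()
    return d_loss.item(), g_loss.item()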
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()
        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)
        torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []
            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                # [batch_size, max_seq_len]
                text.data.t_()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # Tensor -> float

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir,
                                         f'epoch-{epoch + 1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model on text data"""
        n_total_data = 0
        n_correct = 0
        loss_history = []

        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            # [batch_size, max_seq_len]
            text.data.t_()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())

            # Calculate accuracy
            n_total_data += len(label)
            # [batch_size]
            _, prediction = logit.max(1)
            n_correct += (prediction == label).sum().item()

        epoch_loss = np.mean(loss_history)
        accuracy = n_correct / float(n_total_data)
        print(f'Loss: {epoch_loss:.2f}')
        print(f'Accuracy: {accuracy}')

    def inference(self, text):
        text = Variable(torch.LongTensor([text]))
        # [batch_size, 2]
        logit = self.model(text)
        _, prediction = torch.max(logit, dim=1)
        return prediction
class Trainer:
    """Trainer"""

    def __init__(self, _hparams):
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)
        self.writer = SummaryWriter()
        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path, val_path):
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs', '*' * 20)
        self.encoder.fine_tune()
        self.optimizer = torch.optim.Adam([
            {'params': self.encoder.parameters(), 'lr': self.ft_encoder_lr},
            {'params': self.decoder.parameters(), 'lr': self.ft_decoder_lr},
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """
        All parameters that need optimizing. The encoder is not trained at this
        stage by design, so its parameters are left out.
        :return: list of trainable parameters
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        Train.
        :param val_path: path for the sentences generated during validation
        :param save_path: path for saving the model
        :param val_interval: validation interval
        :param max_epochs: maximum number of training epochs
        :return:
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()

            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap, cap_len) in tqdm(enumerate(self.train_loader)):
                # img: batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                features = self.encoder.forward(img)
                outputs = self.decoder.forward(features, cap)
                outputs = pack_padded_sequence(outputs, cap_len - 1,
                                               batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:], cap_len - 1,
                                               batch_first=True)[0]

                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))

            if (epoch + 1) % val_interval == 0:
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        Save the best model so far.
        :param save_path: path of the saved model file
        :param train_epoch: current training epoch
        :return:
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            'train_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """
        Validate.
        :param val_path: path for the sentences generated during validation
        :param train_epoch: current training epoch
        :return: CIDEr score
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()

        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)

        return scores['CIDEr']

    def set_train(self):
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        self.encoder.eval()
        self.decoder.eval()
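# fine_tune() and froze() are methods on the project's CNN encoder, which is not
# shown here. A plausible sketch; toggling requires_grad is an assumption.
import torch.nn as nn

class EncoderCNN(nn.Module):
    def fine_tune(self):
        for p in self.parameters():  # unfreeze the backbone for fine-tuning
            p.requires_grad = True

    def froze(self):
        for p in self.parameters():  # freeze it again afterwards
            p.requires_grad = False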
    predictions = np.vstack(predictions)
    true_labels = np.hstack(true_labels).ravel().reshape((-1, 1))
    full_test_cm = get_cm(torch.Tensor(true_labels), torch.Tensor(predictions))
    msg = 'Finished Training. Test Accuracy : {:.2f} Mean Loss : {:.2f}'.format(
        (full_test_cm.diag().sum() / full_test_cm.sum()).item(), full_test_loss)
    print(msg)
    logging.info(msg)

    # save model
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': network.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
        }, r"src/saved_models/model2.pkl")

    # load example
    checkpoint = torch.load(r"src/saved_models/model2.pkl")
    network.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']

    # analyze results
    activity_labels = pd.read_csv("input/LabelMap.csv", index_col=0)
    test_df = pd.DataFrame(full_test_cm.long().numpy(),
                           columns=activity_labels.Activity,
                           index=activity_labels.Activity)  # index argument assumed; the source was truncated here
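# get_cm is referenced above but not defined in these snippets. A hypothetical
# sketch: a confusion matrix with true classes as rows and predictions as columns.
def get_cm(true_labels, predictions, n_classes=None):
    preds = predictions.argmax(dim=1) if predictions.dim() > 1 else predictions.long().view(-1)
    true = true_labels.long().view(-1)
    n = n_classes or int(max(true.max(), preds.max()).item()) + 1
    cm = torch.zeros(n, n)
    for t, p in zip(true, preds):
        cm[t, p] += 1
    return cm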
def training_run_cnn(combination, criterion, train_loader, valid_loader, run):
    n_featuremap_1, n_featuremap_2, mode = combination
    model_path = "CNN_run_{}.pt".format(run)
    results[model_path] = dict()  # `results`, `train`, `valid` are module-level globals

    # initialize the network with the given configuration
    my_net = CNN(n_featuremap_1=n_featuremap_1, n_featuremap_2=n_featuremap_2)
    # initialize weights with the given mode
    my_net.apply(partial(init_weights, mode=mode))
    my_net.to(device)

    optimizer = torch.optim.Adam(my_net.parameters())

    pbar = tqdm(total=10)  # one tick per epoch
    for epoch in range(10):  # loop over the training dataset multiple times
        training_loss = .0
        for batch_idx, (x, target) in enumerate(train_loader):
            x, target = x.to(device), target.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = my_net(x).view(-1, 1)
            loss = criterion(outputs, target.view(-1, 1))
            loss.backward()
            optimizer.step()

            if epoch == 9:
                # accumulate training loss in the last epoch
                training_loss += loss.item() * len(x)

            if batch_idx % 100 == 99:  # print every 100 mini-batches
                print("[ Epoch %d, Batch %2d] loss: %.3f"
                      % (epoch + 1, batch_idx + 1, loss.item()))
        pbar.update(1)
    pbar.close()

    # update results
    results[model_path]["training_loss"] = training_loss / len(train)
    print("Finished Training !")

    print("Start Evaluating !")
    # Validation loss
    valid_loss = .0
    correct = 0
    thres = 0.5
    with torch.no_grad():
        for batch_idx, (x, target) in enumerate(valid_loader):
            x, target = x.to(device), target.to(device)
            outputs = my_net(x).view(-1, 1)
            prediction = outputs >= thres
            correct += prediction.eq(target.view(-1, 1)).sum().item()
            loss = criterion(outputs, target.view(-1, 1))
            valid_loss += loss.item() * len(x)

    # update results
    results[model_path]["validation_loss"] = valid_loss / len(valid)
    results[model_path]["accuracy"] = correct / len(valid)

    # save model to disk
    torch.save(my_net.state_dict(), "./models/" + model_path)
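# A hypothetical sketch of the init_weights helper applied above via
# functools.partial; the supported mode names are assumptions.
def init_weights(m, mode='xavier'):
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        if mode == 'xavier':
            nn.init.xavier_uniform_(m.weight)
        elif mode == 'kaiming':
            nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)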
            loss = L(outputs, labels)  # Calculate error
            loss.backward()            # Compute gradients
            tloss.append(loss.item())
            optimizer.step()           # Update weights

        # output training loss
        print("Epoch [{}/{}], Training Loss: {}".format(
            epoch + 1, args.epochs, np.mean(tloss)))

        # compute validation loss
        if epoch % 3 == 0:
            val_accuracy(test_loader, model, args.gpu)
            vl = val_loss(test_loader, model, L, args.gpu)

        # elapsed time
        time_elapsed = time.time() - start
        print("Time for this epoch: {0:.2f} seconds".format(time_elapsed))

        # save the model if it has a better loss on the validation set
        if vl < best:
            t.save(model.state_dict(), fname)
            best = vl

    print("Finished training.")

    # Change to evaluation mode
    model.eval()

    # Compute accuracy
    print("Final accuracy:")
    val_accuracy(test_loader, model, args.gpu)
    val_loss(test_loader, model, L, args.gpu)
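# val_accuracy and val_loss are defined elsewhere; a minimal sketch of val_loss
# matching the call sites above (torch is imported as `t` in this snippet).
def val_loss(loader, model, L, gpu):
    model.eval()
    total, n = 0.0, 0
    with t.no_grad():
        for inputs, labels in loader:
            if gpu:
                inputs, labels = inputs.cuda(), labels.cuda()
            total += L(model(inputs), labels).item() * len(inputs)
            n += len(inputs)
    model.train()
    avg = total / n
    print("Validation loss: {0:.4f}".format(avg))
    return avg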