def train(load_model: str, save_model: str, train_dataset: str, test_dataset: str, no_train: bool, no_test: bool, epochs: int, batch_size: int, learning_rate: float):
    """Drive the train / save / test pipeline for a Net model.

    Builds a Net on the best available device, optionally restores weights
    from ``load_model``, then runs the phases that are not suppressed by the
    ``no_train`` / ``no_test`` flags.  Trained weights are persisted to
    ``save_model`` only when training actually ran.
    """
    device = torch.device('cuda:0' if cuda.is_available() else 'cpu')
    click.secho('Using device={}'.format(device), fg='blue')

    model = Net()
    model.to(device)

    # Restore a previously saved checkpoint when one was supplied.
    if load_model is not None:
        click.secho('Loading model from \'{}\''.format(load_model), fg='yellow')
        model.load_state_dict(torch.load(load_model, map_location=device))

    if not no_train:
        click.echo('Training model using {}'.format(train_dataset))
        model.train()
        train_net(model,
                  data_path=train_dataset,
                  batch_size=batch_size,
                  num_epochs=epochs,
                  learning_rate=learning_rate)
        # Only persist weights that were (re)trained in this run.
        if save_model is not None:
            click.secho('Saving model as \'{}\''.format(save_model), fg='yellow')
            torch.save(model.state_dict(), save_model)

    if not no_test:
        click.echo('Testing model using {}'.format(test_dataset))
        model.eval()
        accuracy = test_net(model, data_path=test_dataset, batch_size=batch_size)
        # Green when the accuracy clears the 97% bar, red otherwise.
        click.secho('Accuracy={}'.format(accuracy),
                    fg='green' if accuracy > 97. else 'red')
def main():
    """Load the pickled vocab and the test corpus, run one optimisation pass
    over it, and print running loss plus the final accuracy percentage."""
    data_dir = Path.cwd() / 'data_in'
    test_path = data_dir / 'test.txt'
    vocab_path = data_dir / 'vocab.pkl'

    # Restore the vocabulary serialized at preprocessing time.
    with open(vocab_path, mode='rb') as io:
        vocab = pickle.load(io)

    tokenizer = MeCab()
    padder = PadSequence(length=70, pad_val=vocab.token_to_idx['<pad>'])
    test_ds = Corpus(test_path, vocab, tokenizer, padder)
    test_dl = DataLoader(test_ds, batch_size=1024)

    net = Net(vocab_len=len(vocab))
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

    for epoch in range(1):
        net.train()
        n_seen = 0
        n_correct = 0
        for label, sen1, sen2 in tqdm(test_dl, disable=True):
            optimizer.zero_grad()
            logits = net(sen1, sen2)
            loss = criterion(logits, label)
            loss.backward()
            optimizer.step()

            # Accumulate correct predictions for the accuracy report below.
            predicted = logits.data.max(1)[1]
            n_correct += predicted.eq(label.data).cpu().sum()
            # NOTE: the printed count reflects examples seen *before* this batch.
            print("epoch: {}, index: {}, loss: {}".format((epoch + 1), n_seen, loss.item()))
            n_seen += len(label)
        print('Accuracy : %d %%' % (100 * n_correct / n_seen))
def train(train_data, val_data, fold_idx=None):
    """Train a Net on ``train_data`` with per-epoch validation on ``val_data``.

    Saves the best-scoring weights to disk, adjusts the learning rate after a
    patience window with no improvement (or after repeated score ties), and
    early-stops once the LR has been adjusted ``config.adjust_lr_num`` times.
    ``fold_idx`` (0-based) selects a fold-specific checkpoint filename and, when
    given, records the best score into ``model_score`` at the end.

    NOTE(review): relies on module-level globals — ``config``, ``device``,
    ``model_name``, ``train_transform``/``val_transform``, ``Ranger``,
    ``FocalLoss``, ``accuracy``, ``evaluate``, ``model_score`` — confirm they
    are defined in the enclosing module.
    """
    train_data = MyDataset(train_data, train_transform)
    train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)
    val_data = MyDataset(val_data, val_transform)
    val_loader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False)
    model = Net(model_name).to(device)
    # Alternatives tried previously, kept for reference:
    # criterion = nn.CrossEntropyLoss()
    criterion = FocalLoss(0.5)
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    optimizer = Ranger(model.parameters(), lr=1e-3, weight_decay=0.0005)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=4)
    # Checkpoint path: single-run name, or a per-fold name for k-fold training.
    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(config.model_path, '{}_fold{}.bin'.format(model_name, fold_idx))
    # if os.path.isfile(model_save_path):
    #     print('load the previously trained model')
    #     model.load_state_dict(torch.load(model_save_path))
    best_val_score = 0
    best_val_score_cnt = 0      # counts epochs that exactly tie the best score
    last_improved_epoch = 0
    adjust_lr_num = 0           # how many times the LR-adjust branch has fired
    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch:{}, step:{}'.format(cur_epoch + 1, len(train_loader)))
        cur_step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            probs = model(batch_x)
            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()
            cur_step += 1
            # Periodic progress report on the current batch's loss/accuracy.
            if cur_step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(cur_step, len(train_loader), train_loss.item(), train_acc[0].item()))
        val_loss, val_score = evaluate(model, val_loader, criterion)
        # ">=" so ties also checkpoint; exact ties additionally bump the tie counter.
        if val_score >= best_val_score:
            if val_score == best_val_score:
                best_val_score_cnt += 1
            best_val_score = val_score
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        end_time = int(time.time())
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_score,
                         end_time - start_time, improved_str))
        # Patience exhausted, or the score has plateaued (tied 3+ times):
        # adjust the LR a limited number of times, then stop for good.
        if cur_epoch - last_improved_epoch >= config.patience_epoch or best_val_score_cnt >= 3:
            if adjust_lr_num >= config.adjust_lr_num:
                print("No optimization for a long time, auto stopping...")
                break
            print("No optimization for a long time, adjust lr...")
            # scheduler.step()
            last_improved_epoch = cur_epoch  # reset here, otherwise this branch would fire on consecutive epochs
            adjust_lr_num += 1
            best_val_score_cnt = 0
        scheduler.step()
    del model
    gc.collect()
    if fold_idx is not None:
        # presumably a module-level per-fold score container — verify against caller
        model_score[fold_idx] = best_val_score
def train(train_data, val_data, fold_idx=None):
    """Train Net with cross-entropy + Adam, validating after every epoch.

    The best-accuracy weights are checkpointed to ``config.model_path`` (with a
    fold suffix when ``fold_idx`` is given); training stops early once
    ``config.patience_epoch`` epochs pass without a new best validation accuracy.
    """
    train_loader = DataLoader(MyDataset(train_data, train_transform),
                              batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(MyDataset(val_data, val_transform),
                            batch_size=config.batch_size, shuffle=False)

    model = Net(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    best_val_acc = 0
    last_improved_epoch = 0

    # Pick the checkpoint filename: plain for a single run, suffixed per fold.
    if fold_idx is None:
        print('start')
        ckpt_name = '{}.bin'.format(model_name)
    else:
        print('start fold: {}'.format(fold_idx + 1))
        ckpt_name = '{}_fold{}.bin'.format(model_name, fold_idx)
    model_save_path = os.path.join(config.model_path, ckpt_name)

    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch: ', cur_epoch + 1)

        step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            probs = model(batch_x)
            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()
            step += 1
            # Periodic progress line with the current batch loss/accuracy.
            if step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(step, len(train_loader), train_loss.item(), train_acc[0].item()))

        val_loss, val_acc = evaluate(model, val_loader, criterion)
        # ">=" so a tie still refreshes the checkpoint and the patience clock.
        improved = val_acc >= best_val_acc
        if improved:
            best_val_acc = val_acc
            torch.save(model.state_dict(), model_save_path)
            last_improved_epoch = cur_epoch
        improved_str = '*' if improved else ''

        end_time = int(time.time())
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_acc,
                         end_time - start_time, improved_str))

        scheduler.step()
        if cur_epoch - last_improved_epoch > config.patience_epoch:
            print("No optimization for a long time, auto-stopping...")
            break

    del model
    gc.collect()
for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v) } model_dict.update(pretrained_dict) model.load_state_dict(model_dict) print('-- Loading weights finished.') # 2.多GPU并行 if torch.cuda.is_available(): model = torch.nn.DataParallel(model) cudnn.benchmark = True model = model.cuda() # 3.创建计算loss的类 criterion = MultiBoxLoss() # 4.创建优化器 optimizer = optim.Adam(model.parameters(), lr=args.lr) model.train() # 5.读取数据开始训练Epoch轮 for epoch in range(args.Epoch): # 5.1每轮使用不同学习率 if epoch % 10 == 0: adjust_learning_rate(optimizer, args.lr, 0.95, epoch) # 5.2创建数据加载器 train_data = MyDataSet(args.annotation_path, Config['input_size'], transform=my_transform, loader=default_loader) # 因为每张图像上的目标个数不确定,所以batch_size只能为1。DataLoader自动把np.array转换成tensor data_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=1, shuffle=False) # 5.3分批训练
# My Libraries
from model.net import Net
from src.dataset import FaceDataset
from src.loadConfig import loadConfig


if __name__ == "__main__":
    # Input/output locations for this transfer run.
    train_dir = r"F:\DataSets\toy"
    model_dir = './checkpoints/model.dat'
    dst_dir = './checkpoints/model_transfered.dat'

    config = loadConfig('config.json')
    dataset = FaceDataset(dir=train_dir)

    # Restore the base checkpoint, fine-tune on the face data, save the result.
    net = Net(config=config)
    net.load(model_dir)
    net.train(dataset)
    net.save(dst_dir)