def train_dataloader(self):
    return DataLoader(dataset=MyDataset(self.data_path, split='TRAIN',
                                        input_type=self.input_type,
                                        input_length=self.input_length,
                                        w2v_type=self.w2v_type,
                                        is_balanced=self.is_balanced,
                                        is_subset=self.is_subset),
                      batch_size=self.batch_size,
                      shuffle=True,
                      drop_last=False,
                      num_workers=self.num_workers)
def test_dataloader(self):
    # Each test item is split into num_chunk chunks, so the batch size is divided
    # by num_chunk to keep the per-step chunk count close to the training batch size.
    return DataLoader(dataset=MyDataset(self.data_path, split='TEST',
                                        input_type=self.input_type,
                                        input_length=self.input_length,
                                        num_chunk=self.num_chunk,
                                        w2v_type=self.w2v_type,
                                        is_balanced=self.is_balanced,
                                        is_subset=self.is_subset),
                      batch_size=self.batch_size // self.num_chunk,
                      shuffle=False,
                      drop_last=False,
                      num_workers=self.num_workers)
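# -- hedged sketch (not in the source): one common way to consume the chunked test
#    loader above. `evaluate_chunked` is a hypothetical helper; it assumes each
#    dataset item is a (num_chunk, input_length) tensor, so a batch is (B, num_chunk, L),
#    and averages the chunk logits back into one prediction per clip.
import torch

def evaluate_chunked(model, test_loader, num_chunk):
    model.eval()
    all_logits, all_labels = [], []
    with torch.no_grad():
        for x, y in test_loader:                                  # x: (B, num_chunk, L)
            b = x.size(0)
            logits = model(x.view(b * num_chunk, -1))             # (B * num_chunk, C)
            all_logits.append(logits.view(b, num_chunk, -1).mean(dim=1))  # average chunks
            all_labels.append(y)
    return torch.cat(all_logits), torch.cat(all_labels)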
args = configparser.ConfigParser()
args.read('argsConfig.ini')

log_dir = args.get('Test', 'model_log_dir')
writer = SummaryWriter(log_dir)
log_file = log_dir + 'log.txt'
with open(log_file, 'a') as f:
    f.write('=' * 50)
    f.write('Testing')
    f.write('=' * 50)

# load testing data
print("\nLoading testing data...")
texts, labels, number_of_classes, sample_weights = load_data(args, 'test')
test_dataset = MyDataset(texts, labels, args)

print("Transferring testing data to iterator...")
testing_params = {"batch_size": args.getint('Train', 'batch_size'),
                  "shuffle": False,
                  "num_workers": args.getint('Train', 'workers'),
                  "drop_last": True}
test_generator = DataLoader(test_dataset, **testing_params)

print('\nNumber of testing samples: ' + str(len(test_dataset)))
with open(log_file, 'a') as f:
    f.write('\nNumber of testing samples: ' + str(len(test_dataset)) + '\n')

model = CharacterLevelCNN(number_of_classes, args)
print("=> loading weights from '{}'".format(args.get('Test', 'model_to_test')))
# assert os.path.isfile(args.get('Test', 'model_to_test')), \
#     "=> no checkpoint found at '{}'".format(args.get('Test', 'model_to_test'))
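# -- hedged sketch (not in the source): the loading step the snippet above leads up
#    to. Assumes the checkpoint file stores either a raw state_dict or a
#    {'state_dict': ...} wrapper like the checkpoints saved by the training script below.
checkpoint = torch.load(args.get('Test', 'model_to_test'), map_location='cpu')
model.load_state_dict(checkpoint['state_dict'] if 'state_dict' in checkpoint else checkpoint)
model.eval()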
import glob
import os

import torch
import torchvision.transforms as transforms

from data_loader import MyDataset

# hyper-parameters
batch_size = 1
USE_CUDA = torch.cuda.is_available()

root_dir = './data/test/'
fn_list = glob.glob(root_dir + '*.csv')
fn_list.sort()
ids = [os.path.basename(fn).split('.')[0] for fn in fn_list]

my_dataset = MyDataset(root_dir=root_dir, ids=ids)
# sample = my_dataset[0]
# print(sample['pts_xyz'], sample['pts_label'], sample['pts_bbox'])

# test loader
test_loader = torch.utils.data.DataLoader(my_dataset, batch_size=batch_size, shuffle=False)

# load trained model
model = torch.load('model/model_127.pkl', map_location='cpu').eval()
if USE_CUDA:
    model = model.cuda()

for i, data in enumerate(test_loader, 0):
    ...
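# -- hedged sketch (not in the source): a plausible body for the truncated loop
#    above, assuming the dataset returns a dict with a 'pts_xyz' point tensor
#    (hinted at by the commented-out sample print). Names here are illustrative only.
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        pts = data['pts_xyz'].float()
        if USE_CUDA:
            pts = pts.cuda()
        pred = model(pts)
        print(ids[i], tuple(pred.shape))  # batch_size is 1, so i indexes into ids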
def predict(dim, names, weight, batch_size, pretrain_model_path, model_types=None):
    print('-' * 100)
    print('multi-models begin predicting ...')
    print('-' * 100)

    # read test data
    test_file = '/kaggle/input/quora-question-pairs/test.csv.zip'
    test_df = pd.read_csv(test_file)
    test_ids = test_df['test_id'].values.tolist()
    result_prob_tmp = torch.zeros((len(test_ids), 2))

    # load each model in turn
    for i, name in enumerate(names):
        weight_ = weight[i]  # 3.17 add: per-model fusion weight
        # model_path = '../model/' + name + '.pkl'
        output_model_file = os.path.join('output', name + '.pkl')
        state = torch.load(output_model_file)

        # 3.10 add
        model_type = model_types[i]
        if model_type == 'mlp':
            test_iter = MyDataset(file=test_file, is_train=False,
                                  pretrain_model_path=pretrain_model_path[i])
            test_iter = get_dataloader(test_iter, batch_size, shuffle=False, drop_last=False)
            model = MyModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i])
        elif model_type == 'cnn':
            test_iter = MyDataset(file=test_file, is_train=False,
                                  pretrain_model_path=pretrain_model_path[i])
            test_iter = get_dataloader(test_iter, batch_size, shuffle=False, drop_last=False)
            model = MyTextCNNModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i])
        elif model_type == 'rcnn':
            test_iter = MyDataset(file=test_file, is_train=False,
                                  pretrain_model_path=pretrain_model_path[i])
            test_iter = get_dataloader(test_iter, batch_size, shuffle=False, drop_last=False)
            model = MyRCNNModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i])

        model.to(device)
        model.load_state_dict(state['model_state'])
        model.eval()

        print('-' * 20, 'model', i, '-' * 20)
        print('load model:%s, loss:%.4f, e:%d, lr:%.7f, time:%d'
              % (name, state['loss'], state['e'], state['lr'], state['time']))

        # predict
        with torch.no_grad():
            j = 0
            for batch in tqdm(test_iter):
                batch = [b.to(device) for b in batch]
                out = model(batch, task='eval')
                out = out.cpu()  # gpu -> cpu
                if j == 0:
                    tmp = out  # initialize tmp
                else:
                    tmp = torch.cat([tmp, out], dim=0)  # append this batch's predictions to tmp
                j += 1

        # this model has finished predicting
        print('model', i, 'predict finished!\n')

        # 3.17: fuse the per-model probabilities by weight
        result_prob_tmp += (weight_ / len(names)) * tmp

        # free the model
        del model
        gc.collect()
        time.sleep(1)

    # 3.10 current fusion strategy: simple (weighted) average of the probs
    _, result = torch.max(result_prob_tmp, dim=-1)
    result = result.numpy()

    # 3.16 update: if the prob for label 0 is greater than 3, treat it as label=0
    # with open('tmp.txt', 'w', encoding='utf-8') as f:
    #     for r in result_prob_tmp:
    #         f.write(str(r) + '\n')

    # save result
    df = pd.DataFrame()
    df['test_id'] = test_ids
    df['is_duplicate'] = result
    df.to_csv("submission.csv", encoding='utf-8', index=False)
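# -- hedged sketch (not in the source): get_dataloader as called above and in the
#    training script below; presumably a thin wrapper over torch's DataLoader.
from torch.utils.data import DataLoader

def get_dataloader(dataset, batch_size, shuffle=False, drop_last=False):
    return DataLoader(dataset, batch_size=batch_size,
                      shuffle=shuffle, drop_last=drop_last)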
def evaluate(net, test_loader, device):  # reconstructed header; the snippet began mid-function
    # classification accuracy over the test set
    net.eval()
    acc, n = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            out = net(images)
            _, preds = torch.max(out.data, 1)
            correct = preds.eq(labels.data).sum().item()
            acc += correct
            n += labels.size(0)
    return acc / n


# train_data = torchvision.datasets.ImageFolder(root='/data/datasets/mnist/train',
#                                               transform=transforms.Compose(
#                                                   [transforms.Resize(227), transforms.ToTensor()]))
# this version trains faster; not sure why
train_data = MyDataset(txt='/data1/zj/data/mnist/train.txt', data_shape=(227, 227), channel=3,
                       transform=transforms.ToTensor())
train_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, num_workers=3)
test_data = MyDataset(txt='/data1/zj/data/mnist/test.txt', data_shape=(227, 227), channel=3,
                      transform=transforms.ToTensor())
test_loader = Data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False,  # no need to shuffle the test set
                              num_workers=3)
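# -- hedged sketch (not in the source): a minimal MyDataset consistent with the
#    calls above, assuming each line of the txt file is "<image_path> <label>".
#    The class body here is an illustration, not the repo's actual implementation.
from PIL import Image
import torch.utils.data as Data

class MyDataset(Data.Dataset):
    def __init__(self, txt, data_shape, channel=3, transform=None):
        with open(txt) as f:
            self.samples = [line.strip().rsplit(' ', 1) for line in f if line.strip()]
        self.data_shape = data_shape
        self.channel = channel
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path).convert('RGB' if self.channel == 3 else 'L')
        img = img.resize(self.data_shape)  # e.g. (227, 227)
        if self.transform is not None:
            img = self.transform(img)
        return img, int(label)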
def train():
    # Model
    efficient_transformer = Linformer(
        dim=128,
        seq_len=300 + 1,  # 300 patches + 1 cls token
        depth=12,
        heads=8,
        k=64)
    my_model = ViT(
        dim=128,
        image_size=320,
        patch_size=16,
        num_classes=25,
        transformer=efficient_transformer,
        channels=3,
    ).to(device)

    if os.path.exists('transformer/my_model.pt'):
        my_model.load_state_dict(torch.load('transformer/my_model.pt'))
        print('Load my_model.pt')

    batch_size = 32
    num_epoch = 100
    num_classes = 25
    learning_rate = 8e-4

    train_set = MyDataset(is_train=True, num_cat=num_classes)
    validation_set = MyDataset(is_train=False, num_cat=num_classes)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                               shuffle=True, pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=batch_size,
                                                    shuffle=False, pin_memory=True)

    optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate)
    loss_func = torch.nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=5,
                                  threshold=2e-1, verbose=True, min_lr=1e-5)
    bestTestAccuracy = 0

    print('Start training')
    train_size = len(train_loader.dataset)
    test_size = len(validation_loader.dataset)
    for epoch in range(num_epoch):
        total = 0
        correct = 0
        my_model.train()
        for i, data in enumerate(train_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()
            prediction = my_model(img)
            loss = loss_func(prediction, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(f'Train | Epoch {epoch}/{num_epoch}, Batch {i}/{int(train_size / batch_size)} '
                  f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                  f' Acc: {(100 * correct / total):.3f}')

        total = 0
        correct = 0
        my_model.eval()
        with torch.no_grad():
            for i, data in enumerate(validation_loader, 0):
                labels = data['label'].to(device)
                img = data['img'].to(device).float()
                prediction = my_model(img)
                _, predicted = torch.max(prediction, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                # note: 'loss' below is the last training-batch loss; no validation loss is computed
                print(f'Test | Epoch {epoch}/{num_epoch}, Batch {i}/{int(test_size / batch_size)} '
                      f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                      f' Acc: {(100 * correct / total):.3f} Best-so-far: {100 * bestTestAccuracy:.5f}')

        if (correct / total) > bestTestAccuracy:
            bestTestAccuracy = correct / total
            print(f'Update best test: {100 * bestTestAccuracy:.5f}')
            torch.save(my_model.state_dict(),
                       f"transformer/my_model_{str(round(100 * bestTestAccuracy, 2)).replace('.', '_')}.pt")
        scheduler.step(bestTestAccuracy)
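# -- hedged sketch (not in the source): get_lr as printed in the progress lines of
#    the two training scripts above and below; presumably it just reads the current
#    learning rate from the optimizer's first parameter group.
def get_lr(optimizer):
    return optimizer.param_groups[0]['lr']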
def train():
    my_model = Resnet(kernel_size=3, filters=64, inChannels=3, input_shape=(3, 240, 320),
                      conv_nonlinearity='relu', num_class=25)
    my_model = my_model.to(device)
    if os.path.exists('my_model.pt'):
        my_model.load_state_dict(torch.load('my_model.pt'))
        print('Load my_model.pt')

    batch_size = 32
    num_epoch = 100
    num_classes = 25
    learning_rate = 8e-4

    train_set = MyDataset(is_train=True, num_cat=num_classes)
    validation_set = MyDataset(is_train=False, num_cat=num_classes)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                               shuffle=True, pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=batch_size,
                                                    shuffle=False, pin_memory=True)

    optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate)
    # NLLLoss expects log-probabilities, so the model's forward presumably ends in log_softmax
    loss_func = torch.nn.NLLLoss()
    scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=10,
                                  threshold=2e-1, verbose=True, min_lr=1e-5)
    bestTestAccuracy = 0

    print('Start training')
    train_size = len(train_loader.dataset)
    test_size = len(validation_loader.dataset)
    for epoch in range(num_epoch):
        total = 0
        correct = 0
        my_model.train()
        for i, data in enumerate(train_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()
            prediction = my_model(img)
            loss = loss_func(prediction, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(f'Train | Epoch {epoch}/{num_epoch}, Batch {i}/{int(train_size / batch_size)} '
                  f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                  f' Acc: {(100 * correct / total):.3f}')

        total = 0
        correct = 0
        my_model.eval()
        with torch.no_grad():
            for i, data in enumerate(validation_loader, 0):
                labels = data['label'].to(device)
                img = data['img'].to(device).float()
                prediction = my_model(img)
                _, predicted = torch.max(prediction, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                # note: 'loss' below is the last training-batch loss; no validation loss is computed
                print(f'Test | Epoch {epoch}/{num_epoch}, Batch {i}/{int(test_size / batch_size)} '
                      f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                      f' Acc: {(100 * correct / total):.3f} Best-so-far: {100 * bestTestAccuracy:.5f}')

        if (correct / total) > bestTestAccuracy:
            bestTestAccuracy = correct / total
            print(f'Update best test: {100 * bestTestAccuracy:.5f}')
            torch.save(my_model.state_dict(),
                       f"my_model_{str(round(100 * bestTestAccuracy, 2)).replace('.', '_')}.pt")
        scheduler.step(bestTestAccuracy)
def train(batch_size=16,
          pretrain_model_path='',
          name='',
          model_type='mlp',
          after_bert_choice='last_cls',
          dim=1024,
          lr=1e-5,
          epoch=12,
          smoothing=0.05,
          sample=False,
          open_ad='',
          dialog_name='xxx'):
    if not pretrain_model_path or not name:
        raise ValueError('pretrain_model_path and name must be provided')

    print('\n********** model type:', model_type, '**********')
    print('batch_size:', batch_size)

    # load dataset
    train_file = '/kaggle/input/dataset/my_train.csv'
    dev_file = '/kaggle/input/dataset/my_dev.csv'
    train_num = len(pd.read_csv(train_file).values.tolist())
    val_num = len(pd.read_csv(dev_file).values.tolist())
    print('train_num: %d, dev_num: %d' % (train_num, val_num))

    # select the model
    if model_type in ['siam', 'esim', 'sbert']:
        raise NotImplementedError(model_type)
    else:
        train_iter = MyDataset(file=train_file, is_train=True, sample=sample,
                               pretrain_model_path=pretrain_model_path)
        train_iter = get_dataloader(train_iter, batch_size, shuffle=True, drop_last=True)
        dev_iter = MyDataset(file=dev_file, is_train=True, sample=sample,
                             pretrain_model_path=pretrain_model_path)
        dev_iter = get_dataloader(dev_iter, batch_size, shuffle=False, drop_last=False)

        if model_type == 'mlp':
            model = MyModel(dim=dim, pretrain_model_path=pretrain_model_path,
                            smoothing=smoothing, after_bert_choice='last_cls')
        elif model_type == 'cnn':
            model = MyTextCNNModel(dim=dim, pretrain_model_path=pretrain_model_path,
                                   smoothing=smoothing)
        elif model_type == 'rcnn':
            model = MyRCNNModel(dim=dim, pretrain_model_path=pretrain_model_path,
                                smoothing=smoothing)

    # move the model to GPU
    model.to(device)

    model_param_num = 0
    # 3.24 multi-gpu training
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    for p in model.parameters():
        if p.requires_grad:
            model_param_num += p.nelement()
    print('param_num:%d\n' % model_param_num)

    # Adversarial training improves generalization but noticeably slows training
    # (used as a plug-in). 3.12: switched to FGM, which is faster.
    if open_ad == 'fgm':
        fgm = FGM(model)
    elif open_ad == 'pgd':
        pgd = PGD(model)
        K = 3

    # model-store-path
    # model_path = '/kaggle/output/' + name + '.pkl'  # by default, save under the current directory
    output_dir = 'output'
    state = {}

    time0 = time.time()
    best_loss = 999
    early_stop = 0
    for e in range(epoch):
        print("*" * 100)
        print("Epoch:", e)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        # note: the optimizer is rebuilt every epoch, so warmup restarts each epoch
        optimizer = BertAdam(optimizer_grouped_parameters, lr=lr,
                             warmup=0.05, t_total=len(train_iter))

        train_loss = 0
        train_c = 0
        train_right_num = 0
        model.train()  # set the module in training mode
        print('training..., %s, e:%d, lr:%7f' % (name, e, lr))
        for batch in tqdm(train_iter):  # each iteration yields batch_size samples
            optimizer.zero_grad()  # clear gradients
            batch = [b.to(device) for b in batch]  # cpu -> GPU

            # standard training step
            labels = batch[-1].view(-1).cpu().numpy()
            loss, bert_enc = model(batch, task='train', epoch=epoch)  # forward pass; compute the loss
            right_num = count_right_num(bert_enc, labels)

            # multi-gpu training!
            if n_gpu > 1:
                loss = loss.mean()
            loss.backward()  # backprop to compute gradients

            if open_ad == 'fgm':
                # adversarial training: add a perturbation on the embeddings
                fgm.attack()
                if model_type == 'multi-task':
                    loss_adv, _, _ = model(batch, task='train')
                else:
                    loss_adv, _ = model(batch, task='train')
                if n_gpu > 1:
                    loss_adv = loss_adv.mean()
                loss_adv.backward()  # accumulate the adversarial gradient on top of the normal one
                fgm.restore()  # restore the embedding parameters
            elif open_ad == 'pgd':
                pgd.backup_grad()  # adversarial training
                for t in range(K):
                    # perturb the embeddings; back up param.data on the first attack
                    pgd.attack(is_first_attack=(t == 0))
                    if t != K - 1:
                        optimizer.zero_grad()
                    else:
                        pgd.restore_grad()
                    if model_type == 'multi-task':
                        loss_adv, _, _ = model(batch, task='train')
                    else:
                        loss_adv, _ = model(batch, task='train')
                    if n_gpu > 1:
                        loss_adv = loss_adv.mean()
                    loss_adv.backward()  # accumulate the adversarial gradient on top of the normal one
                pgd.restore()  # restore the embedding parameters

            optimizer.step()  # update parameters

            train_loss += loss.item()  # accumulate the loss
            train_c += 1
            train_right_num += right_num

        val_loss = 0
        val_c = 0
        val_right_num = 0
        model.eval()
        print('eval...')
        with torch.no_grad():  # no gradient computation during eval
            for batch in tqdm(dev_iter):  # each iteration yields batch_size samples
                batch = [b.to(device) for b in batch]
                labels = batch[-1].view(-1).cpu().numpy()
                loss, bert_enc = model(batch, task='train', epoch=epoch)  # forward pass; compute the loss
                right_num = count_right_num(bert_enc, labels)

                if n_gpu > 1:
                    loss = loss.mean()

                val_c += 1
                val_loss += loss.item()
                val_right_num += right_num

        train_acc = train_right_num / train_num
        val_acc = val_right_num / val_num
        print('train_acc: %.4f, val_acc: %.4f' % (train_acc, val_acc))
        print('train_loss: %.4f, val_loss: %.4f, time: %d' %
              (train_loss / train_c, val_loss / val_c, time.time() - time0))

        if val_loss / val_c < best_loss:
            early_stop = 0
            best_loss = val_loss / val_c
            best_acc = val_acc

            # 3.24 update: avoid the DataParallel pitfall when saving under multi-gpu training
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            model_to_save = model.module if hasattr(model, 'module') else model
            state['model_state'] = model_to_save.state_dict()
            state['loss'] = val_loss / val_c
            state['acc'] = val_acc
            state['e'] = e
            state['time'] = time.time() - time0
            state['lr'] = lr
            output_model_file = os.path.join(output_dir, name + '.pkl')
            torch.save(state, output_model_file)
            # torch.save(state, model_path)

            best_epoch = e
            cost_time = time.time() - time0
            tmp_train_acc = train_acc
            best_model = model
        else:
            early_stop += 1
            if early_stop == 2:
                break
            model = best_model
            lr = lr * 0.5
            print("best_loss:", best_loss)

    # 3.12 add: print the final best results
    print('-' * 30)
    print('best_epoch:', best_epoch, 'best_loss:', best_loss, 'best_acc:', best_acc,
          'reach time:', cost_time, '\n')

    # model-clean
    del model
    gc.collect()

    # write the experiment results to the log
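# -- hedged sketches (not in the source): plausible versions of the FGM helper and
#    count_right_num used above. FGM (Fast Gradient Method) perturbs the embedding
#    weights along the normalized gradient, lets the caller run a second
#    forward/backward to accumulate the adversarial gradient, then restores the weights.
import torch

class FGM:
    def __init__(self, model, eps=1.0):
        self.model = model
        self.eps = eps
        self.backup = {}

    def attack(self, emb_name='embedding'):
        # perturb every trainable parameter whose name contains emb_name
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                self.backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    param.data.add_(self.eps * param.grad / norm)

    def restore(self, emb_name='embedding'):
        # undo the perturbation applied in attack()
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name and name in self.backup:
                param.data = self.backup[name]
        self.backup = {}

def count_right_num(logits, labels):
    # number of correct predictions; labels is a numpy array of class ids
    preds = torch.argmax(logits, dim=-1).cpu().numpy()
    return int((preds == labels).sum())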
def main():
    args = configparser.ConfigParser()
    args.read('argsConfig.ini')

    if args.getboolean('Log', 'flush_history') == 1:
        objects = os.listdir(args.get('Log', 'log_path'))
        for f in objects:
            if os.path.isdir(args.get('Log', 'log_path') + f):
                shutil.rmtree(args.get('Log', 'log_path') + f)

    if args.getboolean('Log', 'delete_model_name_dir'):
        objects = os.listdir(args.get('Log', 'output'))
        for f in objects:
            if f == args.get('Log', 'model_name'):
                shutil.rmtree(args.get('Log', 'output') + args.get('Log', 'model_name') + '/')

    now = datetime.now()
    logdir = args.get('Log', 'log_path') + now.strftime("%Y%m%d-%H%M%S") + "/"
    os.makedirs(logdir)
    log_file = logdir + 'log.txt'
    writer = SummaryWriter(logdir)

    texts, labels, number_of_classes, sample_weights = load_data(args, 'train')
    class_names = sorted(list(set(labels)))
    class_names = [str(class_name - 1) for class_name in class_names]

    train_texts, X_dev, train_labels, y_dev_labels, train_sample_weights, _ = train_test_split(
        texts, labels, sample_weights,
        train_size=args.getfloat('Train', 'train_size'),
        test_size=args.getfloat('Train', 'dev_size'),
        random_state=42,
        stratify=labels)

    training_set = MyDataset(train_texts, train_labels, args)
    validation_set = MyDataset(X_dev, y_dev_labels, args)
    training_params = {"batch_size": args.getint('Train', 'batch_size'),
                       "shuffle": True,
                       "num_workers": args.getint('Train', 'workers'),
                       "drop_last": True}
    validation_params = {"batch_size": args.getint('Train', 'batch_size'),
                         "shuffle": False,
                         "num_workers": args.getint('Train', 'workers'),
                         "drop_last": True}

    if args.getboolean('Train', 'use_sampler'):
        train_sample_weights = torch.from_numpy(train_sample_weights)
        sampler = WeightedRandomSampler(train_sample_weights.type('torch.DoubleTensor'),
                                        len(train_sample_weights))
        # a sampler is mutually exclusive with shuffle
        training_params['sampler'] = sampler
        training_params['shuffle'] = False

    training_generator = DataLoader(training_set, **training_params)
    validation_generator = DataLoader(validation_set, **validation_params)

    model = CharacterLevelCNN(number_of_classes, args)

    if args.getboolean('Model', 'visualize_model_graph'):
        x = torch.zeros((args.getint('Train', 'batch_size'),
                         args.getint('DataSet', 'char_num'),
                         args.getint('DataSet', 'l0')))
        out = model(x)
        make_dot(out).render("CharacterLevelCNN", format="png", quiet_view=True)

    if torch.cuda.is_available():
        model.cuda()

    # TODO: check other loss functions for binary and multi-label problems
    if args.get('Train', 'criterion') == 'nllloss':
        criterion = nn.NLLLoss()
        # criterion = nn.BCELoss()

    # optimization scheme
    if args.get('Train', 'optimizer') == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.getfloat('Train', 'lr'))
    elif args.get('Train', 'optimizer') == 'SGD':
        if args.get('Train', 'scheduler') == 'clr':
            # base lr is 1 so the cyclical LambdaLR factor becomes the effective lr
            optimizer = torch.optim.SGD(model.parameters(), lr=1,
                                        momentum=0.9, weight_decay=0.00001)
        else:
            optimizer = optim.SGD(model.parameters(), lr=args.getfloat('Train', 'lr'), momentum=0.9)
    elif args.get('Train', 'optimizer') == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=args.getfloat('Train', 'lr'))

    if os.path.isfile(args.get('Log', 'continue_from_model_checkpoint')):
        print("=> loading checkpoint from '{}'".format(args.get('Log', 'continue_from_model_checkpoint')))
        checkpoint = torch.load(args.get('Log', 'continue_from_model_checkpoint'))
        start_epoch = checkpoint['epoch']
        start_iter = checkpoint.get('iter', None)
        best_f1 = checkpoint.get('best_f1', None)
        if start_iter is None:
            # assume the model was saved after a completed epoch, so start at the next one
            start_epoch += 1
            start_iter = 0
        else:
            start_iter += 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        start_iter = 0
        start_epoch = 0
        best_f1 = 0
    best_epoch = 0

    if args.get('Train', 'scheduler') == 'clr':
        stepsize = int(args.getint('Train', 'clr_step_size') * len(training_generator))
        clr = utils.cyclical_lr(stepsize,
                                args.getfloat('Train', 'clr_min_lr'),
                                args.getfloat('Train', 'clr_max_lr'))
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    else:
        scheduler = None

    lr_half_cnt = 0
    utils.init_log(log_file=log_file, args=args, labels=class_names)
    try:
        for epoch in range(start_epoch, args.getint('Train', 'epochs')):
            training_loss, training_accuracy, train_f1 = train(
                model, training_generator, optimizer, criterion, epoch, start_iter,
                writer, log_file, scheduler, class_names, args,
                args.getint('Log', 'print_out_every'))
            validation_loss, validation_accuracy, validation_f1 = evaluate(
                model, validation_generator, criterion, epoch, writer, log_file)

            print('\n[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}'
                  .format(epoch + 1, args.getint('Train', 'epochs'),
                          training_loss, training_accuracy, validation_loss, validation_accuracy))
            print("=" * 50)
            with open(log_file, 'a') as f:
                f.write('[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}\n'
                        .format(epoch + 1, args.getint('Train', 'epochs'),
                                training_loss, training_accuracy, validation_loss, validation_accuracy))
                f.write('=' * 50)

            # learning-rate scheduling: halve the SGD lr every 3 epochs, at most 10 times
            if args.get('Train', 'scheduler') == 'step':
                if args.get('Train', 'optimizer') == 'SGD' and ((epoch + 1) % 3 == 0) and lr_half_cnt < 10:
                    current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                    current_lr /= 2
                    lr_half_cnt += 1
                    print('Decreasing learning rate to {0}'.format(current_lr))
                    with open(log_file, 'a') as f:
                        f.write('Decreasing learning rate to {0}\n'.format(current_lr))
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = current_lr

            if args.getboolean('Log', 'checkpoint'):
                state = {'epoch': epoch, 'optimizer': optimizer.state_dict(), 'best_f1': best_f1}
                if args.getint('Log', 'save_interval') > 0 and epoch % args.getint('Log', 'save_interval') == 0:
                    save_checkpoint(model, state, optimizer, args, epoch,
                                    validation_loss, validation_accuracy, validation_f1)
                if validation_f1 > best_f1:
                    best_f1 = validation_f1
                    best_epoch = epoch
                    save_checkpoint(model, state, optimizer, args, epoch,
                                    validation_loss, validation_accuracy, validation_f1)

            if args.getboolean('Train', 'early_stopping'):
                if epoch - best_epoch > args.getint('Train', 'patience') > 0:
                    print("Early stopping at epoch {}. Best validation F1 was achieved at epoch {}".format(
                        epoch, best_epoch))
                    break
    except KeyboardInterrupt:
        print('Exit on keyboard interrupt\n')
        save_checkpoint(model, state, optimizer, args, epoch,
                        validation_loss, validation_accuracy, validation_f1)
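# -- hedged sketch (not in the source): a triangular utils.cyclical_lr compatible
#    with the LambdaLR call above. Because the SGD base lr is set to 1, the factor
#    returned here IS the effective learning rate, oscillating between min_lr and max_lr.
import math

def cyclical_lr(stepsize, min_lr, max_lr):
    def relative(it):
        cycle = math.floor(1 + it / (2 * stepsize))
        x = abs(it / stepsize - 2 * cycle + 1)
        return max(0.0, 1.0 - x)
    return lambda it: min_lr + (max_lr - min_lr) * relative(it)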
def train(opts):
    # device = torch.device('cpu') if not torch.cuda.is_available() or opts.cpu else torch.device('cuda')
    device = torch.device("cuda")
    print(device)

    # load dataset
    dataset_train = MyDataset('train.txt')
    dataset_test = MyDataset('test.txt')

    # define training and validation data loaders
    data_loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=opts.batch_size,
                                                    shuffle=True, num_workers=1)
    data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=opts.batch_size,
                                                   shuffle=False, num_workers=1)

    model = Net()
    # model = nn.DataParallel(model)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr, betas=(0.9, 0.99))
    # optimizer = torch.optim.Adamax(model.parameters(), lr=opts.lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.1)
    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 100, 150, 200, 250, 300], gamma=0.1)

    weights = torch.FloatTensor([6, 2, 5, 1]).to(device)  # class weights; unused with MSELoss below
    loss_fct = MSELoss()

    print("Model's state_dict:")
    for param_tensor in model.state_dict():
        print(param_tensor, "\t", model.state_dict()[param_tensor].size())
    # Print optimizer's state_dict
    print("Optimizer's state_dict:")
    for var_name in optimizer.state_dict():
        print(var_name, "\t", optimizer.state_dict()[var_name])

    train_loss_list = []
    train_acc_list = []
    test_loss_list = []
    test_acc_list = []
    # writer = SummaryWriter(log_dir='')

    for epoch in range(opts.epochs):
        train_batch_num = 0
        train_loss = 0.0
        model.train()
        counts = 0
        for seq, label in data_loader_train:
            seq = seq.to(device)
            label = label.to(device)
            seq = seq.unsqueeze(1)
            optimizer.zero_grad()
            pred = model(seq)
            loss = loss_fct(pred, label.view(-1))
            loss.backward()
            optimizer.step()
            train_batch_num += 1
            train_loss += loss.item()
            predict = pred.argmax(dim=1, keepdim=True)
            counts += predict.cpu().eq(label.cpu().view_as(predict)).sum().item()
        avg_acc = counts * 1.0 / len(data_loader_train.dataset)
        train_loss_list.append(train_loss / len(data_loader_train.dataset))
        train_acc_list.append(avg_acc)
        # writer.add_graph(model, seq)

        # write csv files
        train_loss_dataframe = pd.DataFrame(data=train_loss_list)
        train_acc_dataframe = pd.DataFrame(data=train_acc_list)
        train_loss_dataframe.to_csv('./output_results/train_loss.csv', index=False)
        train_acc_dataframe.to_csv('./output_results/train_accuracy.csv', index=False)

        model.eval()
        # for name, layer in model._modules.items():
        #     # view feature map
        #     seq_1 = seq.transpose(0, 1)
        #     seq_grid = vutils.make_grid(seq_1, normalize=True, scale_each=True)
        #     writer.add_image(f'{name}_feature_maps', seq_grid, global_step=0)
        test_y = []
        test_y_pred = []
        counts = 0
        test_loss = 0
        test_batch_num = 0
        outs = []
        labels = []
        with torch.no_grad():
            for test_seq, test_label in data_loader_test:
                test_seq = test_seq.to(device)
                test_label = test_label.to(device)
                test_seq = test_seq.unsqueeze(1)
                t_pred = model(test_seq)
                outs.append(t_pred.cpu())
                labels.append(test_label.cpu())
                # accuracy
                loss = loss_fct(t_pred, test_label.view(-1))
                test_loss += loss.item()
                test_batch_num += 1
                test_y += list(test_label.data.cpu().numpy().flatten())
                test_y_pred += list(t_pred.data.cpu().numpy().flatten())
                predict = t_pred.argmax(dim=1, keepdim=True)
                counts += predict.cpu().eq(test_label.cpu().view_as(predict)).sum().item()
        outs = torch.cat(outs, dim=0)
        labels = torch.cat(labels).reshape(-1)
        avg_acc = counts * 1.0 / len(data_loader_test.dataset)
        test_acc_list.append(avg_acc)
        test_loss_list.append(test_loss / len(data_loader_test.dataset))
        print('epoch: %d, train loss: %.4f, test loss: %.4f, test accuracy: %.4f'
              % (epoch, train_loss / train_batch_num, test_loss / test_batch_num, avg_acc))
        # writer.add_scalar('scalar/train_loss', train_loss / train_batch_num, epoch)
        # writer.add_scalar('scalar/test_loss', test_loss / test_batch_num, epoch)

        # write csv files
        test_loss_dataframe = pd.DataFrame(data=test_loss_list)
        test_acc_dataframe = pd.DataFrame(data=test_acc_list)
        test_loss_dataframe.to_csv('./output_results/test_loss.csv', index=False)
        test_acc_dataframe.to_csv('./output_results/test_accuracy.csv', index=False)

    # writer.close()
    draw_test_info(test_loss_list, test_acc_list)
    draw_train_info(train_loss_list, train_acc_list)
    draw_roc_confusion(outs, labels)
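# -- hedged sketch (not in the source): only the confusion-matrix half of a plausible
#    draw_roc_confusion, assuming `outs` holds (N, num_classes) logits and `labels`
#    integer class ids, as collected in the loop above.
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def draw_roc_confusion(outs, labels):
    preds = outs.argmax(dim=1).numpy()
    cm = confusion_matrix(labels.numpy(), preds)
    fig, ax = plt.subplots()
    ax.imshow(cm, cmap='Blues')
    ax.set_xlabel('predicted')
    ax.set_ylabel('true')
    for r in range(cm.shape[0]):
        for c in range(cm.shape[1]):
            ax.text(c, r, cm[r, c], ha='center', va='center')
    fig.savefig('./output_results/confusion_matrix.png')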
decoder = net.decoder
vgg = net.vgg

vgg.load_state_dict(torch.load(args.vgg))
vgg = nn.Sequential(*list(vgg.children())[:31])
network = net.Net(vgg, decoder)
network.train()
network.to(device)

content_tf = train_transform()
style_tf = train_transform()

# content_dataset = FlatFolderDataset(args.content_dir, content_tf)
# style_dataset = FlatFolderDataset(args.style_dir, style_tf)
content_dataset = MyDataset(dataroot=args.dataroot, datalist='files/list_train.txt', is_content=True)
style_dataset = MyDataset(dataroot=args.dataroot, datalist='files/list_train.txt', is_content=False)

content_iter = iter(data.DataLoader(
    content_dataset, batch_size=args.batch_size,
    sampler=InfiniteSamplerWrapper(content_dataset),
    num_workers=args.n_threads))
style_iter = iter(data.DataLoader(
    style_dataset, batch_size=args.batch_size,
    sampler=InfiniteSamplerWrapper(style_dataset),
    num_workers=args.n_threads))

optimizer = torch.optim.Adam(network.decoder.parameters(), lr=args.lr)

for i in tqdm(range(args.max_iter)):
    adjust_learning_rate(optimizer, iteration_count=i)
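# -- hedged sketch (not in the source): an InfiniteSamplerWrapper in the style of
#    the AdaIN training code this snippet resembles. It re-shuffles the index
#    permutation forever, so the iter()/next() pattern above never exhausts.
import numpy as np
from torch.utils import data

def _infinite_indices(n):
    order = np.random.permutation(n)
    i = 0
    while True:
        yield order[i]
        i += 1
        if i >= n:
            order = np.random.permutation(n)
            i = 0

class InfiniteSamplerWrapper(data.sampler.Sampler):
    def __init__(self, data_source):
        self.num_samples = len(data_source)

    def __iter__(self):
        return iter(_infinite_indices(self.num_samples))

    def __len__(self):
        return 2 ** 31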