# opt = torch.optim.SGD(net.parameters(), lr=learning_rate)
opt = Ranger(net.parameters(), lr=learning_rate)

today = str(datetime.date.today())
logger = get_log(model_name + today + '_log.txt')

# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=5, eta_min=4e-08)
# scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
#                          args.n_epoch, len(train_loader), logger=logger,
#                          lr_step=args.lr_step)
scheduler = PolynomialLRDecay(opt, max_decay_steps=100, end_learning_rate=0.0001, power=2.0)

for epoch in range(num_epochs):
    logger.info("Epoch: {}/{}".format(epoch + 1, num_epochs))
    scheduler.step()

    train_hist = train(train_loader, num_classes, device, net, opt, criterion)
    logger.info(('loss={}'.format(train_hist["loss"]),
                 'precision={}'.format(train_hist["precision"]),
                 'recall={}'.format(train_hist["recall"]),
                 'f_score={}'.format(train_hist["f_score"]),
                 'oa={}'.format(train_hist["oa"])))
    for k, v in train_hist.items():
        history["train " + k].append(v)

    val_hist = validate(val_loader, num_classes, device, net, criterion)
    logger.info(('loss={}'.format(val_hist["loss"]),
                 'precision={}'.format(val_hist["precision"]),
                 'recall={}'.format(val_hist["recall"]),
                 'f_score={}'.format(val_hist["f_score"]),
                 'oa={}'.format(val_hist["oa"])))
def train(fold: int, verbose: int = 100) -> None:
    split_dataset('./data/dirty_mnist_2nd_answer.csv')
    df = pd.read_csv('./data/split_kfold.csv')
    df_train = df[df['kfold'] != fold].reset_index(drop=True)
    df_valid = df[df['kfold'] == fold].reset_index(drop=True)
    df_train.drop(['kfold'], axis=1).to_csv(f'./data/train-kfold-{fold}.csv', index=False)
    df_valid.drop(['kfold'], axis=1).to_csv(f'./data/valid-kfold-{fold}.csv', index=False)

    trainset = MnistDataset('./data/train', f'./data/train-kfold-{fold}.csv', transforms_train, a_train)
    train_loader = DataLoader(trainset, batch_size=config.batch_size, shuffle=True)
    validset = MnistDataset('./data/train', f'./data/valid-kfold-{fold}.csv', transforms_test, None)
    valid_loader = DataLoader(validset, batch_size=8, shuffle=False)

    num_epochs = config.epochs
    device = 'cuda'
    model = MnistModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    decay_steps = (len(trainset) // config.batch_size) * config.epochs
    scheduler = PolynomialLRDecay(optimizer, max_decay_steps=decay_steps, end_learning_rate=1e-6, power=0.9)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
    # optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay=0.9, momentum=0.9)
    criterion = torch.nn.BCELoss()

    for epoch in range(num_epochs):
        model.train()
        for i, (images, targets) in enumerate(train_loader):
            optimizer.zero_grad()
            images = images.to(device)
            targets = targets.to(device)

            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            scheduler.step()

            if (i + 1) % verbose == 0:
                outputs = outputs > 0.5
                acc = (outputs == targets).float().mean()
                print(f'Fold {fold} | Epoch {epoch} | Train_L: {loss.item():.7f} | Train_A: {acc.item():.7f}')

        model.eval()
        valid_acc = 0.0
        valid_loss = 0.0
        with torch.no_grad():
            for i, (images, targets) in enumerate(valid_loader):
                images = images.to(device)
                targets = targets.to(device)

                outputs = model(images)
                loss = criterion(outputs, targets)
                valid_loss += loss.item()
                outputs = outputs > 0.5
                valid_acc += (outputs == targets).float().mean()
        print(f'Fold {fold} | Epoch {epoch} | valid_L: {valid_loss / (i + 1):.7f} | valid_A: {valid_acc / (i + 1):.7f}\n')

        if epoch > num_epochs - 10 and epoch < num_epochs - 1:
            torch.save(model.state_dict(), f'./data/efficientnet7-f{fold}-{epoch}.pth')
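# Note: an illustrative sketch, not part of any project above. The snippets in this
# collection step the scheduler in two different places: once per batch (as in the
# k-fold MNIST loop above) or once per epoch (as in several loops below). Assuming
# PolynomialLRDecay advances one decay step per .step() call, max_decay_steps should
# be sized accordingly; the self-contained comparison below shows both patterns.
import torch
from torch_poly_lr_decay import PolynomialLRDecay

params = [torch.zeros(1, requires_grad=True)]
steps_per_epoch, epochs = 100, 10

# per-batch stepping: decay is spread over every optimizer update
opt_batch = torch.optim.SGD(params, lr=0.01)
sched_batch = PolynomialLRDecay(opt_batch, max_decay_steps=steps_per_epoch * epochs,
                                end_learning_rate=1e-6, power=0.9)

# per-epoch stepping: decay is spread over the number of epochs
opt_epoch = torch.optim.SGD(params, lr=0.01)
sched_epoch = PolynomialLRDecay(opt_epoch, max_decay_steps=epochs,
                                end_learning_rate=1e-6, power=0.9)

for epoch in range(epochs):
    for _ in range(steps_per_epoch):
        sched_batch.step()   # advance once per batch
    sched_epoch.step()       # advance once per epoch
    print(epoch, opt_batch.param_groups[0]['lr'], opt_epoch.param_groups[0]['lr'])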
valid_loss = 0
pred_list = []
label_list = []
epoch_loss = 0

for img, label in train_loader:
    img = torch.tensor(img, device=device, dtype=torch.float32)
    label = torch.tensor(label, device=device, dtype=torch.float32)

    optimizer.zero_grad()
    pred = model(img)
    loss = criterion(pred, label)
    loss.backward()
    optimizer.step()
    plr.step()

    pred = pred.argmax(1).detach().cpu().numpy()
    label = label.detach().cpu().numpy().argmax(1)
    pred_list += list(pred)
    label_list += list(label)
    epoch_loss += loss.item()

train_epoch_accuracy = accuracy_score(label_list, pred_list)
train_epoch_loss = epoch_loss / len(train_loader)

pred_list = []
label_list = []
epoch_loss = 0
model.eval()
def main(args):
    # file paths
    image_path = './train_image'
    path = './data'
    label_path = 'training data dic.txt'

    # Hyper Parameters
    PrograssiveModelDict = None
    if args.method == "efficientnet" or args.method == "efficientnetV2":
        METHOD = f"{args.method}-{args.method_level}"
        if args.method == "efficientnetV2":
            PrograssiveModelDict = PrograssiveBounds[args.method][args.method_level]
    elif args.method == 'regnet':
        METHOD = args.method
    else:
        METHOD = args.method + args.method_level

    # Environment
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device('cpu')
        print('Warning! Using CPU.')

    Epoch = args.epochs
    BATCH_SIZE = args.batchsize
    lr = args.learning_rate
    split_rate = args.split_rate
    resize = args.resize
    resize_size = args.resize_size
    num_classes = 801
    valid_batch_size = args.validbatchsize

    CHECKPOINT_FOLDER = args.checkpoint_root + METHOD + '/'
    START_EPOCH = (getFinalEpoch(args=args, CHECKPOINT_FOLDER=CHECKPOINT_FOLDER) + 1
                   if getFinalEpoch(args=args, CHECKPOINT_FOLDER=CHECKPOINT_FOLDER) is not None
                   else 0)
    is_useweight = True

    print("init data folder")
    Path(CHECKPOINT_FOLDER).mkdir(exist_ok=True, parents=True)

    label_dic = load_label_dic(label_path)
    word_dic = load_word_dic(label_path)
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    clean_image_path = './color_dataset/'
    synthesis_path = './synthesis/'
    # clean_transform = transforms.Compose([
    #     transforms.Grayscale(num_output_channels=1),
    #     transforms.Resize((resize_size, resize_size)),
    #     transforms.ToTensor(),
    # ])

    train_dataset = []
    valid_dataset = []
    for idx, dir_ in enumerate(os.listdir(clean_image_path)):
        # if args.pretrain_cleandataset:
        dataset = ChineseHandWriteDataset(root=clean_image_path + dir_, label_dic=label_dic,
                                          transform=transform, resize=resize,
                                          resize_size=resize_size)
        # dataset = CleanDataset(root=synthesis_path + dir_, label_dic=label_dic, transform=transform,
        #                        resize=resize, resize_size=resize_size,
        #                        randaug=args.method == "efficientnetV2")
        train_set_size = int(len(dataset) * split_rate)
        valid_set_size = len(dataset) - train_set_size
        train_set, valid_set = data.random_split(dataset, [train_set_size, valid_set_size],
                                                 torch.Generator().manual_seed(args.seed))
        train_dataset.append(train_set)
        valid_dataset.append(valid_set)

    train_dataset = data.ConcatDataset(train_dataset)
    valid_dataset = data.ConcatDataset(valid_dataset)
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                                  pin_memory=True, num_workers=args.num_workers)
    valid_dataloader = DataLoader(valid_dataset, batch_size=valid_batch_size,
                                  pin_memory=False, num_workers=args.num_workers)

    print(f"model is {METHOD}")
    model = switchModel(in_features=train_dataset[0][0].shape[0], num_classes=num_classes,
                        args=args, METHOD=METHOD)
    if args.load_model:
        modelPath = getModelPath(CHECKPOINT_FOLDER=CHECKPOINT_FOLDER, args=args)
        if modelPath != "":
            model.load_state_dict(torch.load(modelPath))
    model.to(device)

    # get each class weight
    weights = None
    if is_useweight:
        weights = getWeights(root=clean_image_path, split_rate=split_rate)

    # Label smoothing
    # loss = SmoothCrossEntropyLoss(weight=weights).to(device)
    # Focal Loss
    loss = FocalLoss(weight=weights).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    scheduler_poly_lr_decay = PolynomialLRDecay(optimizer, max_decay_steps=100,
                                                end_learning_rate=args.ending_learning_rate,
                                                power=2.0)

    print("------------------ training start -----------------")
    result_param = {
        'training_loss': [],
        'training_accuracy': [],
        'validation_loss': [],
        'validation_accuracy': []
    }

    for epoch in range(START_EPOCH, Epoch):
        batchI = 0
        scheduler_poly_lr_decay.step(epoch)

        progressive = None
        if PrograssiveModelDict is not None:
            randaugment = RandAugment()
            progressive = prograssiveNow(epoch, Epoch, PrograssiveModelDict)
            randaugment.m = progressive["randarg"]

        since = time.time()
        running_training_loss = 0
        running_training_correct = 0
        running_valid_loss = 0
        running_valid_correct = 0

        dataset.train()
        model.train()
        train_bar = tqdm(train_dataloader)
        for imgst, label, folder, filename in train_bar:
            label = label.to(device)
            if progressive is not None:
                imgst, label = mixup(imgst, label, progressive["mix"])
                toPIL = transforms.ToPILImage()
                transform = transforms.Compose([
                    transforms.Resize((int(progressive["imgsize"]), int(progressive["imgsize"]))),
                    transforms.ToTensor(),
                ])
                imgs = torch.zeros((imgst.size()[0], 3,
                                    int(progressive["imgsize"]), int(progressive["imgsize"])))
                for i in range(imgst.size()[0]):
                    imgs[i] = transform(randaugment(toPIL(imgst[i])))
                imgs = imgs.to(device)
                # torchvision.utils.save_image(imgs, f"preprocessImgs/{epoch}-{batchI}.jpg")
                setDropout(model, progressive["drop"])
            else:
                # assumed fallback: use the loader's images directly when progressive
                # resizing/augmentation is disabled
                imgs = imgst.to(device)

            optimizer.zero_grad()
            out = model(imgs)
            loss_val = loss(out, label)
            _, pred_class = torch.max(out.data, 1)
            running_training_correct += torch.sum(pred_class == label)
            running_training_loss += loss_val
            loss_val.backward()
            optimizer.step()
            train_bar.set_description(desc='[%d/%d] | Train Loss:%.4f' %
                                      (epoch + 1, Epoch, loss_val.item() / len(imgs)))

        with torch.no_grad():
            dataset.eval()
            model.eval()
            if progressive is not None:
                setDropout(model, 0)
            val_bar = tqdm(valid_dataloader)
            for imgs, label, folder, filename in val_bar:
                imgs = imgs.to(device)
                label = label.to(device)
                out = model(imgs)
                loss_val = loss(out, label)
                val_bar.set_description(desc='[%d/%d] | Validation Loss:%.4f' %
                                        (epoch + 1, Epoch, loss_val.item() / len(imgs)))
                _, pred_class = torch.max(out.data, 1)
                running_valid_correct += torch.sum(pred_class == label)
                running_valid_loss += loss_val

        result_param['training_loss'].append(
            running_training_loss.item() / len(train_dataset) * BATCH_SIZE)
        result_param['training_accuracy'].append(
            running_training_correct.item() / len(train_dataset))
        result_param['validation_loss'].append(
            running_valid_loss.item() / len(valid_dataset) * valid_batch_size)
        result_param['validation_accuracy'].append(
            running_valid_correct.item() / len(valid_dataset))

        print("Epoch:{} Train Loss:{:.4f}, Train Accuracy:{:.4f}, "
              "Validation Loss:{:.4f}, Validation Accuracy:{:.4f}, Learning Rate:{:.4f}".format(
                  epoch + 1,
                  result_param['training_loss'][-1],
                  result_param['training_accuracy'][-1],
                  result_param['validation_loss'][-1],
                  result_param['validation_accuracy'][-1],
                  optimizer.param_groups[0]['lr']))

        now_time = time.time() - since
        print("Training time is:{:.0f}m {:.0f}s".format(now_time // 60, now_time % 60))

        torch.save(model.state_dict(),
                   str('./checkpoints/' + METHOD + '/' + "EPOCH_" + str(epoch) + ".pkl"))
        out_file = open(str('./checkpoints/' + METHOD + '/' + 'result_param.json'), "w+")
        json.dump(result_param, out_file, indent=4)

    if args.xgboost:
        print("---------------Two stage - XGboost---------------------")
        with torch.no_grad():
            x_valid, y_valid = [], []
            val_bar = tqdm(valid_dataloader)
            # the dataset yields (image, label, folder, filename); only image and label are used
            for imgs, label, folder, filename in val_bar:
                imgs = imgs.to(device)
                label = label.to(device)
                # to numpy
                imgs = CustomPredict(model, imgs).cpu().detach().numpy()
                label = label.cpu().detach().numpy()
                if not len(x_valid):
                    x_valid, y_valid = imgs, label
                else:
                    x_valid, y_valid = (np.concatenate((x_valid, imgs)),
                                        np.concatenate((y_valid, label)))

            xgb_train, xgb_label = [], []
            train_bar = tqdm(train_dataloader)
            # the dataset yields (image, label, folder, filename); only image and label are used
            for imgs, label, folder, filename in train_bar:
                imgs = imgs.to(device)
                label = label.to(device)
                # to numpy
                imgs = CustomPredict(model, imgs).cpu().detach().numpy()
                label = label.cpu().detach().numpy()
                if not len(xgb_train):
                    xgb_train, xgb_label = imgs, label
                else:
                    xgb_train, xgb_label = (np.concatenate((xgb_train, imgs)),
                                            np.concatenate((xgb_label, label)))

            dval = xgboost.DMatrix(x_valid, y_valid)
            dtrain = xgboost.DMatrix(xgb_train, xgb_label)
            params = {
                'max_depth': 5,                # the maximum depth of each tree
                'eta': lr,                     # the training step for each iteration
                'objective': 'multi:softmax',  # multiclass classification using the softmax objective
                'num_class': 801,              # the number of classes that exist in this dataset
                'updater': 'grow_gpu_hist',
                'tree_method': 'gpu_hist',
            }
            xgbmodel = xgboost.Booster()
            # xgbmodel.load_model('xgboost.model')
            xgbmodel = xgboost.train(params, dtrain, num_boost_round=100,
                                     evals=[(dval, 'val'), (dtrain, 'train')])
            print(sum(xgbmodel.predict(dval) == y_valid) / len(y_valid))
            xgbmodel.save_model('xgboost.model')
valid_best_accuracy = 0
for epoch in range(epochs):
    model.train()
    batch_accuracy_list = []
    batch_loss_list = []
    start = time.time()

    for n, (X, y) in enumerate(train_loader):
        X = torch.tensor(X, device=device, dtype=torch.float32)
        y = torch.tensor(y, device=device, dtype=torch.float32)

        y_hat = model(X)
        optimizer.zero_grad()
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
        scheduler_poly_lr_decay.step()

        y_hat = y_hat.cpu().detach().numpy()
        y_hat = y_hat > 0.5
        y = y.cpu().detach().numpy()
        batch_accuracy = (y_hat == y).mean()
        batch_accuracy_list.append(batch_accuracy)
        batch_loss_list.append(loss.item())

    model.eval()
    valid_batch_accuracy = []
    valid_batch_loss = []
    with torch.no_grad():
        for n_valid, (X_valid, y_valid) in enumerate(valid_loader):
            X_valid = torch.tensor(X_valid, device=device)  # , dtype=torch.float32)
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, test_acc, len(test_loader), test_acc / len(test_loader)))
    return np.round(test_acc / len(test_loader), 2)


best_prec1 = 0.
for epoch in range(args.start_epoch, args.epochs):
    if args.lr_decay == 'stepwise':
        # step-wise LR schedule
        if epoch in args.schedule:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
    elif args.lr_decay == 'poly':
        # Poly LR schedule
        scheduler_poly_lr_decay.step(epoch)
    else:
        raise NotImplementedError

    train(epoch)
    prec1 = test()
    history_score[epoch][2] = prec1
    np.savetxt(os.path.join(args.save, 'record.txt'), history_score, fmt='%10.5f', delimiter=',')

    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)
    model_rounded = round_shift_weights(model, clone=True)
    save_checkpoint(
        {
            'epoch': epoch + 1,
import torch
from torch_poly_lr_decay import PolynomialLRDecay

if __name__ == '__main__':
    v = torch.zeros(10)
    optim = torch.optim.SGD([v], lr=0.01)
    scheduler = PolynomialLRDecay(optim, max_decay_steps=19, end_learning_rate=0.0001, power=2.0)
    for epoch in range(1, 20):
        scheduler.step(epoch)
        print(epoch, optim.param_groups[0]['lr'])
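# Illustrative sketch (an assumption about the scheduler's behaviour, not taken from the
# snippet above): polynomial decay is assumed to interpolate from the base LR to
# end_learning_rate as
#   lr(step) = (base_lr - end_lr) * (1 - min(step, max_decay_steps) / max_decay_steps) ** power + end_lr
# The hypothetical helper below evaluates that closed form so its output can be compared
# against the values printed by the loop above.
def poly_lr(step, base_lr=0.01, end_lr=0.0001, max_decay_steps=19, power=2.0):
    """Closed-form polynomial LR decay, clamped after max_decay_steps."""
    step = min(step, max_decay_steps)
    return (base_lr - end_lr) * (1 - step / max_decay_steps) ** power + end_lr

for epoch in range(1, 20):
    print(epoch, poly_lr(epoch))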