def train():
    """Run the full training loop for the Coconut model.

    Builds the data loaders, model, center loss, optimizer and LR schedule,
    optionally resumes from a checkpoint, then alternates train / eval /
    checkpoint once per epoch. Reads hyperparameters from the module-level
    ``args`` and places modules on the module-level ``device``.
    """
    # --- setup -------------------------------------------------------------
    loaders = get_data_loader()
    model = CoconutModel(num_of_classes=args.num_of_classes,
                         feature_size=args.feature_size)
    center_loss = CenterLoss(num_classes=args.num_of_classes,
                             feat_dim=args.feature_size,
                             use_gpu=torch.cuda.is_available())
    model.to(device)
    center_loss.to(device)

    # NOTE(review): model.bert_model is presumably a registered submodule, in
    # which case its parameters are already included by model.parameters()
    # and appear twice in this list — confirm against CoconutModel.
    params = (list(model.parameters())
              + list(center_loss.parameters())
              + list(model.bert_model.parameters()))

    optimizer = RAdam(params=params,
                      lr=args.lr,
                      betas=(0.0, 0.999),
                      eps=1e-3,
                      weight_decay=args.l2_reg)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                        milestones=[80, 150],
                                                        gamma=0.1)

    # --- optional resume ---------------------------------------------------
    start_epoch = 0
    if args.resume:
        ckpt = load_model(model=model,
                          optimizer=optimizer,
                          lr_scheduler=lr_scheduler,
                          center_loss=center_loss)
        (start_epoch, model, optimizer, lr_scheduler, center_loss) = ckpt

    # --- epoch loop: train, step LR, evaluate, checkpoint ------------------
    for epoch in range(start_epoch, args.epoch):
        train_model(epoch=epoch,
                    model=model,
                    optimizer=optimizer,
                    loader=loaders["train_loader"],
                    center_loss=center_loss)
        lr_scheduler.step()
        eval_model(epoch=epoch,
                   model=model,
                   loader=loaders["dev_loader"])
        # NOTE(review): "save_mode" reads like a typo for "save_model" —
        # verify the helper's actual name before renaming.
        save_mode(epoch=epoch,
                  model=model,
                  optimizer=optimizer,
                  lr_scheduler=lr_scheduler,
                  center_loss=center_loss)
    return
def train_net(net, train_loader, test_loader, lr, device, prefix):
    """Train ``net`` with cross-entropy plus center loss and log to TensorBoard.

    Args:
        net: model whose forward returns ``(features, logits)``.
        train_loader: DataLoader yielding ``(images, labels)`` batches.
        test_loader: DataLoader passed to ``eval_net`` after every epoch.
        lr: initial learning rate for both SGD optimizers.
        device: torch device the model, losses and batches are moved to.
        prefix: tag used for the SummaryWriter comment and the weights file.

    Side effects: rebinds the module-level ``tensorboard_writer``, prints
    per-iteration progress, and saves weights to ``zoo/{prefix}_params.pth``.
    """
    global tensorboard_writer
    tensorboard_writer = SummaryWriter(comment=prefix)

    net.to(device)

    # Losses: cross-entropy on logits, center loss on the feature embedding.
    criterion = nn.CrossEntropyLoss()
    criterion_cent = CenterLoss(num_classes=10, feat_dim=128)
    criterion_cent.to(device)

    # Separate optimizers: one for the network, one for the learnable centers.
    optimizer = optim.SGD(net.parameters(), lr=lr,
                          momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    optimizer_cent = optim.SGD(criterion_cent.parameters(), lr=lr,
                               momentum=MOMENTUM)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=MILESTONES, gamma=GAMMA)
    scheduler_cent = lr_scheduler.MultiStepLR(optimizer_cent,
                                              milestones=MILESTONES, gamma=GAMMA)

    # Baseline accuracy before any training.
    eval_net(net, test_loader, 0, device)

    for epoch in range(EPOCHS):
        net.train()
        for i, (images, labels) in enumerate(train_loader):
            # optimizer.zero_grad() already clears all of net's gradients;
            # the original's extra net.zero_grad() was redundant.
            optimizer.zero_grad()
            optimizer_cent.zero_grad()

            images = images.to(device)
            labels = labels.to(device)

            features, outputs = net(images)
            loss_xent = criterion(outputs, labels)
            # BUG FIX: the original computed 0.1 * criterion(features, labels),
            # i.e. cross-entropy over the feature vectors — criterion_cent
            # (the CenterLoss built above) was never used. Apply it here.
            loss_cent = 0.1 * criterion_cent(features, labels)
            loss = loss_xent + loss_cent
            loss.backward()

            optimizer.step()
            optimizer_cent.step()

            print(f'epoch {epoch+1:3d}, {i:3d}|{len(train_loader):3d}, loss_xent: {loss_xent.item():2.4f}, loss_cent: {loss_cent.item():2.4f} ', end='\r')
            tensorboard_writer.add_scalars(
                'train_loss',
                {'train_loss_xent': loss_xent.item(),
                 'train_loss_cent': loss_cent.item()},
                epoch * len(train_loader) + i)

        # BUG FIX: schedulers were stepped at the start of each epoch, before
        # any optimizer.step() — wrong order for PyTorch >= 1.1 and it shifted
        # the milestone decays one epoch early. Step after the epoch instead.
        scheduler.step()
        scheduler_cent.step()

        eval_net(net, test_loader, epoch + 1, device)

    torch.save(net.state_dict(), f'zoo/{prefix}_params.pth')