import argparse
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import models

# Project-local modules (config, Session, save_args, get_dataloaders,
# network, vgg_for_style_transfer, fusenet, resnet50/101, densenet169,
# GLOBAL_BRANCH_DIR, LOCAL_BRANCH_DIR, train_model, valid_model) are
# assumed to be importable from the surrounding repository.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', default=100, type=int, help='number of epochs')
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch number')
    parser.add_argument('-b', '--batch_size', default=4, type=int, help='mini-batch size')
    parser.add_argument('--lr', '--learning_rate', default=1e-4, type=float,
                        help='initial learning rate')
    parser.add_argument('--weight-decay', default=0.0, type=float, help='weight decay')
    parser.add_argument('-c', '--continue', dest='continue_path', type=str, required=False)
    parser.add_argument('--exp_name', default=config.exp_name, type=str, required=False)
    parser.add_argument('--valid', action='store_true')
    parser.add_argument('--style_loss', action='store_true')
    args = parser.parse_args()
    print(args)

    config.exp_name = args.exp_name
    config.make_dir()
    save_args(args, config.log_dir)

    # build the networks and wrap them for multi-GPU training
    net = network()
    vgg = vgg_for_style_transfer()
    net = torch.nn.DataParallel(net).cuda()
    vgg = torch.nn.DataParallel(vgg).cuda()
    sess = Session(config, net=net)

    # get dataloaders
    train_loader = get_dataloaders(os.path.join(config.data_dir, 'train.json'),
                                   batch_size=args.batch_size, shuffle=True)
    valid_loader = get_dataloaders(os.path.join(config.data_dir, 'val.json'),
                                   batch_size=args.batch_size, shuffle=True)

    if args.continue_path and os.path.exists(args.continue_path):
        sess.load_checkpoint(args.continue_path)

    # start session
    clock = sess.clock
    tb_writer = sess.tb_writer

    # set criterion, optimizer and scheduler
    criterion = nn.L1Loss().cuda()
    optimizer = optim.Adam(sess.net.parameters(), args.lr,
                           weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=10, verbose=True)

    # start training
    for e in range(args.epochs):
        train_model(train_loader, sess.net, vgg, criterion, optimizer,
                    clock.epoch, tb_writer)
        valid_out = valid_model(valid_loader, sess.net, vgg, criterion,
                                optimizer, clock.epoch, tb_writer)

        tb_writer.add_scalar('train/learning_rate',
                             optimizer.param_groups[-1]['lr'], clock.epoch)
        scheduler.step(valid_out['epoch_loss'])

        if valid_out['epoch_loss'] < sess.best_val_loss:
            sess.best_val_loss = valid_out['epoch_loss']
            sess.save_checkpoint('best_model.pth.tar')

        if clock.epoch % 10 == 0:
            sess.save_checkpoint('epoch{}.pth.tar'.format(clock.epoch))
        sess.save_checkpoint('latest.pth.tar')

        clock.tock()
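
# A minimal sketch of the Gram-matrix style loss that `--style_loss`
# presumably toggles inside `train_model`. The helper below and the
# list-of-feature-maps interface of the VGG wrapper are assumptions
# for illustration, not the repository's actual API.
def gram_style_loss(feats_pred, feats_target):
    """L1 distance between Gram matrices of paired VGG feature maps."""
    criterion = nn.L1Loss()
    loss = 0.0
    for fp, ft in zip(feats_pred, feats_target):
        b, c, h, w = fp.shape
        # flatten spatial dims and compute normalized Gram matrices
        fp_flat = fp.reshape(b, c, h * w)
        ft_flat = ft.reshape(b, c, h * w)
        gram_p = fp_flat.bmm(fp_flat.transpose(1, 2)) / (c * h * w)
        gram_t = ft_flat.bmm(ft_flat.transpose(1, 2)) / (c * h * w)
        loss = loss + criterion(gram_p, gram_t)
    return loss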
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', default=50, type=int, help='number of epochs')
    parser.add_argument('-b', '--batch_size', default=64, type=int, help='mini-batch size')
    parser.add_argument('--lr', '--learning_rate', default=1e-4, type=float,
                        help='initial learning rate')
    parser.add_argument('-c', '--continue', dest='continue_path', type=str, required=False)
    parser.add_argument('--state_dict', default=None, type=str, required=False,
                        help='state_dict to load when doing full training')
    parser.add_argument('--exp_name', default=config.exp_name, type=str, required=False)
    parser.add_argument('--drop_rate', default=0, type=float, required=False)
    parser.add_argument('--local', action='store_true', help='train local branch')
    args = parser.parse_args()
    print(args)

    config.exp_name = args.exp_name
    config.make_dir()
    save_args(args, config.log_dir)

    # get network
    if args.state_dict is not None:
        # resume full training from a saved state_dict
        state_dict = torch.load(args.state_dict)
        net = fusenet()
        net.load_state_dict(state_dict)
        net.set_fcweights()
    else:
        # build the fusion net from the pretrained branch checkpoints
        global_branch_state = torch.load(GLOBAL_BRANCH_DIR)
        local_branch_state = torch.load(LOCAL_BRANCH_DIR)
        net = fusenet(global_branch_state, local_branch_state)
    net.to(config.device)
    sess = Session(config, net=net)

    # get dataloaders
    train_loader = get_dataloaders('train', batch_size=args.batch_size, shuffle=True)
    valid_loader = get_dataloaders('valid', batch_size=args.batch_size, shuffle=False)

    if args.continue_path and os.path.exists(args.continue_path):
        sess.load_checkpoint(args.continue_path)

    # start session
    clock = sess.clock
    tb_writer = sess.tb_writer
    sess.save_checkpoint('start.pth.tar')

    # set criterion, optimizer and scheduler; the net is not wrapped in
    # DataParallel here, so no `.module` indirection is needed
    criterion = nn.BCELoss().cuda()
    if args.local:
        # train the local branch only
        optimizer = optim.Adam(sess.net.local_branch.parameters(), args.lr)
    else:
        # train the final fc layer only
        optimizer = optim.Adam(sess.net.classifier.parameters(), args.lr)
    scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.1, patience=10, verbose=True)

    # start training
    for e in range(args.epochs):
        train_out = train_model(train_loader, sess.net, criterion, optimizer, clock.epoch)
        valid_out = valid_model(valid_loader, sess.net, criterion, optimizer, clock.epoch)

        tb_writer.add_scalars('loss', {'train': train_out['epoch_loss'],
                                       'valid': valid_out['epoch_loss']}, clock.epoch)
        tb_writer.add_scalars('acc', {'train': train_out['epoch_acc'],
                                      'valid': valid_out['epoch_acc']}, clock.epoch)
        tb_writer.add_scalar('auc', valid_out['epoch_auc'], clock.epoch)
        tb_writer.add_scalar('learning_rate', optimizer.param_groups[-1]['lr'], clock.epoch)

        scheduler.step(valid_out['epoch_auc'])
        if valid_out['epoch_auc'] > sess.best_val_acc:
            sess.best_val_acc = valid_out['epoch_auc']
            sess.save_checkpoint('best_model.pth.tar')

        if clock.epoch % 10 == 0:
            sess.save_checkpoint('epoch{}.pth.tar'.format(clock.epoch))
        sess.save_checkpoint('latest.pth.tar')

        clock.tock()
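
# Optimizing only a parameter subset (as above) updates just those
# weights, but autograd still computes gradients for the rest of the
# model. A common refinement, sketched here as an assumption rather
# than observed repository behaviour, is to freeze the unused
# parameters explicitly before building the optimizer:
def freeze_except(model, trainable_module):
    """Disable grads on the whole model, then re-enable one submodule."""
    for p in model.parameters():
        p.requires_grad_(False)
    for p in trainable_module.parameters():
        p.requires_grad_(True)

# e.g. freeze_except(net, net.local_branch) before creating the optimizer.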
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', default=50, type=int, help='number of epochs')
    parser.add_argument('-b', '--batch_size', default=256, type=int, help='mini-batch size')
    parser.add_argument('--lr', '--learning_rate', default=1e-3, type=float,
                        help='initial learning rate')
    parser.add_argument('-c', '--continue', dest='continue_path', type=str, required=False)
    parser.add_argument('--exp_name', default=config.exp_name, type=str, required=False)
    parser.add_argument('--drop_rate', default=0, type=float, required=False)
    parser.add_argument('--only_fc', action='store_true', help='only train fc layers')
    parser.add_argument('--net', default='densenet169', type=str, required=False)
    parser.add_argument('--local', action='store_true', help='train local branch')
    args = parser.parse_args()

    # NOTE: these hard-coded overrides discard whatever was passed on the
    # command line; remove them to honor the CLI flags above
    args.batch_size = 32
    args.epochs = 150
    args.net = 'densenet169'
    print(args)

    config.exp_name = args.exp_name
    config.make_dir()
    save_args(args, config.log_dir)

    # get network
    if args.net == 'resnet50':
        net = resnet50(pretrained=True, drop_rate=args.drop_rate)
    elif args.net == 'resnet101':
        net = resnet101(pretrained=True, drop_rate=args.drop_rate)
    elif args.net == 'densenet121':
        net = models.densenet121(pretrained=True)
        net.classifier = nn.Sequential(nn.Linear(1024, 1), nn.Sigmoid())
    elif args.net == 'densenet169':
        net = densenet169(pretrained=True, drop_rate=args.drop_rate)
    elif args.net == 'fusenet':
        global_branch = torch.load(GLOBAL_BRANCH_DIR)['net']
        local_branch = torch.load(LOCAL_BRANCH_DIR)['net']
        net = fusenet(global_branch, local_branch)
        del global_branch, local_branch
    else:
        raise NameError('unknown network: {}'.format(args.net))
    net = net.cuda()
    sess = Session(config, net=net)

    # get dataloaders
    train_loader = get_dataloaders('train', batch_size=args.batch_size,
                                   num_workers=4, shuffle=True)
    valid_loader = get_dataloaders('valid', batch_size=args.batch_size, shuffle=False)

    if args.continue_path and os.path.exists(args.continue_path):
        sess.load_checkpoint(args.continue_path)

    # start session
    clock = sess.clock
    tb_writer = sess.tb_writer
    sess.save_checkpoint('start.pth.tar')

    # set criterion, optimizer and scheduler
    criterion = nn.BCELoss().cuda()  # not used
    if args.only_fc:
        # the net is not wrapped in DataParallel here, so the classifier
        # is reached directly rather than through `.module`
        optimizer = optim.Adam(sess.net.classifier.parameters(), args.lr)
    else:
        optimizer = optim.Adam(sess.net.parameters(), args.lr)
    scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.1, patience=10, verbose=True)

    # start training
    for e in range(args.epochs):
        train_out = train_model(train_loader, sess.net, criterion, optimizer, clock.epoch)
        valid_out = valid_model(valid_loader, sess.net, criterion, optimizer, clock.epoch)

        tb_writer.add_scalars('loss', {'train': train_out['epoch_loss'],
                                       'valid': valid_out['epoch_loss']}, clock.epoch)
        tb_writer.add_scalars('acc', {'train': train_out['epoch_acc'],
                                      'valid': valid_out['epoch_acc']}, clock.epoch)
        tb_writer.add_scalar('auc', valid_out['epoch_auc'], clock.epoch)
        tb_writer.add_scalar('learning_rate', optimizer.param_groups[-1]['lr'], clock.epoch)

        scheduler.step(valid_out['epoch_auc'])
        if valid_out['epoch_auc'] > sess.best_val_acc:
            sess.best_val_acc = valid_out['epoch_auc']
            sess.save_checkpoint('best_model.pth.tar')

        if clock.epoch % 10 == 0:
            sess.save_checkpoint('epoch{}.pth.tar'.format(clock.epoch))
        sess.save_checkpoint('latest.pth.tar')

        clock.tock()
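
# Entry-point guard with illustrative invocations (the script name is
# hypothetical, and note that the hard-coded overrides above currently
# win over these flags):
#   python train_global.py --net densenet169 -b 32 --lr 1e-3
#   python train_global.py --net fusenet --only_fc
if __name__ == '__main__':
    main()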