def train():
    # Create log and checkpoint directories for the current dataset.
    log_dir = os.path.join(args.log_dir, args.dataset)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    writer = SummaryWriter(log_dir)

    saved_checkpoint_dir = os.path.join(args.checkpoint_dir, args.dataset)
    if not os.path.exists(saved_checkpoint_dir):
        os.makedirs(saved_checkpoint_dir)

    for epoch in range(args.epoches + 1):
        # Forward pass in train mode; losses are computed on masked node subsets.
        outputs = model(adj, features)
        loss = get_loss(outputs, y_train, train_mask)
        val_loss = get_loss(outputs, y_val, val_mask).detach().numpy()

        # Temporarily switch to eval mode to measure accuracy without dropout.
        model.eval()
        outputs = model(adj, features)
        train_accuracy = get_accuracy(outputs, y_train, train_mask)
        val_accuracy = get_accuracy(outputs, y_val, val_mask)
        model.train()

        writer.add_scalars('loss', {
            'train_loss': loss.detach().numpy(),
            'val_loss': val_loss
        }, epoch)
        writer.add_scalars('accuracy', {
            'train_ac': train_accuracy,
            'val_ac': val_accuracy
        }, epoch)

        if epoch % args.log_interval == 0:
            print("Epoch: %d, train loss: %f, val loss: %f, train ac: %f, val ac: %f"
                  % (epoch, loss.detach().numpy(), val_loss,
                     train_accuracy, val_accuracy))

        if epoch % args.checkpoint_interval == 0:
            torch.save(model.state_dict(),
                       os.path.join(saved_checkpoint_dir, "gcn_%d.pth" % epoch))

        optimizer.zero_grad()  # Important: clear stale gradients before backward
        loss.backward()
        optimizer.step()

    writer.close()
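# The loop above assumes `get_loss` and `get_accuracy` score only the nodes
# selected by a boolean mask, as is standard for semi-supervised GCN training.
# A minimal sketch of such helpers follows; the exact definitions live
# elsewhere in the repo, and the assumption here is that labels are class
# indices and masks are boolean tensors (if labels are one-hot, argmax first).
import torch
import torch.nn.functional as F

def get_loss(outputs, labels, mask):
    # Cross-entropy restricted to the masked subset of nodes.
    return F.cross_entropy(outputs[mask], labels[mask])

def get_accuracy(outputs, labels, mask):
    # Fraction of correctly classified nodes within the mask.
    preds = outputs[mask].argmax(dim=1)
    return (preds == labels[mask]).float().mean().item()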
def train(args):
    set_random_seed(42)

    # Select the LSTM architecture; fall back to ULSTMNet on an unknown name.
    if args.model == 'lstm_diff':
        model = ULSTMNet(count_channels(args.channels), 1, args.image_size)
    elif args.model == 'lstm_decoder':
        model = Unet_LstmDecoder(count_channels(args.channels), all_masks=args.allmasks)
    else:
        print('Unknown LSTM model. Falling back to the default model.')
        model = ULSTMNet(count_channels(args.channels), 1, args.image_size)

    if torch.cuda.is_available():
        model.cuda()
    print('Loading model')
    model, device = UtilsFactory.prepare_model(model)
    print(device)

    optimizer = get_optimizer(args.optimizer, args.lr, model)
    criterion = get_loss(args.loss)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.2
    )

    save_path = os.path.join(args.logdir, args.name)
    os.makedirs(save_path, exist_ok=True)

    train_df = pd.read_csv(args.train_df)
    val_df = pd.read_csv(args.val_df)

    train_dataset = LstmDataset(args.neighbours, train_df, 'train', args.channels,
                                args.dataset_path, args.image_size,
                                args.batch_size, args.allmasks)
    valid_dataset = LstmDataset(args.neighbours, val_df, 'valid', args.channels,
                                args.dataset_path, args.image_size,
                                args.batch_size, args.allmasks)

    # `sampler` is an optional factory (dataframe -> Sampler); when it is None,
    # the training loader falls back to plain shuffling.
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=sampler is None,
                              num_workers=args.num_workers,
                              sampler=sampler(train_df) if sampler is not None else None)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = collections.OrderedDict()
    loaders['train'] = train_loader
    loaders['valid'] = valid_loader

    runner = SupervisedRunner()

    if args.model_weights_path:
        checkpoint = torch.load(args.model_weights_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback()],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )
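# The loader wiring above treats `sampler` as an optional factory mapping the
# training dataframe to a torch Sampler. A minimal sketch of one plausible
# factory, a class-balancing WeightedRandomSampler; the `label` column and the
# inverse-frequency weighting are illustrative assumptions, not the repo's
# actual logic.
import numpy as np
import torch
from torch.utils.data import WeightedRandomSampler

def sampler(df, label_col='label'):
    # Weight each row inversely to its class frequency so rare classes
    # are oversampled during training.
    labels = df[label_col].values
    class_counts = np.bincount(labels)
    weights = 1.0 / class_counts[labels]
    return WeightedRandomSampler(torch.as_tensor(weights, dtype=torch.double),
                                 num_samples=len(df),
                                 replacement=True)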
def train(args):
    set_random_seed(42)
    model = get_model(args.network, args.classification_head)
    print('Loading model')

    # Widen the encoder's first convolution to accept the stacked input
    # channels of all neighbouring images.
    model.encoder.conv1 = nn.Conv2d(
        count_channels(args.channels) * args.neighbours, 64,
        kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size,
                 args.batch_size, args.num_workers, args.neighbours,
                 args.classification_head)
    loaders = ds.create_loaders(train_df, val_df)

    save_path = os.path.join(args.logdir, args.name)
    optimizer = get_optimizer(args.optimizer, args.lr, model)

    if not args.classification_head:
        # Plain segmentation: train with Catalyst's SupervisedRunner.
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.1)
        criterion = get_loss(args.loss)
        runner = SupervisedRunner()

        if args.model_weights_path:
            checkpoint = torch.load(args.model_weights_path, map_location='cpu')
            model.load_state_dict(checkpoint['model_state_dict'])

        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )
    else:
        # Joint segmentation + classification: train with a multi-task loss.
        criterion = get_loss('multi')
        net = Model(model, optimizer, criterion,
                    batch_metrics=[classification_head_accuracy,
                                   segmentation_head_dice])
        net = net.to(device)
        net.fit_generator(loaders['train'], loaders['valid'],
                          epochs=args.epochs,
                          callbacks=[
                              ModelCheckpoint(f'{save_path}/checkpoints/best.pth'),
                              MultiStepLR(milestones=[10, 40, 80, 150, 300],
                                          gamma=0.1)
                          ])
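# All of these variants size their input layers with `count_channels`, which
# maps the --channels argument to an input-plane count. A minimal sketch,
# assuming `channels` is a list of band names; the per-band counts are
# illustrative placeholders, not the repo's actual mapping.
def count_channels(channels):
    count = 0
    for ch in channels:
        if ch == 'rgb':
            count += 3  # three colour planes
        else:
            count += 1  # single-band input such as 'ndvi' or 'b8'
    return count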
def train(args):
    set_random_seed(42)

    # Select the siamese architecture; fall back to a plain Unet on an unknown name.
    if args.model == 'unet':
        model = Unet(count_channels(args.channels) * args.neighbours, 1)
    elif args.model == 'unet3d':
        model = Unet3D(count_channels(args.channels), 1)
    elif args.model == 'siamdiff':
        model = SiamUnet_diff(count_channels(args.channels), 1)
    elif args.model == 'siamconc':
        model = SiamUnet_conc(count_channels(args.channels), 1)
    else:
        print('Unknown siamese model. Falling back to the default model.')
        model = Unet(count_channels(args.channels) * 2, 1)

    if torch.cuda.is_available():
        model.cuda()
    print('Loading model')
    model, device = UtilsFactory.prepare_model(model)
    print(device)

    optimizer = get_optimizer(args.optimizer, args.lr, model)
    criterion = get_loss(args.loss)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.2
    )

    save_path = os.path.join(args.logdir, args.name)
    os.makedirs(save_path, exist_ok=True)

    train_df = pd.read_csv(args.train_df)
    val_df = pd.read_csv(args.val_df)
    test_df = pd.read_csv(args.test_df)

    train_dataset = SiamDataset(args.neighbours, train_df, 'train', args.channels,
                                args.dataset_path, args.image_size, args.batch_size)
    valid_dataset = SiamDataset(args.neighbours, val_df, 'valid', args.channels,
                                args.dataset_path, args.image_size, args.batch_size)
    test_dataset = SiamDataset(args.neighbours, test_df, 'test', args.channels,
                               args.dataset_path, args.image_size, args.batch_size)

    # `sampler` is an optional factory (dataframe -> Sampler); when it is None,
    # the training loader falls back to plain shuffling.
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=sampler is None,
                              num_workers=args.num_workers,
                              sampler=sampler(train_df) if sampler is not None else None)
    valid_loader = DataLoader(valid_dataset, batch_size=1,
                              shuffle=False, num_workers=args.num_workers)
    test_loader = DataLoader(test_dataset, batch_size=1,
                             shuffle=False, num_workers=args.num_workers)

    if args.model_weights_path:
        checkpoint = torch.load(args.model_weights_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])

    # Model training / evaluation
    model_trainer = Trainer(model, args.lr, args.batch_size, args.epochs,
                            criterion, optimizer, scheduler,
                            train_loader, valid_loader, test_loader, save_path)

    if args.mode == 'train':
        model_trainer.start()
    elif args.mode == 'eval':
        model_trainer.evaluate(args.image_size, args.channels,
                               DataLoader(train_dataset, batch_size=1,
                                          shuffle=False, num_workers=args.num_workers),
                               phase='train')
        model_trainer.evaluate(args.image_size, args.channels,
                               DataLoader(valid_dataset, batch_size=1,
                                          shuffle=False, num_workers=args.num_workers),
                               phase='val')
        model_trainer.evaluate(args.image_size, args.channels,
                               DataLoader(test_dataset, batch_size=1,
                                          shuffle=False, num_workers=args.num_workers),
                               phase='test')
    else:
        print(f'Unknown mode {args.mode}.')
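# For completeness, a sketch of the CLI entry point these train(args) variants
# assume. Flag names mirror the attributes read above; defaults and choices
# are illustrative, not the repo's actual configuration.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Train a change-detection model.')
    parser.add_argument('--model', default='unet',
                        choices=['unet', 'unet3d', 'siamdiff', 'siamconc'])
    parser.add_argument('--mode', default='train', choices=['train', 'eval'])
    parser.add_argument('--channels', nargs='+', default=['rgb'])
    parser.add_argument('--neighbours', type=int, default=1)
    parser.add_argument('--optimizer', default='adam')
    parser.add_argument('--loss', default='bce_dice')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--image_size', type=int, default=224)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--train_df', required=True)
    parser.add_argument('--val_df', required=True)
    parser.add_argument('--test_df', required=True)
    parser.add_argument('--dataset_path', required=True)
    parser.add_argument('--logdir', default='logs')
    parser.add_argument('--name', default='experiment')
    parser.add_argument('--model_weights_path', default=None)
    return parser.parse_args()

if __name__ == '__main__':
    train(parse_args())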