def main(): """Main Function.""" # dataloader parameters gpu = torch.cuda.is_available() train_path = 'data/train_data.txt' valid_path = 'data/valid_data.txt' batch_size = 20 sequence_len = 50 num_workers = 2 # training parameters max_epochs = 200 learning_rate = 1e-4 criterion = nn.CrossEntropyLoss() # get dataloaders dataloaders, dataset_sizes = get_loaders(train_path, valid_path, batch_size, sequence_len, num_workers, gpu) # create network and optimizier net = SingleFrame('VGGNet19') print(net) optimizer = torch.optim.Adam(net.parameters(), learning_rate) # train the network net, val_acc, losses, accuracies = train_network(net, dataloaders, dataset_sizes, batch_size, sequence_len, criterion, optimizer, max_epochs, gpu) print('Best Validation Acc:', val_acc) # plot plot_data(losses, accuracies, 'outputs/online/SingleFramePlots.png') # save network torch.save(net.state_dict(), 'outputs/online/SingleFrameParams.pkl')
def main(): """ Main Function. """ # dataloader parameters data_path = 'data/kitti2012/training' receptive_size = 9 max_disp = 128 batch_size = 5 num_workers = 0 # training parameters learning_rate = 1e-2 max_epochs = 2 criterion = nn.CrossEntropyLoss() # create network net = SiameseNetwork() print(net) # create dataloader dataloaders, dataset_sizes = get_loaders(data_path, receptive_size, max_disp, batch_size, num_workers) # create optimizer p = net.parameters() optimizer = torch.optim.Adagrad(p, learning_rate) # train the network train(net, dataloaders, dataset_sizes, criterion, optimizer, max_epochs)
def train(self, architecture, fold, lr, batch_size, epochs, iter_size,
          epoch_size=None, validation_size=None, patience=4, optim="adam",
          ignore_prev_best_loss=False, cached_part=0.0, crop_central=False):
    train_loader, valid_loader, num_classes = get_loaders(
        batch_size,
        train_transform=train_augm(),
        valid_transform=valid_augm(),
        n_fold=fold,
        cached_part=cached_part,
        crop_central=crop_central)
    validation_size = len(valid_loader) * batch_size
    model = get_model(num_classes, architecture)
    criterion = CrossEntropyLoss(size_average=False)
    self.ignore_prev_best_loss = ignore_prev_best_loss
    self.lr = lr
    self.model = model
    self.root = Path('../results/{}'.format(architecture))
    self.fold = fold
    self.optim = optim
    train_kwargs = dict(
        args=dict(iter_size=iter_size, n_epochs=epochs,
                  batch_size=batch_size, epoch_size=epoch_size),
        model=model,
        criterion=criterion,
        train_loader=train_loader,
        valid_loader=valid_loader,
        validation_size=validation_size,
        patience=patience)
    self._train(**train_kwargs)
def main():
    '''Main function'''
    parser = argparse.ArgumentParser()
    parser.add_argument('-epoch', type=int, default=20)
    parser.add_argument('-batch_size', type=int, default=32)
    parser.add_argument('-dropout', type=float, default=0.5)
    # embedding/hidden sizes are dimensions, so parse them as ints
    parser.add_argument('-embedding_size', type=int, default=300)
    parser.add_argument('-image_hidden_size', type=int, default=2048)
    parser.add_argument('-log', type=str, default="./log/")
    parser.add_argument('-save_model', default=True)
    parser.add_argument('-save_mode', type=str, choices=['all', 'best'],
                        default='best')
    parser.add_argument('-test_mode', action='store_true', default=False)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-device', type=str, default='0')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # ========= Loading Dataset ========= #
    torch.manual_seed(1234)
    training_data, validation_data, test_data, vocab_cap, vocab_tag = \
        dataloader.get_loaders(opt)

    # ========= Preparing Model ========= #
    print(opt)
    device = torch.device(f'cuda:{opt.device}' if opt.cuda else 'cpu')
    dan = Ingres2Recipe(len(vocab_cap), len(vocab_tag), opt.embedding_size,
                        opt.image_hidden_size, opt.dropout).to(device)
    optimizer = optim.Adam(dan.parameters(),
                           betas=(0.9, 0.98), eps=1e-09, lr=0.003)
    loss_fn = nn.BCEWithLogitsLoss(
        F.softmax(torch.FloatTensor(vocab_tag.word_count).to(device),
                  dim=-1) * 10)
    # loss_fn = nn.BCEWithLogitsLoss()

    if not opt.test_mode:
        train(dan, training_data, validation_data, optimizer, loss_fn,
              device, opt)
    model_name = 'model.chkpt'
    checkpoint = torch.load(f"./models/{model_name}", map_location=device)
    dan.load_state_dict(checkpoint['model'])
    test(dan, validation_data, loss_fn, device, opt)
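# Note on the loss above: the first positional argument of nn.BCEWithLogitsLoss is
# `weight` (a rescaling factor applied to every element's loss term), not `pos_weight`
# (which scales only the positive-class term). A minimal self-contained illustration
# of the difference, with made-up numbers:
import torch
import torch.nn as nn

logits = torch.tensor([[0.5, -1.0, 2.0]])
targets = torch.tensor([[1.0, 0.0, 1.0]])
class_weights = torch.tensor([3.0, 1.0, 1.0])

weighted = nn.BCEWithLogitsLoss(weight=class_weights)          # scales every term
pos_weighted = nn.BCEWithLogitsLoss(pos_weight=class_weights)  # scales positives only
print(weighted(logits, targets), pos_weighted(logits, targets))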
def train(self, architecture, fold, lr, batch_size, epochs, epoch_size,
          validation_size, iter_size, patience=4, optim="adam",
          ignore_prev_best_loss=False):
    print("Start training with following params:",
          f"architecture = {architecture}",
          f"fold = {fold}",
          f"lr = {lr}",
          f"batch_size = {batch_size}",
          f"epochs = {epochs}",
          f"epoch_size = {epoch_size}",
          f"validation_size = {validation_size}",
          f"iter_size = {iter_size}",
          f"optim = {optim}",
          f"patience = {patience}")
    train_loader, valid_loader, num_classes = get_loaders(
        batch_size,
        train_transform=train_augm(),
        valid_transform=valid_augm(),
        n_fold=fold)
    model = get_model(num_classes, architecture)
    criterion = CrossEntropyLoss(size_average=False)
    self.ignore_prev_best_loss = ignore_prev_best_loss
    self.lr = lr
    self.model = model
    self.root = Path(f"../results/{architecture}")
    self.fold = fold
    self.optim = optim
    self.train_loss_logger = VisdomPlotLogger('line',
                                              opts={'title': 'Train Loss'})
    self.lr_logger = VisdomPlotLogger('line',
                                      opts={'title': 'Train Learning Rate'})
    self.test_loss_logger = VisdomPlotLogger('line',
                                             opts={'title': 'Test Loss'})
    self.test_accuracy_logger = VisdomPlotLogger(
        'line', opts={'title': 'Test Accuracy'})
    train_kwargs = dict(args=dict(iter_size=iter_size,
                                  n_epochs=epochs,
                                  batch_size=batch_size,
                                  epoch_size=epoch_size),
                        model=model,
                        criterion=criterion,
                        train_loader=train_loader,
                        valid_loader=valid_loader,
                        validation_size=validation_size,
                        patience=patience)
    self._train(**train_kwargs)
def preload_data(self, ep, sz, bs, trndir, valdir, **kwargs):
    """Pre-initializes data-loaders. Use set_data to start using it."""
    # `ep` is a dummy arg so the phase schedule can pass it without error
    if sz == 128:
        val_bs = 512
    elif sz == 224:
        val_bs = 192
    else:
        val_bs = 128
    return dataloader.get_loaders(trndir, valdir, bs=bs, val_bs=val_bs, sz=sz,
                                  workers=args.workers,
                                  distributed=args.distributed, **kwargs)
def main(config):
    device = torch.device('cpu') if config.gpu < 0 \
        else torch.device('cuda:{:d}'.format(config.gpu))
    train_data_loader, val_data_loader, test_data_loader = get_loaders(config)
    model = model_maker(config).to(device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.NLLLoss()
    trainer = Trainer(model, optimizer, criterion, config,
                      train_data_loader, val_data_loader)
    trainer.train(train_data_loader, val_data_loader, config)
    torch.save({
        'model': trainer.model.state_dict(),
        'config': config
    }, config.model_file_name)
def main(): """Main Function.""" # dataloader parameters gpu = torch.cuda.is_available() train_path = 'data/train_data.txt' valid_path = 'data/valid_data.txt' batch_size = 2 sequence_len = 50 window_size = 5 flow = False num_workers = 2 # network parameters model = 'VGGNet19' rnn_hidden = 512 rnn_layers = 1 # training parameters max_epochs = 1 learning_rate = 1e-4 criterion = nn.CrossEntropyLoss() # get loaders dataloaders, dataset_sizes = get_loaders(train_path, valid_path, batch_size, sequence_len, window_size, flow, num_workers, gpu) # create network and optimizer net = SingleStream(model, rnn_hidden, rnn_layers, pretrained=True) print(net) optimizer = torch.optim.Adam(net.parameters(), learning_rate) # train the network net, val_acc, losses, accuracies = train_network(net, dataloaders, dataset_sizes, batch_size, sequence_len, window_size, criterion, optimizer, max_epochs, gpu) # plot if flow: s_plots = 'outputs/online/SingleStreamFlowPlots.png' s_params = 'outputs/online/SingleStreamFlowParams.pkl' else: s_plots = 'outputs/online/SingleStreamAppPlots.png' s_params = 'outputs/online/SingleStreamAppParams.pkl' # plot plot_data(losses, accuracies, s_plots) # save network torch.save(net.state_dict(), s_params)
def preload_data(self, ep, sz, bs, trndir, valdir, **kwargs):
    """Pre-initializes data-loaders. Use set_data to start using it."""
    # `ep` is a dummy arg so the phase schedule can pass it without error
    if 'lr' in kwargs:
        del kwargs['lr']  # in case we mix schedule and data phases
    if sz == 128:
        val_bs = max(bs, 512)
    elif sz == 224:
        val_bs = max(bs, 256)
    else:
        val_bs = max(bs, 128)
    return dataloader.get_loaders(trndir, valdir, bs=bs, val_bs=val_bs, sz=sz,
                                  workers=args.workers,
                                  distributed=args.distributed, **kwargs)
def main(): """Main Function.""" # dataloader parameters train_path = 'data/train_data.txt' valid_path = 'data/valid_data.txt' batch_size = 1 num_workers = 2 # network parameters models = ['VGGNet11', 'VGGNet16', 'VGGNet19', 'ResNet18', 'ResNet34'] hidden_size = 128 rnn_layers = 1 pretrained = True finetuned = False # training parameters learning_rate = 1e-4 max_epochs = 100 criterion = nn.CrossEntropyLoss() # for each hyper-parameter for i, model in enumerate(models): print('Model:', model) best_acc = 0 # get dataloaders dataloaders, dataset_sizes = get_loaders(train_path, valid_path, batch_size, num_workers, shuffle=True) print('Dataset Sizes:', dataset_sizes) # create network object net = SingleStream(model, hidden_size, rnn_layers, pretrained, finetuned) # create optimizer p = list(net.lstm.parameters()) + list(net.fc.parameters()) optimizer = torch.optim.Adam(p, learning_rate) # train the network net, valid_acc, losses, accuracies = train_network( net, dataloaders, dataset_sizes, criterion, optimizer, max_epochs) # plot statistics print('Best Validation Accuracy:', round(valid_acc * 100, 2)) plot_data(losses, accuracies, 'outputs/SingleStream-{}.png'.format(i)) print() # save best network to disk if valid_acc > best_acc: torch.save(net.state_dict(), 'outputs/SingleStream-net_params.pkl')
def main(): """Main Function.""" # dataloader parameters gpu = torch.cuda.is_available() train_path = 'data/train_data.txt' valid_path = 'data/valid_data.txt' batch_size = 2 sequence_len = 10 num_workers = 2 # network parameters spat_model = 'VGGNet11' temp_model = 'VGGNet11' rnn_hidden = 32 rnn_layers = 1 # training parameters max_epochs = 2 learning_rate = 1e-4 window_size = 5 criterion = nn.CrossEntropyLoss() # get loaders dataloaders, dataset_sizes = get_loaders(train_path, valid_path, batch_size, sequence_len, flow, num_workers, gpu) # create network and optimizer net = TwoStreamFusion(spat_model, temp_model, rnn_hidden, rnn_layers, pretrained=False) print(net) optimizer = torch.optim.Adam(net.parameters(), learning_rate) # train the network net, val_acc, losses, accuracies = train_network( net, dataloaders, dataset_sizes, batch_size, sequence_len - 1, window_size, criterion, optimizer, max_epochs, gpu) # plot plot_data(losss, accuracies, 'outputs/online/TwoStreamPlots.png') # save network torch.save(net.state_dict(), 'outputs/online/TwoStreamParams.pkl')
def main(): """Main Function.""" # dataloaders parameters gpu = torch.cuda.is_available() train_path = 'data/train_data.txt' valid_path = 'data/valid_data.txt' test_path = 'data/test_data.txt' batch_size = 32 num_workers = 2 # network parameters model = 'VGGNet19' rnn_hidden = 512 rnn_layers = 2 # training parameters max_epochs = 100 learning_rate = 1e-4 criterion = nn.CrossEntropyLoss() # create dataloaders dataloaders, dataset_sizes = get_loaders(train_path, valid_path, batch_size, num_workers, gpu=True) print('Dataset Sizes:') print(dataset_sizes) # create network object and optimizer net = SingleStream(model, rnn_hidden, rnn_layers, pretrained=True) print(net) optimizer = torch.optim.Adam(net.parameters(), learning_rate) # train the network net, val_acc, losses, accuracies = train_network(net, dataloaders, dataset_sizes, batch_size, criterion, optimizer, max_epochs, gpu) # plot plot_data(losses, accuracies, 'outputs/offline/SingleStreamPlots.png') # save network torch.save(net.state_dict(), 'outputs/offline/SingleStreamParams.pkl')
def run(setting, n, save_dir, folder, early_stop=True, split=0.75,
        init_dict=None):
    name = convert_to_name(setting)
    model_save_dir = save_dir + '/'
    history = {"loss": [], "acc": [], "normacc": [], "ipacc": [],
               "npacc": [], "confmat": [], "best_avg": 0}
    mod = model.get_pretrained_model(layer_names=setting["layers"],
                                     type_init=setting["init"]).to(device)
    if init_dict is not None:
        mod.load_state_dict(init_dict)
    optim = model.get_optimizer(mod, feature_extract=True,
                                lr=setting["lr"], mom=setting["mom"])
    criterion = nn.CrossEntropyLoss()
    Path(model_save_dir + name + "/" + n).mkdir(parents=True, exist_ok=True)
    torch.save(mod.state_dict(), model_save_dir + name + "/" + n + '/epoch_0')

    stop = False
    if early_stop:
        dataloaders = dataloader.get_loaders(BATCH_SIZE, split)
        while not stop:
            print(stopcrit.checks)
            mod, valloss, valacc, confmat = traintest.trainepoch(
                mod, dataloaders, criterion, optim, device)
            # normalacc, ipacc, npacc = accs_from_confmat(confmat)
            history["loss"].append(valloss)
            history["acc"].append(valacc)
            # history["normacc"].append(normalacc)
            # history["ipacc"].append(ipacc)
            # history["npacc"].append(npacc)
            history["confmat"].append(confmat)
            stop = stopcrit.check(valacc, mod.state_dict())
    else:
        dataloaders = dataloader.get_loaders(BATCH_SIZE, split)
        for epoch in range(EPOCHS):
            validate = split != 1.0
            mod, valloss, valacc, confmat = traintest.trainepoch(
                mod, dataloaders, criterion, optim, device, validate)
            if valloss is not None:
                # normalacc, ipacc, npacc = accs_from_confmat(confmat)
                history["loss"].append(valloss)
                history["acc"].append(valacc)
                # history["normacc"].append(normalacc)
                # history["ipacc"].append(ipacc)
                # history["npacc"].append(npacc)
                history["confmat"].append(confmat)
                stop = stopcrit.check(valacc, mod.state_dict())

    if split != 1.0:
        history["best_avg"] = stopcrit.last_avg
        torch.save(stopcrit.best_model_dict,
                   model_save_dir + name + "/" + n + '/epoch_' +
                   str(stopcrit.best_check))
        plot_run(name, n, history, folder)
        best_acc = stopcrit.best_val
        best_epoch = stopcrit.best_check
        stopcrit.reset()
    else:
        torch.save(mod.state_dict(),
                   model_save_dir + name + "/" + n + "/epoch_" + str(EPOCHS))
        best_acc = None
        best_epoch = None
    return history, best_acc, best_epoch
from pathlib import Path
import random
import os
import pickle

import matplotlib.pyplot as plt
import torch  # used below for manual_seed/device; missing from the original imports
import torch.nn.functional as F

import dataloader
import model
import traintest
from stopping_criterion import StoppingCriterion

setting = {"layers": ["layer4"], "init": "normal", "lr": 0.01, "mom": 0.01}

torch.manual_seed(9)
random.seed(9)
test_dataloader = dataloader.get_loaders(1)["test"]
stopcrit = StoppingCriterion(20, 5)
RUNS = 30
EPOCHS = 75
BATCH_SIZE = 16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')


def convert_to_name(setting):
    return (str(setting["layers"]) + '_' + setting["init"] + "_" +
            str(setting["lr"]) + "_" + str(setting["mom"]))
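# StoppingCriterion is imported above but its source is not in these snippets. From its
# usage (check(val_acc, state_dict), plus the checks, last_avg, best_val, best_check,
# best_model_dict, and reset members), it looks like a moving-average early stopper.
# The sketch below is a hypothetical reconstruction; the meaning of the two constructor
# arguments (taken here as averaging window and patience) is an assumption.
class StoppingCriterionSketch:
    def __init__(self, window, patience):
        self.window = window      # epochs averaged per check
        self.patience = patience  # checks without improvement before stopping
        self.reset()

    def reset(self):
        self.accs, self.checks = [], 0
        self.last_avg, self.best_val = 0.0, 0.0
        self.best_check, self.bad_checks = 0, 0
        self.best_model_dict = None

    def check(self, val_acc, state_dict):
        """Record one epoch's accuracy; return True when training should stop."""
        self.accs.append(val_acc)
        if len(self.accs) < self.window:
            return False
        self.checks += 1
        self.last_avg = sum(self.accs[-self.window:]) / self.window
        if self.last_avg > self.best_val:
            self.best_val = self.last_avg
            self.best_check = self.checks
            self.best_model_dict = state_dict
            self.bad_checks = 0
        else:
            self.bad_checks += 1
        return self.bad_checks >= self.patience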
def run(args: argparse.Namespace) -> None:
    # save args to dict
    d = vars(args)
    d['time'] = str(datetime.datetime.now())
    save_dict_to_file(d, args.workdir)

    temperature: float = 0.1
    n_class: int = args.n_class
    metric_axis: List = args.metric_axis
    lr: float = args.l_rate
    dtype = eval(args.dtype)

    # Proper params
    savedir: str = args.workdir
    n_epoch: int = args.n_epoch

    net, optimizer, device, loss_fns, loss_weights, loss_fns_source, \
        loss_weights_source, scheduler = setup(args, n_class, dtype)
    print(f'> Loss weights cons: {loss_weights}, '
          f'Loss weights source: {loss_weights_source}')
    shuffle = False
    # if args.mix:
    #     shuffle = True
    # print("args.dataset", args.dataset)
    loader, loader_val = get_loaders(args, args.dataset, args.source_folders,
                                     args.batch_size, n_class, args.debug,
                                     args.in_memory, dtype, False,
                                     fix_size=[0, 0])
    target_loader, target_loader_val = get_loaders(args, args.target_dataset,
                                                   args.target_folders,
                                                   args.batch_size, n_class,
                                                   args.debug, args.in_memory,
                                                   dtype, shuffle,
                                                   fix_size=[0, 0])
    num_steps = n_epoch * len(loader)
    print("metric axis", metric_axis)
    best_dice_pos: Tensor = np.zeros(1)
    best_dice: Tensor = np.zeros(1)
    best_2d_dice: Tensor = np.zeros(1)
    best_3d_dice: Tensor = np.zeros(1)
    best_3d_dice_source: Tensor = np.zeros(1)

    print("Results saved in ", savedir)
    print(">>> Starting the training")
    for i in range(n_epoch):
        tra_losses_vec, tra_target_vec, tra_source_vec = do_epoch(
            args, "train", net, device, loader, i, loss_fns, loss_weights,
            loss_fns_source, loss_weights_source, args.resize, num_steps,
            n_class, metric_axis, savedir="", optimizer=optimizer,
            target_loader=target_loader)
        with torch.no_grad():
            val_losses_vec, val_target_vec, val_source_vec = do_epoch(
                args, "val", net, device, loader_val, i, loss_fns,
                loss_weights, loss_fns_source, loss_weights_source,
                args.resize, num_steps, n_class, metric_axis,
                savedir=savedir, target_loader=target_loader_val)

        df_s_tmp = pd.DataFrame({
            "tra_dice_3d": [tra_source_vec[0]],
            "tra_dice_3d_sd": [tra_source_vec[1]],
            "val_dice_3d": [val_source_vec[0]],
            "val_dice_3d_sd": [val_source_vec[1]]})
        if i == 0:
            df_s = df_s_tmp
        else:
            df_s = df_s.append(df_s_tmp)
        df_s.to_csv(Path(savedir, "_".join((args.source_folders.split("'")[1],
                                            "source", args.csv))),
                    float_format="%.4f", index_label="epoch")

        df_t_tmp = pd.DataFrame({
            "tra_loss_inf": [tra_losses_vec[0]],
            "tra_loss_cons": [tra_losses_vec[1]],
            "tra_loss_fs": [tra_losses_vec[2]],
            "val_loss_inf": [val_losses_vec[0]],
            "val_loss_cons": [val_losses_vec[1]],
            "val_loss_fs": [val_losses_vec[2]],
            "tra_dice_3d": [tra_target_vec[0]],
            "tra_dice_3d_sd": [tra_target_vec[1]],
            "tra_dice": [tra_target_vec[2]],
            "val_dice_3d": [val_target_vec[0]],
            "val_dice_3d_sd": [val_target_vec[1]],
            "val_dice": [val_target_vec[2]]})
        if i == 0:
            df_t = df_t_tmp
        else:
            df_t = df_t.append(df_t_tmp)
        df_t.to_csv(Path(savedir, "_".join((args.target_folders.split("'")[1],
                                            "target", args.csv))),
                    float_format="%.4f", index_label="epoch")

        # Save model if better
        current_val_target_2d_dice = val_target_vec[2]
        '''
        if current_val_target_2d_dice > best_2d_dice:
            best_epoch = i
            best_2d_dice = current_val_target_2d_dice
            with open(Path(savedir, "best_epoch_2.txt"), 'w') as f:
                f.write(str(i))
            best_folder_2d = Path(savedir, "best_epoch_2d")
            if best_folder_2d.exists():
                rmtree(best_folder_2d)
            copytree(Path(savedir, f"iter{i:03d}"), Path(best_folder_2d))
            torch.save(net, Path(savedir, "best_2d.pkl"))
        '''
        current_val_target_3d_dice = val_target_vec[0]
        if current_val_target_3d_dice > best_3d_dice:
            best_epoch = i
            best_3d_dice = current_val_target_3d_dice
            with open(Path(savedir, "best_epoch_3d.txt"), 'w') as f:
                f.write(str(i))
            best_folder_3d = Path(savedir, "best_epoch_3d")
            if best_folder_3d.exists():
                rmtree(best_folder_3d)
            copytree(Path(savedir, f"iter{i:03d}"), Path(best_folder_3d))
            torch.save(net, Path(savedir, "best_3d.pkl"))

        # Save source model if better (the original assigned a dead variable
        # here, so the running best was never updated)
        current_val_source_3d_dice = val_source_vec[0]
        if current_val_source_3d_dice > best_3d_dice_source:
            best_epoch = i
            best_3d_dice_source = current_val_source_3d_dice
            with open(Path(savedir, "best_epoch_3d_source.txt"), 'w') as f:
                f.write(str(i))
            torch.save(net, Path(savedir, "best_3d_source.pkl"))

        if i == n_epoch - 1:
            with open(Path(savedir, "last_epoch.txt"), 'w') as f:
                f.write(str(i))
            last_folder = Path(savedir, "last_epoch")
            if last_folder.exists():
                rmtree(last_folder)
            copytree(Path(savedir, f"iter{i:03d}"), Path(last_folder))
            torch.save(net, Path(savedir, "last.pkl"))

        # remove images from iteration
        rmtree(Path(savedir, f"iter{i:03d}"))

        if args.flr == False:
            # adjust_learning_rate(optimizer, i, args.l_rate, n_epoch, 0.9)
            exp_lr_scheduler(optimizer, i, args.lr_decay)

    print("Results saved in ", savedir)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=int, default=0,
                        help='0: original dataset, 1: re-split dataset')
    parser.add_argument('--train_emb', action='store_true',
                        help='Train word embedding for SQLNet (requires pretrained model).')
    parser.add_argument('--resume', default=None,
                        help='resume from pretrained model.')
    parser.add_argument('--epoch', type=int, default=20,
                        help='number of epochs')
    parser.add_argument('--batch_size', type=int, default=2, help='batch size')
    parser.add_argument('--logging_step', type=int, default=50,
                        help='logging step')
    parser.add_argument('--lr_update', type=int, default=10, help='lr update')
    parser.add_argument('--learning_rate', type=float, default=1e-3,
                        help='learning rate')
    parser.add_argument('--prefix', type=str, default='bS2_',
                        help='prefix of saved model')
    parser.add_argument('--withtab', type=int, default=1,
                        help='sample from content vector')
    parser.add_argument('--teacher_forcing_fraction', type=float, default=1.0,
                        help='fraction of batches that will use teacher '
                             'forcing during training')
    parser.add_argument('--scheduled_teacher_forcing', action='store_true',
                        help='Linearly decrease the teacher forcing fraction '
                             'from 1.0 to 0.0 over the specified number of epochs')
    args = parser.parse_args()

    if args.scheduled_teacher_forcing:
        schedule = np.arange(1.0, 0.0, -1.0 / args.epoch)
    else:
        schedule = np.ones(args.epoch) * args.teacher_forcing_fraction

    train_loader, val_loader = data.get_loaders(args.batch_size, 8)
    test_loader = data.get_test_loader('test', args.batch_size, 8)

    # both branches currently build the same model
    if args.withtab:
        model = QG()
    else:
        model = QG()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['model'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, start_epoch))
            model.teach_forcing = 0.0
            model.mask_tf = True
            print('dev set')
            validate(args, val_loader, model)
            print('test set')
            validate(args, test_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
outputs, attention = model(inputs, None, None)
outputs = torch.transpose(outputs, 0, 1)
zip_list = zip(outputs.tolist(), order_list)
sorted_list = sorted(zip_list, key=lambda x: x[1], reverse=False)
output = torch.tensor([x[0] for x in sorted_list])
for i in range(output.size(0)):
    end_pos = (output[i].tolist().index(dl.char2idx['?'])
               if dl.char2idx['?'] in output[i].tolist()
               else output[i].size(0))
    pred = "".join(dl.idx2char[o] for o in output[i].tolist()[:end_pos])
    pred_final.append(pred)
result = pd.DataFrame(np.array(pred_final))
result.columns = ['Predicted']
result.to_csv('submission.csv', index_label='Id')


if __name__ == "__main__":
    train_loader, val_loader, test_loader = dl.get_loaders()
    model = models.Seq2Seq(40, 128, dl.get_char_length())
    model = model.to(device)
    if train_mode:
        train(model, train_loader, val_loader)
    with torch.no_grad():
        run_test(model, test_loader)
def run(args: argparse.Namespace) -> None:
    # save args to dict
    d = vars(args)
    d['time'] = str(datetime.datetime.now())
    d['server'] = platform.node()
    save_dict_to_file(d, args.workdir)

    temperature: float = 0.1
    n_class: int = args.n_class
    metric_axis: List = args.metric_axis
    lr: float = args.l_rate
    dtype = eval(args.dtype)

    # Proper params
    savedir: str = args.workdir
    n_epoch: int = args.n_epoch

    net, optimizer, device, loss_fns, loss_weights, scheduler, n_epoch = \
        setup(args, n_class, dtype)
    shuffle = True
    print(args.target_folders)
    target_loader, target_loader_val = get_loaders(args, args.target_dataset,
                                                   args.target_folders,
                                                   args.batch_size, n_class,
                                                   args.debug, args.in_memory,
                                                   dtype, shuffle, "target",
                                                   args.val_target_folders)
    print("metric axis", metric_axis)
    best_dice_pos: Tensor = np.zeros(1)
    best_dice: Tensor = np.zeros(1)
    best_hd3d_dice: Tensor = np.zeros(1)
    best_3d_dice: Tensor = 0
    best_2d_dice: Tensor = 0

    print("Results saved in ", savedir)
    print(">>> Starting the training")
    for i in range(n_epoch):
        if args.mode == "makeim":
            with torch.no_grad():
                val_losses_vec, val_target_vec, val_source_vec = do_epoch(
                    args, "val", net, device, i, loss_fns, loss_weights,
                    args.resize, n_class, metric_axis, savedir=savedir,
                    target_loader=target_loader_val,
                    best_dice3d_val=best_3d_dice)
            tra_losses_vec = val_losses_vec
            tra_target_vec = val_target_vec
            tra_source_vec = val_source_vec
        else:
            tra_losses_vec, tra_target_vec, tra_source_vec = do_epoch(
                args, "train", net, device, i, loss_fns, loss_weights,
                args.resize, n_class, metric_axis, savedir=savedir,
                optimizer=optimizer, target_loader=target_loader,
                best_dice3d_val=best_3d_dice)
            with torch.no_grad():
                val_losses_vec, val_target_vec, val_source_vec = do_epoch(
                    args, "val", net, device, i, loss_fns, loss_weights,
                    args.resize, n_class, metric_axis, savedir=savedir,
                    target_loader=target_loader_val,
                    best_dice3d_val=best_3d_dice)

        current_val_target_3d_dice = val_target_vec[0]
        if args.dice_3d:
            if current_val_target_3d_dice > best_3d_dice:
                best_3d_dice = current_val_target_3d_dice
                with open(Path(savedir, "3dbestepoch.txt"), 'w') as f:
                    f.write(str(i) + ',' + str(best_3d_dice))
                best_folder_3d = Path(savedir, "best_epoch_3d")
                if best_folder_3d.exists():
                    rmtree(best_folder_3d)
                if args.saveim:
                    copytree(Path(savedir, f"iter{i:03d}"),
                             Path(best_folder_3d))
                torch.save(net, Path(savedir, "best_3d.pkl"))

        if not (i % 10):
            print("epoch", str(i), savedir, 'best 3d dice', best_3d_dice)
            torch.save(net, Path(savedir, "epoch_" + str(i) + ".pkl"))

        if i == n_epoch - 1:
            with open(Path(savedir, "last_epoch.txt"), 'w') as f:
                f.write(str(i))
            last_folder = Path(savedir, "last_epoch")
            if last_folder.exists():
                rmtree(last_folder)
            if args.saveim:
                copytree(Path(savedir, f"iter{i:03d}"), Path(last_folder))
            torch.save(net, Path(savedir, "last.pkl"))

        # remove images from iteration
        if args.saveim:
            rmtree(Path(savedir, f"iter{i:03d}"))

        if args.source_metrics:
            df_s_tmp = pd.DataFrame({
                "val_dice_3d": [val_source_vec[0]],
                "val_dice_3d_sd": [val_source_vec[1]],
                "val_dice_2d": [val_source_vec[2]]})
            if i == 0:
                df_s = df_s_tmp
            else:
                df_s = df_s.append(df_s_tmp)
            df_s.to_csv(Path(savedir,
                             "_".join((args.source_folders.split("'")[1],
                                       "source", args.csv))),
                        float_format="%.4f", index_label="epoch")

        df_t_tmp = pd.DataFrame({
            "epoch": i,
            "tra_loss_s": [tra_losses_vec[0]],
            "tra_loss_cons": [tra_losses_vec[1]],
            "tra_loss_tot": [tra_losses_vec[2]],
            "tra_size_mean": [tra_losses_vec[3]],
            "tra_size_mean_pos": [tra_losses_vec[4]],
            "val_loss_s": [val_losses_vec[0]],
            "val_loss_cons": [val_losses_vec[1]],
            "val_loss_tot": [val_losses_vec[2]],
            "val_size_mean": [val_losses_vec[3]],
            "val_size_mean_pos": [val_losses_vec[4]],
            "val_gt_size_mean": [val_losses_vec[5]],
            "val_gt_size_mean_pos": [val_losses_vec[6]],
            "tra_dice": [tra_target_vec[4]],
            "val_asd": [val_target_vec[2]],
            "val_asd_sd": [val_target_vec[3]],
            "val_hd": [val_target_vec[4]],
            "val_hd_sd": [val_target_vec[5]],
            "val_dice": [val_target_vec[6]],
            "val_dice_3d_sd": [val_target_vec[1]],
            "val_dice_3d": [val_target_vec[0]]})
        if i == 0:
            df_t = df_t_tmp
        else:
            df_t = df_t.append(df_t_tmp)
        df_t.to_csv(Path(savedir, "_".join((args.target_folders.split("'")[1],
                                            "target", args.csv))),
                    float_format="%.4f", index=False)

        if args.flr == False:
            exp_lr_scheduler(optimizer, i, args.lr_decay, args.lr_decay_epoch)

    print("Results saved in ", savedir, "best 3d dice", best_3d_dice)
def main():
    '''Main function'''
    parser = argparse.ArgumentParser()
    parser.add_argument('-epoch', type=int, default=20)
    parser.add_argument('-batch_size', type=int, default=32)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-embedding_size', type=int, default=300)
    parser.add_argument('-learning_rate', type=float, default=0.0003)
    parser.add_argument('-name', type=str, default=None,
                        choices=['all', 'all_but_discharge', 'physician',
                                 'discharge', 'physician_nursing'])
    parser.add_argument('-task', type=str, default=None,
                        choices=['mortality', 'readmission'])
    parser.add_argument('-data_name', type=str, default=None)
    parser.add_argument('-period', type=str, choices=['24', '48', 'retro'])
    parser.add_argument('-segment', type=str, default=None)
    parser.add_argument('-text_length', type=int, help='text length',
                        default=None)
    parser.add_argument('-feature', action='store_true', default=False)
    parser.add_argument('-text', action='store_true', default=False)
    parser.add_argument('-log', type=str,
                        default="/data/joe/physician_notes/Deep-Average-Network/log/")
    parser.add_argument('-save_model', default=True)
    parser.add_argument('-save_mode', type=str, choices=['all', 'best'],
                        default='best')
    parser.add_argument('-test_mode', action='store_true', default=False)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-device', type=str, default='0')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.compare_note = None

    # ========= Loading Dataset ========= #
    torch.manual_seed(1234)
    training_data, validation_data, test_data, vocab, feature_len = \
        dataloader.get_loaders(opt, is_test=opt.test_mode,
                               is_feature=opt.feature)

    # ========= Preparing Model ========= #
    print(opt)
    device = torch.device(f'cuda:{opt.device}' if opt.cuda else 'cpu')
    dan = DAN(len(vocab), opt.embedding_size, feature_len, opt.dropout,
              opt.feature, opt.text).to(device)
    loss_fn = nn.CrossEntropyLoss()

    model_name = opt.task + '_' + opt.name + '_' + opt.period + '.chkpt'
    if opt.text:
        model_name = "text_" + model_name
    if opt.feature:
        model_name = "feature_" + model_name
    checkpoint = torch.load(
        f"/data/joe/physician_notes/Deep-Average-Network/models/{model_name}",
        map_location=device)
    dan.load_state_dict(checkpoint['model'])

    results = {}
    for note_id in Constants.note_type[opt.name]:
        opt.compare_note = note_id
        training_data, validation_data, test_data, vocab, feature_len = \
            dataloader.get_loaders(opt, is_test=opt.test_mode,
                                   is_feature=opt.feature)
        res = test(dan, test_data, loss_fn, device, opt)
        results[note_id] = res
        # predict_prob(dan, test_data, loss_fn, device, opt)

    TEST_NOTE_PATH = (f"/data/joe/physician_notes/mimic-data/{opt.task}/"
                      f"{opt.name}_note_test_{opt.period}.csv")
    test_file = pd.read_csv(TEST_NOTE_PATH)
    df = pd.DataFrame(results)
    df.insert(0, 'stay', test_file['stay'])
    out_dir = '/home/joe/physician_notes/models/DeepAverageNetwork/compare_notes/'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    model_name = opt.task + '_' + opt.name + '_' + opt.period + '.csv'
    if opt.text:
        model_name = "text_" + model_name
    if opt.feature:
        model_name = "feature_" + model_name
    if opt.segment:
        model_name = opt.segment + "_" + model_name
    df.to_csv(f'{out_dir}{model_name}', index=False)
def run(args: argparse.Namespace) -> Dict[str, Tensor]:
    n_class: int = args.n_class
    lr: float = args.l_rate
    savedir: str = args.workdir
    n_epoch: int = args.n_epoch
    val_f: int = args.val_loader_id

    loss_fns: List[List[Callable]]
    loss_weights: List[List[float]]
    net, optimizer, device, loss_fns, loss_weights, scheduler = setup(args, n_class)

    train_loaders: List[DataLoader]
    val_loaders: List[DataLoader]
    train_loaders, val_loaders = get_loaders(args, args.dataset,
                                             args.batch_size, n_class,
                                             args.debug, args.in_memory)

    n_tra: int = sum(len(tr_lo.dataset) for tr_lo in train_loaders)  # Number of images in dataset
    l_tra: int = sum(len(tr_lo) for tr_lo in train_loaders)  # Iterations per epoch: different if batch_size > 1
    n_val: int = sum(len(vl_lo.dataset) for vl_lo in val_loaders)
    l_val: int = sum(len(vl_lo) for vl_lo in val_loaders)
    n_loss: int = max(map(len, loss_fns))

    best_dice: Tensor = torch.zeros(1).to(device).type(torch.float32)
    best_epoch: int = 0
    metrics = {"val_dice": torch.zeros((n_epoch, n_val, n_class), device=device).type(torch.float32),
               "val_batch_dice": torch.zeros((n_epoch, l_val, n_class), device=device).type(torch.float32),
               "val_loss": torch.zeros((n_epoch, l_val, len(loss_fns[val_f])), device=device).type(torch.float32),
               "tra_dice": torch.zeros((n_epoch, n_tra, n_class), device=device).type(torch.float32),
               "tra_batch_dice": torch.zeros((n_epoch, l_tra, n_class), device=device).type(torch.float32),
               "tra_loss": torch.zeros((n_epoch, l_tra, n_loss), device=device).type(torch.float32)}
    if args.compute_haussdorf:
        metrics["val_haussdorf"] = torch.zeros((n_epoch, n_val, n_class), device=device).type(torch.float32)
    if args.compute_miou:
        metrics["val_mIoUs"] = torch.zeros((n_epoch, n_class), device=device).type(torch.float32)
        metrics["tra_mIoUs"] = torch.zeros((n_epoch, n_class), device=device).type(torch.float32)

    print("\n>>> Starting the training")
    for i in range(n_epoch):
        # Do training and validation loops
        tra_loss, tra_dice, tra_batch_dice, _, tra_mIoUs = do_epoch(
            "train", net, device, train_loaders, i, loss_fns, loss_weights,
            n_class, savedir=savedir if args.save_train else "",
            optimizer=optimizer, metric_axis=args.metric_axis,
            compute_miou=args.compute_miou, temperature=args.temperature)
        with torch.no_grad():
            val_loss, val_dice, val_batch_dice, val_haussdorf, val_mIoUs = do_epoch(
                "val", net, device, val_loaders, i, [loss_fns[val_f]],
                [loss_weights[val_f]], n_class, savedir=savedir,
                metric_axis=args.metric_axis,
                compute_haussdorf=args.compute_haussdorf,
                compute_miou=args.compute_miou, temperature=args.temperature)

        # Sort and save the metrics
        for k in metrics:
            assert metrics[k][i].shape == eval(k).shape, (metrics[k][i].shape, eval(k).shape, k)
            metrics[k][i] = eval(k)
        for k, e in metrics.items():
            np.save(Path(savedir, f"{k}.npy"), e.cpu().numpy())

        df = pd.DataFrame({"tra_loss": metrics["tra_loss"].mean(dim=(1, 2)).cpu().numpy(),
                           "val_loss": metrics["val_loss"].mean(dim=(1, 2)).cpu().numpy(),
                           "tra_dice": metrics["tra_dice"][:, :, -1].mean(dim=1).cpu().numpy(),
                           "val_dice": metrics["val_dice"][:, :, -1].mean(dim=1).cpu().numpy(),
                           "tra_batch_dice": metrics["tra_batch_dice"][:, :, -1].mean(dim=1).cpu().numpy(),
                           "val_batch_dice": metrics["val_batch_dice"][:, :, -1].mean(dim=1).cpu().numpy()})
        df.to_csv(Path(savedir, args.csv), float_format="%.4f", index_label="epoch")

        # Save model if better
        current_dice: Tensor = val_dice[:, args.metric_axis].mean()
        if current_dice > best_dice:
            best_epoch = i
            best_dice = current_dice
            if args.compute_haussdorf:
                best_haussdorf = val_haussdorf[:, args.metric_axis].mean()
            with open(Path(savedir, "best_epoch.txt"), 'w') as f:
                f.write(str(i))
            best_folder = Path(savedir, "best_epoch")
            if best_folder.exists():
                rmtree(best_folder)
            copytree(Path(savedir, f"iter{i:03d}"), Path(best_folder))
            torch.save(net, Path(savedir, "best.pkl"))

        optimizer, loss_fns, loss_weights = scheduler(i, optimizer, loss_fns, loss_weights)

        # if args.schedule and (i > (best_epoch + 20)):
        if args.schedule and (i % (best_epoch + 20) == 0):  # Yeah, ugly but will clean that later
            for param_group in optimizer.param_groups:
                lr *= 0.5
                param_group['lr'] = lr
            print(f'>> New learning Rate: {lr}')

        if i > 0 and not (i % 5):
            maybe_hauss = f', Haussdorf: {best_haussdorf:.3f}' if args.compute_haussdorf else ''
            print(f">> Best results at epoch {best_epoch}: DSC: {best_dice:.3f}{maybe_hauss}")

    # Because displaying the results at the end is actually convenient
    maybe_hauss = f', Haussdorf: {best_haussdorf:.3f}' if args.compute_haussdorf else ''
    print(f">> Best results at epoch {best_epoch}: DSC: {best_dice:.3f}{maybe_hauss}")
    for metric in metrics:
        if "val" in metric or "loss" in metric:
            # Do not care about training values, nor the loss (keep it simple)
            print(f"\t{metric}: {metrics[metric][best_epoch].mean(dim=0)}")

    return metrics
def run(args: argparse.Namespace) -> None:
    # Hardcoded shitz
    n_class: int = args.n_class
    lr: float = args.l_rate
    # Proper params
    savedir: str = args.workdir
    n_epoch: int = args.n_epoch

    net, optimizer, device, loss_fns, loss_weights, scheduler = setup(args, n_class)
    train_loader, val_loader = get_loaders(args, args.dataset, args.batch_size,
                                           n_class, args.debug, args.in_memory)

    n_tra: int = len(train_loader.dataset)  # Number of images in dataset
    l_tra: int = len(train_loader)  # Iterations per epoch: different if batch_size > 1
    n_val: int = len(val_loader.dataset)
    l_val: int = len(val_loader)

    best_dice: Tensor = torch.zeros(1).to(device).type(torch.float32)
    best_dice_2: Tensor = torch.zeros(1).to(device).type(torch.float32)
    best_epoch: int = 0
    metrics = {
        "val_dice": torch.zeros((n_epoch, n_val, n_class), device=device).type(torch.float32),
        "val_batch_dice": torch.zeros((n_epoch, l_val, n_class), device=device).type(torch.float32),
        "val_loss": torch.zeros((n_epoch, l_val), device=device).type(torch.float32),
        "tra_dice": torch.zeros((n_epoch, n_tra, n_class), device=device).type(torch.float32),
        "tra_batch_dice": torch.zeros((n_epoch, l_tra, n_class), device=device).type(torch.float32),
        "tra_loss": torch.zeros((n_epoch, l_tra), device=device).type(torch.float32)
    }
    if args.compute_haussdorf:
        metrics["val_haussdorf"] = torch.zeros((n_epoch, n_val, n_class), device=device).type(torch.float32)

    print(">>> Starting the training")
    for i in range(n_epoch):
        # Do training and validation loops
        tra_loss, tra_dice, tra_batch_dice, _ = do_epoch(
            "train", net, device, train_loader, i, loss_fns, loss_weights,
            n_class, optimizer=optimizer, metric_axis=args.metric_axis)
        with torch.no_grad():
            val_loss, val_dice, val_batch_dice, val_haussdorf = do_epoch(
                "val", net, device, val_loader, i, loss_fns, loss_weights,
                n_class, savedir=savedir, metric_axis=args.metric_axis,
                compute_haussdorf=args.compute_haussdorf)

        # Sort and save the metrics
        for k in metrics:
            assert metrics[k][i].shape == eval(k).shape, (metrics[k][i].shape, eval(k).shape)
            metrics[k][i] = eval(k)
        for k, e in metrics.items():
            np.save(Path(savedir, f"{k}.npy"), e.cpu().numpy())

        df = pd.DataFrame({
            "tra_loss": metrics["tra_loss"].mean(dim=1).cpu().numpy(),
            "val_loss": metrics["val_loss"].mean(dim=1).cpu().numpy(),
            "tra_dice": metrics["tra_dice"][:, :, -1].mean(dim=1).cpu().numpy(),
            "val_dice": metrics["val_dice"][:, :, -1].mean(dim=1).cpu().numpy(),
            "tra_batch_dice": metrics["tra_batch_dice"][:, :, -1].mean(dim=1).cpu().numpy(),
            "val_batch_dice": metrics["val_batch_dice"][:, :, -1].mean(dim=1).cpu().numpy()
        })
        df.to_csv(Path(savedir, args.csv), float_format="%.4f", index_label="epoch")

        # Save model if better
        current_dice: Tensor = val_dice[:, args.metric_axis].mean()
        if current_dice > best_dice:
            best_epoch = i
            best_dice = current_dice
            if args.compute_haussdorf:
                best_haussdorf = val_haussdorf[:, args.metric_axis].mean()
            with open(Path(savedir, "best_epoch.txt"), 'w') as f:
                f.write(str(i))
            best_folder = Path(savedir, "best_epoch")
            if best_folder.exists():
                rmtree(best_folder)
            copytree(Path(savedir, f"iter{i:03d}"), Path(best_folder))
            torch.save(net, Path(savedir, "best.pkl"))
        if current_dice > best_dice_2 and i >= 20:
            best_epoch = i
            best_dice_2 = current_dice
            with open(Path(savedir, "best_epoch2.txt"), 'w') as f:
                f.write(str(i))
                f.write(':')
                f.write(str(best_dice_2))
            torch.save(net, Path(savedir, "best2.pkl"))

        optimizer, loss_fns, loss_weights = scheduler(i, optimizer, loss_fns, loss_weights)

        # if args.schedule and (i > (best_epoch + 20)):
        if args.schedule and (i % (best_epoch + 20) == 0):  # Yeah, ugly but will clean that later
            for param_group in optimizer.param_groups:
                lr *= 0.5
                param_group['lr'] = lr
            print(f'> New learning Rate: {lr}')

        if i > 0 and not (i % 5):
            maybe_hauss = f', Haussdorf: {best_haussdorf:.3f}' if args.compute_haussdorf else ''
            print(f"> Best results at epoch {best_epoch}: DSC: {best_dice:.3f}{maybe_hauss}")
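# The run() variants above keep a commented-out plateau check (`i > best_epoch + 20`)
# but actually test `i % (best_epoch + 20) == 0`, which fires on different epochs than
# a true plateau check would. A minimal sketch of the presumably intended rule, halving
# the LR once no improvement has been seen for `patience` epochs; the names here are
# illustrative, not from the source:
def maybe_halve_lr(optimizer, lr, epoch, best_epoch, patience=20):
    """Halve every param group's lr when `epoch` is `patience` past the best epoch."""
    if epoch > best_epoch + patience:
        lr *= 0.5
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print(f'>> New learning rate: {lr}')
    return lr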
def run(args: argparse.Namespace) -> None:
    # save args to dict
    d = vars(args)
    d['time'] = str(datetime.datetime.now())
    save_dict_to_file(d, args.workdir)

    n_class: int = args.n_class
    lr: float = args.l_rate
    dtype = eval(args.dtype)

    # Proper params
    savedir: str = args.workdir
    n_epoch: int = args.n_epoch

    net, optimizer, device, loss_fns, loss_weights, loss_fns_source, \
        loss_weights_source, scheduler = setup(args, n_class, dtype)
    print(f'> Loss weights cons: {loss_weights}, '
          f'Loss weights source: {loss_weights_source}, '
          f'Loss weights adv: {args.lambda_adv_target}')
    shuffle = False
    if args.mix:
        shuffle = True
    loader, loader_val = get_loaders(args, args.dataset, args.folders,
                                     args.batch_size, n_class, args.debug,
                                     args.in_memory, dtype, False)
    target_loader, target_loader_val = get_loaders(args, args.target_dataset,
                                                   args.target_folders,
                                                   args.batch_size, n_class,
                                                   args.debug, args.in_memory,
                                                   dtype, shuffle)

    n_tra: int = len(loader.dataset)  # Number of images in dataset
    l_tra: int = len(loader)  # Iterations per epoch: different if batch_size > 1
    n_val: int = len(loader_val.dataset)
    l_val: int = len(loader_val)
    num_steps = n_epoch * len(loader)

    best_dice_pos: Tensor = np.zeros(1)
    best_dice: Tensor = np.zeros(1)
    best_3d_dice: Tensor = np.zeros(1)

    print(">>> Starting the training")
    for i in range(n_epoch):
        # Do training and validation loops
        tra_losses_vec, tra_source_vec, tra_target_vec, tra_baseline_target_vec = do_epoch(
            args, "train", net, device, loader, i, loss_fns, loss_weights,
            loss_fns_source, loss_weights_source, args.resize, num_steps,
            n_class, savedir=savedir, optimizer=optimizer,
            target_loader=target_loader,
            lambda_adv_target=args.lambda_adv_target)
        with torch.no_grad():
            val_losses_vec, val_source_vec, val_target_vec, val_baseline_target_vec = do_epoch(
                args, "val", net, device, loader_val, i, loss_fns,
                loss_weights, loss_fns_source, loss_weights_source,
                args.resize, num_steps, n_class, savedir=savedir,
                target_loader=target_loader_val,
                lambda_adv_target=args.lambda_adv_target)

        if i == 0:
            keep_tra_baseline_target_vec = tra_baseline_target_vec
            keep_val_baseline_target_vec = val_baseline_target_vec

        df_s_tmp = pd.DataFrame({"tra_dice": tra_source_vec[0],
                                 "tra_dice_pos": tra_source_vec[1],
                                 "tra_dice_neg": tra_source_vec[2],
                                 "tra_dice_3d": tra_source_vec[3],
                                 "tra_dice_3d_sd": tra_source_vec[4],
                                 "tra_haussdorf": tra_source_vec[5],
                                 "tra_loss_seg": tra_losses_vec[0],
                                 "tra_loss_adv": tra_losses_vec[1],
                                 "tra_loss_inf": tra_losses_vec[2],
                                 "tra_loss_cons": tra_losses_vec[3],
                                 "tra_loss_D": tra_losses_vec[4],
                                 "val_dice": val_source_vec[0],
                                 "val_dice_pos": val_source_vec[1],
                                 "val_dice_neg": val_source_vec[2],
                                 "val_dice_3d": val_source_vec[3],
                                 "val_dice_3d_sd": val_source_vec[4],
                                 "val_haussdorf": val_source_vec[5],
                                 "val_loss_seg": val_losses_vec[0]}, index=[i])
        df_t_tmp = pd.DataFrame({"tra_dice": tra_target_vec[0],
                                 "tra_dice_pos": tra_target_vec[1],
                                 "tra_dice_neg": tra_target_vec[2],
                                 "tra_dice_3d": tra_target_vec[3],
                                 "tra_dice_3d_sd": tra_target_vec[4],
                                 "tra_haussdorf": tra_target_vec[5],
                                 "tra_dice_3d_baseline": keep_tra_baseline_target_vec[0],
                                 "tra_dice_3d_sd_baseline": keep_tra_baseline_target_vec[1],
                                 "val_dice": val_target_vec[0],
                                 "val_dice_pos": val_target_vec[1],
                                 "val_dice_neg": val_target_vec[2],
                                 "val_dice_3d": val_target_vec[3],
                                 "val_dice_3d_sd": val_target_vec[4],
                                 "val_haussdorf": val_target_vec[5],
                                 "val_dice_3d_baseline": keep_val_baseline_target_vec[0],
                                 "val_dice_3d_sd_baseline": keep_val_baseline_target_vec[1]},
                                index=[i])
        if i == 0:
            df_s = df_s_tmp
            df_t = df_t_tmp
        else:
            df_s = df_s.append(df_s_tmp)
            df_t = df_t.append(df_t_tmp)
        df_s.to_csv(Path(savedir, args.csv), float_format="%.4f",
                    index_label="epoch")
        df_t.to_csv(Path(savedir, "_".join(("target", args.csv))),
                    float_format="%.4f", index_label="epoch")

        # Save model if better
        current_val_target_3d_dice = val_target_vec[3]
        if current_val_target_3d_dice > best_3d_dice:
            best_epoch = i
            best_3d_dice = current_val_target_3d_dice
            with open(Path(savedir, "best_epoch_3d.txt"), 'w') as f:
                f.write(str(i))
            best_folder_3d = Path(savedir, "best_epoch_3d")
            if best_folder_3d.exists():
                rmtree(best_folder_3d)
            copytree(Path(savedir, f"iter{i:03d}"), Path(best_folder_3d))
            torch.save(net, Path(savedir, "best_3d.pkl"))

        # remove images from iteration
        rmtree(Path(savedir, f"iter{i:03d}"))

        if args.scheduler:
            optimizer, loss_fns, loss_weights = scheduler(i, optimizer,
                                                          loss_fns, loss_weights)
        if (i % (best_epoch + 20) == 0) and i > 0:
            for param_group in optimizer.param_groups:
                lr *= 0.5
                param_group['lr'] = lr
            print(f'> New learning Rate: {lr}')
def main():
    '''Main function'''
    parser = argparse.ArgumentParser()
    parser.add_argument('-epoch', type=int, default=20)
    parser.add_argument('-batch_size', type=int, default=32)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-embedding_size', type=int, default=300)
    parser.add_argument('-learning_rate', type=float, default=0.0003)
    parser.add_argument('-name', type=str, default=None,
                        choices=['all', 'all_but_discharge', 'physician',
                                 'discharge', 'physician_nursing'])
    parser.add_argument('-task', type=str, default=None,
                        choices=['mortality', 'readmission'])
    parser.add_argument('-data_name', type=str, default=None)
    parser.add_argument('-period', type=str, choices=['24', '48', 'retro'])
    parser.add_argument('-data_dir', type=str, required=True)
    parser.add_argument('-feature', action='store_true', default=False)
    parser.add_argument('-text', action='store_true', default=False)
    parser.add_argument('-compare_note', action='store_true', default=False)
    parser.add_argument('-text_length', action='store_true', default=False)
    parser.add_argument('-segment', type=str, default=None)
    parser.add_argument('-log', type=str,
                        default="/data/joe/physician_notes/Deep-Average-Network/log/")
    parser.add_argument('-save_model', default=True)
    parser.add_argument('-save_mode', type=str, choices=['all', 'best'],
                        default='best')
    parser.add_argument('-test_mode', action='store_true', default=False)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-device', type=str, default='0')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.log = f"{opt.data_dir}/Deep-Average-Network/log/"
    if not os.path.exists(opt.log):
        os.mkdir(opt.log)

    # ========= Loading Dataset ========= #
    torch.manual_seed(1234)
    training_data, validation_data, test_data, vocab, feature_len = \
        dataloader.get_loaders(opt, is_test=opt.test_mode,
                               is_feature=opt.feature)

    # ========= Preparing Model ========= #
    print(opt)
    device = torch.device(f'cuda:{opt.device}' if opt.cuda else 'cpu')
    dan = DAN(len(vocab), opt.embedding_size, feature_len, opt.dropout,
              opt.feature, opt.text).to(device)
    optimizer = optim.AdamW(dan.parameters(), betas=(0.9, 0.98), eps=1e-09,
                            lr=opt.learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    valid_best_scores = None
    if not opt.test_mode:
        valid_best_scores = train(dan, training_data, validation_data,
                                  optimizer, loss_fn, device, opt)
    model_name = opt.task + '_' + opt.name + '_' + opt.period + '.chkpt'
    if opt.text:
        model_name = "text_" + model_name
    if opt.feature:
        model_name = "feature_" + model_name
    checkpoint = torch.load(
        f"{opt.data_dir}/Deep-Average-Network/models/{model_name}",
        map_location=device)
    dan.load_state_dict(checkpoint['model'])
    test(dan, training_data, validation_data, test_data, loss_fn, device, opt)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=int, default=0,
                        help='0: original dataset, 1: re-split dataset')
    parser.add_argument('--train_emb', action='store_true',
                        help='Train word embedding for SQLNet (requires pretrained model).')
    parser.add_argument('--resume', default=None,
                        help='resume from pretrained model.')
    parser.add_argument('--epoch', type=int, default=10,
                        help='number of epochs')
    parser.add_argument('--batch_size', type=int, default=2, help='batch size')
    parser.add_argument('--logging_step', type=int, default=50,
                        help='logging step')
    parser.add_argument('--lr_update', type=int, default=10, help='lr update')
    parser.add_argument('--learning_rate', type=float, default=1e-3,
                        help='learning rate')
    parser.add_argument('--prefix', type=str, default='',
                        help='prefix of saved model')
    parser.add_argument('--withtab', type=int, default=1,
                        help='sample from content vector')
    parser.add_argument('--teacher_forcing_fraction', type=float, default=1.0,
                        help='fraction of batches that will use teacher '
                             'forcing during training')
    parser.add_argument('--scheduled_teacher_forcing', action='store_true',
                        help='Linearly decrease the teacher forcing fraction '
                             'from 1.0 to 0.0 over the specified number of epochs')
    args = parser.parse_args()

    if args.scheduled_teacher_forcing:
        schedule = np.arange(1.0, 0.0, -1.0 / args.epoch)
    else:
        schedule = np.ones(args.epoch) * args.teacher_forcing_fraction

    train_loader, val_loader = data.get_loaders(args.batch_size, 8)

    # both branches currently build the same model
    if args.withtab:
        model = QG()
    else:
        model = QG()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['model'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, start_epoch))
            model.teach_forcing = 0.0
            model.mask_tf = True
            validate(args, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    best_rsum = 0
    for epoch in range(args.epoch):
        # adjust_learning_rate(args, model.optimizer, epoch)

        # train for one epoch
        model.teach_forcing = schedule[epoch]
        model.mask_tf = False
        train(args, train_loader, model, epoch)

        model.teach_forcing = 0.0
        print("Mask During Inference")
        model.mask_tf = True
        rsum = validate(args, val_loader, model)

        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        is_best = True  # overrides the comparison above: every epoch is checkpointed
        print(args.prefix)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': model.state_dict(),
        }, is_best,
            filename='{}.pth.tar'.format(epoch),  # '{}' placeholder restored
            prefix=args.prefix)
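# The scheduled teacher forcing above decays linearly from 1.0 toward 0.0 over
# args.epoch epochs, one fraction per epoch. A quick self-contained check of the
# schedule values (epochs=5 is an illustrative value, not from the source):
import numpy as np

epochs = 5
schedule = np.arange(1.0, 0.0, -1.0 / epochs)
print(schedule)  # approximately [1.0, 0.8, 0.6, 0.4, 0.2]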
def run(args: argparse.Namespace) -> dict[str, Tensor]:
    n_class: int = args.n_class
    lr: float = args.l_rate
    savedir: Path = Path(args.workdir)
    n_epoch: int = args.n_epoch
    val_f: int = args.val_loader_id

    loss_fns: list[list[Callable]]
    loss_weights: list[list[float]]
    net, optimizer, device, loss_fns, loss_weights, scheduler = setup(args, n_class)

    train_loaders: list[DataLoader]
    val_loaders: list[DataLoader]
    train_loaders, val_loaders = get_loaders(args, args.dataset,
                                             args.batch_size, n_class,
                                             args.debug, args.in_memory,
                                             args.dimensions, args.use_spacing)

    n_tra: int = sum(len(tr_lo.dataset) for tr_lo in train_loaders)  # Number of images in dataset
    l_tra: int = sum(len(tr_lo) for tr_lo in train_loaders)  # Iterations per epoch: different if batch_size > 1
    n_val: int = sum(len(vl_lo.dataset) for vl_lo in val_loaders)
    l_val: int = sum(len(vl_lo) for vl_lo in val_loaders)
    n_loss: int = max(map(len, loss_fns))

    best_dice: Tensor = torch.tensor(0, dtype=torch.float32)
    best_epoch: int = 0
    metrics: dict[str, Tensor] = {"val_dice": torch.zeros((n_epoch, n_val, n_class), dtype=torch.float32),
                                  "val_loss": torch.zeros((n_epoch, l_val, len(loss_fns[val_f])), dtype=torch.float32),
                                  "tra_dice": torch.zeros((n_epoch, n_tra, n_class), dtype=torch.float32),
                                  "tra_loss": torch.zeros((n_epoch, l_tra, n_loss), dtype=torch.float32)}
    if args.compute_3d_dice:
        metrics["val_3d_dsc"] = torch.zeros((n_epoch, l_val, n_class), dtype=torch.float32)
        best_3d_dsc: Tensor = torch.tensor(0, dtype=torch.float32)

    tra_req_metrics: list[str] = [k.removeprefix("tra_") for k in metrics.keys() if "tra_" in k]
    val_req_metrics: list[str] = [k.removeprefix("val_") for k in metrics.keys() if "val_" in k]

    print("\n>>> Starting the training")
    for i in range(n_epoch):
        # Do training and validation loops
        tra_metrics = do_epoch("train", net, device, train_loaders, i,
                               loss_fns, loss_weights, n_class,
                               savedir=savedir if args.save_train else None,
                               optimizer=optimizer,
                               metric_axis=args.metric_axis,
                               requested_metrics=tra_req_metrics,
                               temperature=args.temperature)
        with torch.no_grad():
            val_metrics = do_epoch("val", net, device, val_loaders, i,
                                   [loss_fns[val_f]], [loss_weights[val_f]],
                                   n_class, savedir=savedir,
                                   metric_axis=args.metric_axis,
                                   requested_metrics=val_req_metrics,
                                   temperature=args.temperature)

        # Sort and save the metrics
        for mode, mode_metrics in zip(["tra_", "val_"], [tra_metrics, val_metrics]):
            for k in mode_metrics:
                key: str = f"{mode}{k}"
                assert metrics[key][i].shape == mode_metrics[k].shape, \
                    (metrics[key][i].shape, mode_metrics[k].shape, k)
                metrics[key][i] = mode_metrics[k]
        for k, e in metrics.items():
            np.save(savedir / f"{k}.npy", e.cpu().numpy())

        df = pd.DataFrame({"tra_loss": metrics["tra_loss"].mean(dim=(1, 2)).numpy(),
                           "val_loss": metrics["val_loss"].mean(dim=(1, 2)).numpy(),
                           "tra_dice": metrics["tra_dice"][:, :, -1].mean(dim=1).numpy(),
                           "val_dice": metrics["val_dice"][:, :, -1].mean(dim=1).numpy()})
        df.to_csv(savedir / args.csv, float_format="%.4f", index_label="epoch")

        # Save model if better
        current_dice: Tensor = metrics["val_dice"][i, :, args.metric_axis].mean()
        if current_dice > best_dice:
            best_epoch = i
            best_dice = current_dice
            if "val_3d_dsc" in metrics:
                best_3d_dsc = metrics["val_3d_dsc"][i, :, args.metric_axis].mean()
            with open(savedir / "best_epoch.txt", 'w') as f:
                f.write(str(i))
            best_folder = savedir / "best_epoch"
            if best_folder.exists():
                rmtree(best_folder)
            copytree(savedir / f"iter{i:03d}", Path(best_folder))
            torch.save(net, savedir / "best.pkl")

        optimizer, loss_fns, loss_weights = scheduler(i, optimizer, loss_fns,
                                                      loss_weights, net,
                                                      device, train_loaders,
                                                      args)

        # if args.schedule and (i > (best_epoch + 20)):
        if args.schedule and (i % (best_epoch + 20) == 0):  # Yeah, ugly but will clean that later
            for param_group in optimizer.param_groups:
                lr *= 0.5
                param_group['lr'] = lr
            print(f'>> New learning Rate: {lr}')

        if i > 0 and not (i % 5):
            maybe_3d = f', 3d_DSC: {best_3d_dsc:.3f}' if args.compute_3d_dice else ''
            print(f">> Best results at epoch {best_epoch}: DSC: {best_dice:.3f}{maybe_3d}")

    # Because displaying the results at the end is actually convenient:
    maybe_3d = f', 3d_DSC: {best_3d_dsc:.3f}' if args.compute_3d_dice else ''
    print(f">> Best results at epoch {best_epoch}: DSC: {best_dice:.3f}{maybe_3d}")
    for metric in metrics:
        # Do not care about training values, nor the loss (keep it simple):
        if "val" in metric or "loss" in metric:
            print(f"\t{metric}: {metrics[metric][best_epoch].mean(dim=0)}")

    return metrics