def loss_compare(savestr=("poswei", "maxpool", "poswei"),
                 model_design_name=("seqDNC", "lstmnorm", "taco")):
    logfile = "log/final_" + datetime_filename() + ".txt"
    models = load_models(savestr=savestr, model_design_name=model_design_name)
    for mdn in model_design_name:
        bs = 8
        num_workers = 8
        small_target = True
        ig = InputGenH(small_target=small_target)
        if small_target:
            outputlen = 2975
        else:
            outputlen = 5951
        # use validation for the moment
        testds = ig.get_test()
        test = DataLoader(dataset=testds, batch_size=bs, num_workers=num_workers,
                          collate_fn=pad_collate)
        valid_iterator = iter(test)
        model = next(models)
        model = model.cuda()
        loss = 0
        val_batch = 25
        oo = torch.zeros((val_batch * bs, outputlen))
        tt = torch.zeros((val_batch * bs, outputlen))
        for i in range(val_batch):
            (input, target, loss_type) = next(valid_iterator)
            dl = run_one_patient(model, input, target, loss_type, 1e-5)
            if dl is not None:
                loss += dl[0]
                oo[i * bs:(i + 1) * bs, :] = dl[1]
                tt[i * bs:(i + 1) * bs, :] = dl[2]
            else:
                raise ValueError("val_loss is none")
        loss = loss / val_batch
        # averaging per-batch metrics is not the correct approach;
        # all outputs and targets are concatenated first and the metrics are computed once.
        sen = sensitivity(oo, tt)
        spe = specificity(oo, tt)
        f1 = f1score(oo, tt)
        prec = precision(oo, tt)
        acc = accuracy(oo, tt)
        logprint(logfile,
                 "%s. loss: %.7f, sensitivity: %.5f, specificity: %.5f, "
                 "precision: %.5f, f1: %.5f, accuracy: %.5f" %
                 (mdn, loss, sen, spe, prec, f1, acc))
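# The metric helpers above (sensitivity, specificity, precision, f1score, accuracy)
# are defined elsewhere in the repo. Below is a minimal sketch of what they are
# assumed to compute on the concatenated (val_batch * bs, outputlen) tensors; the
# 0.5 cutoff on sigmoid outputs is an assumption, not confirmed by this file.
# Computing the metrics once on the concatenated oo/tt, rather than averaging
# per-batch values, avoids the bias introduced by batches with no positive labels.
def _confusion_counts_sketch(output, target, threshold=0.5):
    # binarize sigmoid activations with an assumed cutoff
    pred = (torch.sigmoid(output) > threshold).float()
    tp = (pred * target).sum()
    tn = ((1 - pred) * (1 - target)).sum()
    fp = (pred * (1 - target)).sum()
    fn = ((1 - pred) * target).sum()
    return tp, tn, fp, fn


def sensitivity_sketch(output, target, eps=1e-8):
    # recall over the positive class: tp / (tp + fn)
    tp, _, _, fn = _confusion_counts_sketch(output, target)
    return float(tp / (tp + fn + eps))


def specificity_sketch(output, target, eps=1e-8):
    # true negative rate: tn / (tn + fp)
    _, tn, fp, _ = _confusion_counts_sketch(output, target)
    return float(tn / (tn + fp + eps))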
def validationonly(savestr):
    """
    Run validation only on a saved LSTM model.

    :param savestr: identifier of the saved model to load
    :return:
    """
    lr = 1e-2
    optim = None
    logfile = "vallog.txt"
    num_workers = 8
    ig = InputGenH()
    trainds = ig.get_train()
    validds = ig.get_valid()
    testds = ig.get_test()
    validdl = DataLoader(dataset=validds, batch_size=8, num_workers=num_workers,
                         collate_fn=pad_collate)
    print("Using", num_workers, "workers for validation set")
    # testing whether this LSTM works is basically a question whether
    lstm = lstmwrapperJ()

    # load model:
    print("loading model")
    lstm, optim, starting_epoch, starting_iteration = load_model(
        lstm, optim, 0, 0, savestr)
    lstm = lstm.cuda()
    if optim is None:
        optimizer = torch.optim.Adam(lstm.parameters(), lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    real_criterion = nn.SmoothL1Loss()
    binary_criterion = nn.BCEWithLogitsLoss()

    # only the validation split is used; training arguments are passed as None
    traindl = None
    total_epochs = None
    iter_per_epoch = None

    # starting with the epoch after the loaded one
    validate(lstm, optimizer, real_criterion, binary_criterion, traindl, validdl,
             int(starting_epoch), total_epochs, int(starting_iteration),
             iter_per_epoch, logfile)
def validationonly(savestr, beta, epoch=0, iteration=0):
    """
    :param savestr: identifier of the saved model to load
    :param beta: weight passed through to run_one_patient
    :param epoch: defaults to 0 to load the highest saved model
    :param iteration: ditto
    :return:
    """
    lr = 1e-3
    optim = None
    logfile = "vallog.txt"
    num_workers = 8
    ig = InputGenH()
    # multiprocessing disabled, because socket requests seem unstable.
    # performance should not be too bad?
    validds = ig.get_valid()
    validdl = DataLoader(dataset=validds, num_workers=num_workers,
                         batch_size=param_bs, collate_fn=pad_collate)
    valid_iterator = iter(validdl)
    print("Using", num_workers, "workers for validation set")
    computer = SeqDNC(x=param_x, h=param_h, L=param_L, v_t=param_v_t,
                      W=param_W, R=param_R, N=param_N, bs=param_bs)

    # load model:
    print("loading model")
    computer, optim, starting_epoch, starting_iteration = load_model(
        computer, optim, epoch, iteration, savestr)
    computer = computer.cuda()

    real_criterion = nn.SmoothL1Loss()
    binary_criterion = nn.BCEWithLogitsLoss()

    # starting with the epoch after the loaded one
    running_loss = []
    valid_batches = 500
    for i in range(valid_batches):
        input, target, loss_type = next(valid_iterator)
        val_loss = run_one_patient(computer, input, target, None, None, loss_type,
                                   real_criterion, binary_criterion, beta,
                                   validate=True)
        if val_loss is not None:
            printloss = float(val_loss[0])
            running_loss.append(printloss)
            if logfile:
                with open(logfile, 'a') as handle:
                    handle.write("validation. count: %4d, val loss: %.10f \n" %
                                 (i, printloss))
            print("validation. count: %4d, val loss: %.10f" % (i, printloss))
    print(np.mean(running_loss))
def main(load, savestr='default', lr=1e-3, beta=0.01):
    """
    :param load: whether to resume from a saved model
    :param savestr: identifier used for checkpoints and log files
    :param lr: learning rate
    :param beta:
    :return:
    """
    total_epochs = 1
    iter_per_epoch = int(saturation / param_bs)
    optim = None
    starting_epoch = 0
    starting_iteration = 0
    logfile = "log/dnc_" + savestr + "_" + datetime_filename() + ".txt"
    num_workers = 16
    # ig = InputGenG(small_target=True)
    ig = InputGenH(small_target=True)
    trainds = ig.get_train()
    validds = ig.get_valid()
    traindl = DataLoader(dataset=trainds, batch_size=param_bs, num_workers=num_workers,
                         collate_fn=pad_collate, pin_memory=True)
    validdl = DataLoader(dataset=validds, batch_size=param_bs,
                         num_workers=num_workers // 2, collate_fn=pad_collate,
                         pin_memory=True)
    print("Using", num_workers, "workers for training set")
    computer = SeqDNC(x=param_x, h=param_h, L=param_L, v_t=param_v_t,
                      W=param_W, R=param_R, N=param_N)

    # load model:
    if load:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(
            computer, optim, starting_epoch, starting_iteration, savestr)

    computer = computer.cuda()
    if optim is None:
        optimizer = torch.optim.Adam(computer.parameters(), lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    for group in optimizer.param_groups:
        print("Currently using a learning rate of ", group["lr"])

    # creating the positive weights
    with open("/infodev1/rep/projects/jason/pickle/dcc.pkl", "rb") as f:
        # loaded here is a vector where v_i is the number of times death label i has occurred
        weights = pickle.load(f)
    negs = 59652 - weights
    weights[weights < 4] = 3
    weights = negs / weights
    weights = torch.from_numpy(weights).float().cuda()
    weights = Variable(weights)

    real_criterion = TOELoss()
    # pos_weight is not available in PyTorch 0.3.1's BCEWithLogitsLoss, hence the custom loss
    binary_criterion = WeightedBCELLoss(pos_weight=weights)

    # starting with the epoch after the loaded one
    train(computer, optimizer, real_criterion, binary_criterion, traindl, validdl,
          int(starting_epoch), total_epochs, int(starting_iteration), iter_per_epoch,
          savestr, beta, logfile)
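# The weighting above follows the usual pos_weight formulation: for each label,
# (number of negative patients) / (number of positive patients), with rare labels
# floored at a count of 3 so the ratio does not explode. For example, a death code
# seen 50 times out of 59652 patients gets a weight of (59652 - 50) / 50 ~= 1192,
# while one seen only twice is clamped to (59652 - 2) / 3 ~= 19883.
# WeightedBCELLoss itself is defined elsewhere in the repo; the sketch below is an
# assumption about its behavior, mirroring the pos_weight semantics that
# BCEWithLogitsLoss gained in later PyTorch versions.
import torch.nn.functional as F  # import for this sketch only


class WeightedBCEWithLogitsSketch(nn.Module):
    """Per-label weighted BCE on logits: positive terms are scaled by pos_weight."""

    def __init__(self, pos_weight):
        super(WeightedBCEWithLogitsSketch, self).__init__()
        self.pos_weight = pos_weight  # shape: (num_labels,)

    def forward(self, logits, target):
        # log-sigmoid formulation keeps the loss numerically stable
        log_p = F.logsigmoid(logits)
        log_1mp = F.logsigmoid(-logits)
        loss = -(self.pos_weight * target * log_p + (1 - target) * log_1mp)
        return loss.mean()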
def main(load=False, lr=1e-3, beta=1e-3, savestr="", kill_time=True):
    total_epochs = 1
    iter_per_epoch = 10000
    optim = None
    starting_epoch = 0
    starting_iteration = 0
    logfile = "log/taco_" + savestr + "_" + datetime_filename() + ".txt"
    num_workers = 4
    ig = InputGenH(small_target=True)
    validds = ig.get_valid()
    trainds = ig.get_train()
    validdl = DataLoader(dataset=validds, batch_size=8, num_workers=num_workers,
                         collate_fn=pad_collate, pin_memory=True)
    traindl = DataLoader(dataset=trainds, batch_size=8, num_workers=num_workers,
                         collate_fn=pad_collate, pin_memory=True)
    print("Using", num_workers, "workers for training set")
    computer = Tacotron()

    # load model:
    if load:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(
            computer, optim, starting_epoch, starting_iteration, savestr)

    computer = computer.cuda()
    if optim is None:
        print("Using Adam with lr", lr)
        optimizer = torch.optim.Adam(
            [i for i in computer.parameters() if i.requires_grad], lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    for group in optimizer.param_groups:
        print("Currently using a learning rate of ", group["lr"])

    # creating the positive weights
    with open("/infodev1/rep/projects/jason/pickle/dcc.pkl", "rb") as f:
        # loaded here is a vector where v_i is the number of times death label i has occurred
        weights = pickle.load(f)
    negs = 59652 - weights
    weights[weights < 4] = 3
    weights = negs / weights
    weights = torch.from_numpy(weights).float().cuda()
    weights = Variable(weights)
    binary_criterion = WeightedBCELLoss(pos_weight=weights)

    # starting with the epoch after the loaded one
    real_criterion = TOELoss()
    train(computer, optimizer, real_criterion, binary_criterion, traindl, validdl,
          int(starting_epoch), total_epochs, int(starting_iteration), iter_per_epoch,
          savestr, beta, logfile, kill_time)
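# A hedged sketch of how this entry point might be wired up from the command line.
# The argument names mirror the signature of main() above; this __main__ block and
# its flag names are assumptions for illustration, not part of the original file.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--load", action="store_true",
                        help="resume from the latest saved checkpoint")
    parser.add_argument("--savestr", default="taco",
                        help="identifier used for checkpoints and log files")
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--beta", type=float, default=1e-3)
    parser.add_argument("--no_kill_time", action="store_true")
    args = parser.parse_args()

    main(load=args.load, lr=args.lr, beta=args.beta, savestr=args.savestr,
         kill_time=not args.no_kill_time)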