def __init__(self, vh: VisdomHandler, single_stage: bool):
    """Set up loss trackers and best-so-far metric sentinels.

    :param vh: Visdom handler used for plotting/monitoring
    :param single_stage: if True track a single loss, otherwise one per stage
    """
    self.__vh = vh
    # Single-stage models need one tracker; multi-stage models need one per stage.
    tracker_factory = LossTracker if single_stage else MultipleLossTracker
    self.__train_loss = tracker_factory()
    self.__val_loss = tracker_factory()
    # 100.0 is a sentinel "worst" value; any real angular error improves on it.
    self.__best_val_loss = 100.0
    self.__best_mean = 100.0
    self.__best_median = 100.0
    self.__best_trimean = 100.0
    self.__best_bst25 = 100.0
    self.__best_wst25 = 100.0
    self.__best_pct95 = 100.0
def main():
    """Train a three-stage cascade model on the TCC dataset, validating every 5 epochs."""
    evaluator = Evaluator()

    # One timestamped log directory per run.
    path_to_log = os.path.join(PATH_TO_LOGS, "{}_{}_{}".format(MODEL_TYPE, DATA_FOLDER, str(time.time())))
    os.makedirs(path_to_log)
    path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
    path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
    log_experiment(MODEL_TYPE, DATA_FOLDER, LEARNING_RATE, path_to_experiment_log)

    print("\nLoading data from '{}':".format(DATA_FOLDER))
    training_set = TemporalColorConstancy(mode="train", split_folder=DATA_FOLDER)
    train_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
    test_set = TemporalColorConstancy(mode="test", split_folder=DATA_FOLDER)
    test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=8)
    training_set_size, test_set_size = len(training_set), len(test_set)
    print("Training set size: ... {}".format(training_set_size))
    print("Test set size: ....... {}\n".format(test_set_size))

    model = MODELS[MODEL_TYPE]()

    if RELOAD_CHECKPOINT:
        # Resume from a full pretrained checkpoint.
        print('\n Reloading checkpoint - pretrained model stored at: {} \n'.format(PATH_TO_PTH_CHECKPOINT))
        model.load(PATH_TO_PTH_CHECKPOINT)
    else:
        if PATH_TO_PTH_SUBMODULE != '':
            # Otherwise optionally warm-start only the submodules.
            print('\n Loading pretrained submodules stored at: {} \n'.format(PATH_TO_PTH_SUBMODULE))
            model.load_submodules(PATH_TO_PTH_SUBMODULE)

    model.print_network()
    model.log_network(path_to_log)
    model.set_optimizer(learning_rate=LEARNING_RATE)

    print('\n Training starts... \n')

    # 100.0 is a sentinel "worst" value; any real angular error improves on it.
    best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
    # One tracker per cascade stage (l1..l3) plus the aggregate loss (MAL).
    train_l1, train_l2, train_l3, train_mal = LossTracker(), LossTracker(), LossTracker(), LossTracker()
    val_l1, val_l2, val_l3, val_mal = LossTracker(), LossTracker(), LossTracker(), LossTracker()

    for epoch in range(EPOCHS):

        # --- Training ---
        model.train_mode()
        train_l1.reset()
        train_l2.reset()
        train_l3.reset()
        train_mal.reset()
        start = time.time()

        for i, (sequence, mimic, label, file_name) in enumerate(train_loader):
            model.reset_gradient()
            sequence, mimic, label = sequence.to(DEVICE), mimic.to(DEVICE), label.to(DEVICE)
            o1, o2, o3 = model.predict(sequence, mimic)
            l1, l2, l3, mal = model.compute_loss([o1, o2, o3], label)
            # Backpropagate only through the aggregate loss.
            mal.backward()
            model.optimize()
            train_l1.update(l1.item())
            train_l2.update(l2.item())
            train_l3.update(l3.item())
            train_mal.update(mal.item())
            if i % 5 == 0:
                print("[ Epoch: {}/{} - Batch: {}/{} ] | "
                      "[ Train L1: {:.4f} | Train L2: {:.4f} | Train L3: {:.4f} | Train MAL: {:.4f} ]"
                      .format(epoch, EPOCHS, i, training_set_size, l1.item(), l2.item(), l3.item(), mal.item()))

        train_time = time.time() - start
        log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

        # --- Validation (runs only every 5 epochs) ---
        val_l1.reset()
        val_l2.reset()
        val_l3.reset()
        val_mal.reset()
        start = time.time()

        if epoch % 5 == 0:
            print("\n--------------------------------------------------------------")
            print("\t\t Validation")
            print("--------------------------------------------------------------\n")
            with torch.no_grad():
                model.evaluation_mode()
                evaluator.reset_errors()
                for i, (sequence, mimic, label, file_name) in enumerate(test_loader):
                    sequence, mimic, label = sequence.to(DEVICE), mimic.to(DEVICE), label.to(DEVICE)
                    o1, o2, o3 = model.predict(sequence, mimic)
                    l1, l2, l3, mal = model.compute_loss([o1, o2, o3], label)
                    val_l1.update(l1.item())
                    val_l2.update(l2.item())
                    val_l3.update(l3.item())
                    val_mal.update(mal.item())
                    # Evaluation metrics are computed on the last stage's loss.
                    evaluator.add_error(l3.item())
                    if i % 5 == 0:
                        print("[ Epoch: {}/{} - Batch: {}/{} ] | "
                              "[ Val L1: {:.4f} | Val L2: {:.4f} | Val L3: {:.4f} | Val MAL: {:.4f} ]"
                              .format(epoch, EPOCHS, i, test_set_size, l1.item(), l2.item(), l3.item(), mal.item()))
            print("\n--------------------------------------------------------------\n")

        val_time = time.time() - start
        log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

        metrics = evaluator.compute_metrics()
        print("\n********************************************************************")
        print(" Train Time ... : {:.4f}".format(train_time))
        print(" Train MAL .... : {:.4f}".format(train_mal.avg))
        print(" Train L1 ..... : {:.4f}".format(train_l1.avg))
        print(" Train L2 ..... : {:.4f}".format(train_l2.avg))
        print(" Train L3 ..... : {:.4f}".format(train_l3.avg))
        # val_time > 0.1 distinguishes epochs where validation actually ran.
        if val_time > 0.1:
            print("....................................................................")
            print(" Val Time ..... : {:.4f}".format(val_time))
            print(" Val MAL ...... : {:.4f}".format(val_mal.avg))
            print(" Val L1 ....... : {:.4f}".format(val_l1.avg))
            print(" Val L2 ....... : {:.4f}".format(val_l2.avg))
            print(" Val L3 ....... : {:.4f} (Best: {:.4f})".format(val_l3.avg, best_val_loss))
            print("....................................................................")
            print_val_metrics(metrics, best_metrics)
        print("********************************************************************\n")

        # Checkpoint when the last-stage validation loss improves.
        if 0 < val_l3.avg < best_val_loss:
            best_val_loss = val_l3.avg
            best_metrics = evaluator.update_best_metrics()
            print("Saving new best model... \n")
            model.save(os.path.join(path_to_log, "model.pth"))

        log_metrics(train_mal.avg, val_mal.avg, metrics, best_metrics, path_to_metrics_log)
def main():
    """Cross-validated training of CTCCNet2 on GrayBall with cascade and correction losses."""
    evaluator = Evaluator()

    for n in range(NUM_FOLDS):
        # One timestamped log directory per fold.
        path_to_log = os.path.join(PATH_TO_LOGS, "ctccnet2_fold_{}_{}".format(n, time.time()))
        os.makedirs(path_to_log)
        path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
        path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
        log_experiment("ctccnet2", "fold_{}".format(n), LEARNING_RATE, path_to_experiment_log)

        print("\n Loading data for FOLD {}:".format(n))
        training_set = GrayBall(mode="train", fold=n, num_folds=NUM_FOLDS, return_labels=True)
        train_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
        test_set = GrayBall(mode="test", fold=n, num_folds=NUM_FOLDS, return_labels=True)
        test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=8)
        training_set_size, test_set_size = len(training_set), len(test_set)
        print("\n TRAINING SET")
        print("\t Size: ..... {}".format(training_set_size))
        print("\t Scenes: ... {}".format(training_set.get_scenes()))
        print("\n TEST SET")
        print("\t Size: ..... {}".format(test_set_size))
        print("\t Scenes: ... {}".format(test_set.get_scenes()))

        model = ModelCTCCNet2()
        model.print_network()
        model.log_network(path_to_log)
        model.set_optimizer(learning_rate=LEARNING_RATE)

        print('\n Training starts... \n')

        best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
        # One tracker per cascade stage plus a final slot for the aggregate (MAL).
        train_losses, train_losses_cor, val_losses, val_losses_cor = [], [], [], []
        for _ in range(NUM_STAGES + 1):
            train_losses.append(LossTracker())
            train_losses_cor.append(LossTracker())
            val_losses.append(LossTracker())
            val_losses_cor.append(LossTracker())

        for epoch in range(EPOCHS):

            # --- Training ---
            model.train_mode()
            for tl, tlc in zip(train_losses, train_losses_cor):
                tl.reset()
                tlc.reset()
            start = time.time()

            for i, (seq_temp, seq_shot, labels, _) in enumerate(train_loader):
                seq_temp, seq_shot, labels = seq_temp.to(DEVICE), seq_shot.to(DEVICE), labels.to(DEVICE)
                outputs = model.predict(seq_temp, seq_shot, return_preds=True)
                cas_loss, cas_mal, cor_loss, cor_mal = model.compute_corr_loss(outputs, labels)
                # Per-stage cascade/correction losses feed the per-stage trackers ([:-1]).
                for (tl, sl), (tlc, slc) in zip(zip(train_losses[:-1], cas_loss), zip(train_losses_cor[:-1], cor_loss)):
                    tl.update(sl.item())
                    tlc.update(slc.item())
                # NOTE(review): both aggregates are folded into the same last tracker
                # (train_losses[-1]); train_losses_cor[-1] is never updated — confirm intended.
                train_losses[-1].update(cas_mal.item())
                train_losses[-1].update(cor_mal.item())
                if i % 5 == 0:
                    mal = cas_mal.item() + cor_mal.item()
                    tl_log = " | ".join(["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cas_loss)])
                    tlc_log = " | ".join(["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cor_loss)])
                    print(" TRAIN: [ Epoch: {}/{} - Batch: {}/{} ] | Loss: {:.4f} |"
                          " Cascade: [ {} | MAL: {:.4f} ] |"
                          " Corrections: [ {} | MAL: {:.4f} ]"
                          .format(epoch + 1, EPOCHS, i + 1, training_set_size, mal, tl_log, cas_mal.item(), tlc_log, cor_mal.item()))

            train_time = time.time() - start
            log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

            # --- Validation (runs only every 5 epochs) ---
            for vl, vlc in zip(val_losses, val_losses_cor):
                vl.reset()
                vlc.reset()
            start = time.time()

            if epoch % 5 == 0:
                print("\n--------------------------------------------------------------")
                print("\t\t Validation")
                print("--------------------------------------------------------------\n")
                with torch.no_grad():
                    model.evaluation_mode()
                    evaluator.reset_errors()
                    for i, (seq_temp, seq_shot, labels, _) in enumerate(test_loader):
                        seq_temp, seq_shot, labels = seq_temp.to(DEVICE), seq_shot.to(DEVICE), labels.to(DEVICE)
                        outputs = model.predict(seq_temp, seq_shot, return_preds=True)
                        cas_loss, cas_mal, cor_loss, cor_mal = model.get_corr_loss(outputs, labels)
                        losses = zip(zip(val_losses[:-1], cas_loss), zip(val_losses_cor[:-1], cor_loss))
                        for (vl, sl), (vlc, slc) in losses:
                            vl.update(sl.item())
                            vlc.update(slc.item())
                        val_losses[-1].update(cas_mal.item())
                        val_losses[-1].update(cor_mal.item())
                        # Evaluation metrics come from the last cascade stage's error.
                        evaluator.add_error(cas_loss[-1].item())
                        if i % 5 == 0:
                            mal = cas_mal.item() + cor_mal.item()
                            log_cas = ["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cas_loss)]
                            log_cas = " | ".join(log_cas)
                            log_cor = ["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cor_loss)]
                            log_cor = " | ".join(log_cor)
                            print(" VAL: [ Epoch: {}/{} - Batch: {}/{} ] | Loss: {:.4f} |"
                                  " Cascade: [ {} | MAL: {:.4f} ] |"
                                  " Corrections: [ {} | MAL: {:.4f} ]"
                                  .format(epoch + 1, EPOCHS, i + 1, test_set_size, mal, log_cas, cas_mal.item(), log_cor, cor_mal.item()))
                print("\n--------------------------------------------------------------\n")

            val_time = time.time() - start
            log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

            metrics = evaluator.compute_metrics()
            print("\n********************************************************************")
            print(" Train Time ....... : {:.4f}".format(train_time))
            tl_log = " | ".join(["L{} {:.4f}".format(i + 1, tl.avg) for i, tl in enumerate(train_losses[:-1])])
            print(" AVG Train Loss ... : [ {} | MAL: {:.4f} ]".format(tl_log, train_losses[-1].avg))
            # val_time > 0.1 distinguishes epochs where validation actually ran.
            if val_time > 0.1:
                print("....................................................................")
                print(" Val Time ......... : {:.4f}".format(val_time))
                vl_log = " | ".join(["L{} {:.4f}".format(i + 1, vl.avg) for i, vl in enumerate(val_losses[:-1])])
                print(" AVG Val Loss: .... : [ {} | MAL: {:.4f} ]".format(vl_log, val_losses[-1].avg))
                print("....................................................................")
                print_val_metrics(metrics, best_metrics)
            print("********************************************************************\n")

            # Checkpoint on the last cascade stage's average validation loss ([-2];
            # [-1] holds the aggregate MAL tracker).
            curr_val_loss = val_losses[-2].avg
            if 0 < curr_val_loss < best_val_loss:
                best_val_loss = curr_val_loss
                best_metrics = evaluator.update_best_metrics()
                print("Saving new best model... \n")
                model.save(os.path.join(path_to_log, "model.pth"))

            log_metrics(train_losses[-1].avg, val_losses[-1].avg, metrics, best_metrics, path_to_metrics_log)
def main():
    """Cross-validated single-stage training on GrayBall.

    Fix vs. original: the return value of evaluator.update_best_metrics() is now
    assigned to best_metrics, so print_val_metrics() and log_metrics() report the
    refreshed best metrics instead of the stale initial ones.
    """
    evaluator = Evaluator()

    for n in range(NUM_FOLDS):
        # One timestamped log directory per fold.
        path_to_log = os.path.join(PATH_TO_LOGS, "{}_fold_{}_{}".format(MODEL_TYPE, n, time.time()))
        os.makedirs(path_to_log)
        path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
        path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
        log_experiment(MODEL_TYPE, "fold_{}".format(n), LEARNING_RATE, path_to_experiment_log)

        print("\n Loading data for FOLD {}:".format(n))
        training_set = GrayBall(mode="train", fold=n, num_folds=NUM_FOLDS)
        train_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
        test_set = GrayBall(mode="test", fold=n, num_folds=NUM_FOLDS)
        test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=8)
        training_set_size, test_set_size = len(training_set), len(test_set)
        print("\n TRAINING SET")
        print("\t Size: ..... {}".format(training_set_size))
        print("\t Scenes: ... {}".format(training_set.get_scenes()))
        print("\n TEST SET")
        print("\t Size: ..... {}".format(test_set_size))
        print("\t Scenes: ... {}".format(test_set.get_scenes()))

        model = MODELS[MODEL_TYPE]()
        if RELOAD_CHECKPOINT:
            print('\n Reloading checkpoint - pretrained model stored at: {} \n'.format(PATH_TO_PTH_CHECKPOINT))
            model.load(PATH_TO_PTH_CHECKPOINT)
        model.print_network()
        model.log_network(path_to_log)
        model.set_optimizer(learning_rate=LEARNING_RATE)

        print('\n Training starts... \n')

        # 100.0 is a sentinel "worst" value; any real angular error improves on it.
        best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
        train_loss, val_loss = LossTracker(), LossTracker()

        for epoch in range(EPOCHS):

            # --- Training ---
            model.train_mode()
            train_loss.reset()
            start = time.time()

            for i, data in enumerate(train_loader):
                model.reset_gradient()
                sequence, mimic, label, file_name = data
                # 4-D inputs lack a time axis: insert one so the model always sees sequences.
                sequence = sequence.unsqueeze(1).to(DEVICE) if len(sequence.shape) == 4 else sequence.to(DEVICE)
                mimic = mimic.to(DEVICE)
                label = label.to(DEVICE)
                # NOTE(review): assumes compute_loss performs backward() internally,
                # since optimize() is called with no explicit backward pass — confirm.
                loss = model.compute_loss(sequence, label, mimic)
                model.optimize()
                train_loss.update(loss)
                if i % 5 == 0:
                    print("[ Epoch: {}/{} - Item: {}/{} ] | [ Train loss: {:.4f} ]"
                          .format(epoch, EPOCHS, i, training_set_size, loss))

            train_time = time.time() - start
            log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

            # --- Validation (runs only every 5 epochs) ---
            start = time.time()
            val_loss.reset()
            if epoch % 5 == 0:
                print("\n--------------------------------------------------------------")
                print("\t\t Validation")
                print("--------------------------------------------------------------\n")
                with torch.no_grad():
                    model.evaluation_mode()
                    evaluator.reset_errors()
                    for i, data in enumerate(test_loader):
                        sequence, mimic, label, file_name = data
                        sequence = sequence.unsqueeze(1).to(DEVICE) if len(sequence.shape) == 4 else sequence.to(DEVICE)
                        mimic = mimic.to(DEVICE)
                        label = label.to(DEVICE)
                        o = model.predict(sequence, mimic)
                        loss = model.get_angular_loss(o, label).item()
                        val_loss.update(loss)
                        evaluator.add_error(loss)
                        if i % 5 == 0:
                            print("[ Epoch: {}/{} - Item: {}/{}] | Val loss: {:.4f} ]"
                                  .format(epoch, EPOCHS, i, test_set_size, loss))
                print("\n--------------------------------------------------------------\n")

            val_time = time.time() - start
            log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

            metrics = evaluator.compute_metrics()
            print("\n********************************************************************")
            print(" Train Time ... : {:.4f}".format(train_time))
            print(" Train Loss ... : {:.4f}".format(train_loss.avg))
            # val_time > 0.1 distinguishes epochs where validation actually ran.
            if val_time > 0.1:
                print("....................................................................")
                print(" Val Time ..... : {:.4f}".format(val_time))
                print(" Val Loss ..... : {:.4f}".format(val_loss.avg))
                print("....................................................................")
                print_val_metrics(metrics, best_metrics)
            print("********************************************************************\n")

            if 0 < val_loss.avg < best_val_loss:
                best_val_loss = val_loss.avg
                # BUG FIX: capture the refreshed best metrics. The original discarded
                # the return value, leaving best_metrics stale for the whole run
                # (every sibling training script assigns it).
                best_metrics = evaluator.update_best_metrics()
                print("Saving new best model... \n")
                model.save(os.path.join(path_to_log, "model.pth"))

            log_metrics(train_loss.avg, val_loss.avg, metrics, best_metrics, path_to_metrics_log)
def main(opt):
    """Train CTCCNet2 on the TCC benchmark, configured from command-line options.

    :param opt: parsed arguments exposing data_folder, epochs and lr
    """
    data_folder = opt.data_folder
    epochs = opt.epochs
    learning_rate = opt.lr
    evaluator = Evaluator()

    # One timestamped log directory per run.
    path_to_log = os.path.join("train", "tcc", "logs", "ctccnet2_{}_{}".format(data_folder, str(time.time())))
    os.makedirs(path_to_log)
    path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
    path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
    log_experiment("ctccnet2", data_folder, learning_rate, path_to_experiment_log)

    print("\n Loading data from '{}':".format(data_folder))
    training_set = TemporalColorConstancy(mode="train", split_folder=data_folder)
    train_loader = DataLoader(dataset=training_set, batch_size=1, shuffle=True, num_workers=8)
    test_set = TemporalColorConstancy(mode="test", split_folder=data_folder)
    test_loader = DataLoader(dataset=test_set, batch_size=1, num_workers=8)
    training_set_size, test_set_size = len(training_set), len(test_set)
    print("Training set size: ... {}".format(training_set_size))
    print("Test set size: ....... {}\n".format(test_set_size))

    model = ModelCTCCNet2()
    if RELOAD_CHECKPOINT:
        print('\n Reloading checkpoint - pretrained model stored at: {} \n'.format(PATH_TO_PTH_CHECKPOINT))
        model.load(PATH_TO_PTH_CHECKPOINT)
    model.print_network()
    model.log_network(path_to_log)
    model.set_optimizer(learning_rate)

    print('\n Training starts... \n')

    best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
    # One tracker per cascade stage plus a final slot for the aggregate (MAL).
    train_losses, val_losses = [], []
    for _ in range(NUM_STAGES + 1):
        train_losses.append(LossTracker())
        val_losses.append(LossTracker())

    for epoch in range(epochs):

        # --- Training ---
        model.train_mode()
        for tl in train_losses:
            tl.reset()
        start = time.time()

        for i, (seq_temp, seq_shot, label, _) in enumerate(train_loader):
            seq_temp, seq_shot, label = seq_temp.to(DEVICE), seq_shot.to(DEVICE), label.to(DEVICE)
            outputs = model.predict(seq_temp, seq_shot)
            stages_loss, mal = model.compute_loss(outputs, label)
            for tl, sl in zip(train_losses[:-1], stages_loss):
                tl.update(sl.item())
            train_losses[-1].update(mal.item())
            if i % 5 == 0:
                tl_log = " | ".join(["TL{} {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(stages_loss)])
                # NOTE(review): the value printed as "Train MAL" is the last stage's
                # loss (stages_loss[-1]), not mal — confirm this is intended.
                print("[ Epoch: {}/{} - Batch: {}/{} ] | [ {} | Train MAL: {:.4f} ]"
                      .format(epoch + 1, epochs, i + 1, training_set_size, tl_log, stages_loss[-1].item()))

        train_time = time.time() - start
        log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

        # --- Validation (runs only every 5 epochs) ---
        for vl in val_losses:
            vl.reset()
        start = time.time()

        if epoch % 5 == 0:
            print("\n--------------------------------------------------------------")
            print("\t\t Validation")
            print("--------------------------------------------------------------\n")
            with torch.no_grad():
                model.evaluation_mode()
                evaluator.reset_errors()
                for i, (seq_temp, seq_shot, label, _) in enumerate(test_loader):
                    seq_temp, seq_shot, label = seq_temp.to(DEVICE), seq_shot.to(DEVICE), label.to(DEVICE)
                    outputs = model.predict(seq_temp, seq_shot)
                    stages_loss, mal = model.get_loss(outputs, label)
                    for vl, sl in zip(val_losses[:-1], stages_loss):
                        vl.update(sl.item())
                    val_losses[-1].update(mal.item())
                    # Evaluation metrics come from the last stage's error.
                    evaluator.add_error(stages_loss[-1].item())
                    if i % 5 == 0:
                        vl_log = ["VL{} {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(stages_loss)]
                        vl_log = " | ".join(vl_log)
                        print("[ Epoch: {}/{} - Batch: {}/{} ] | [ {} | Val MAL: {:.4f} ]"
                              .format(epoch + 1, epochs, i + 1, test_set_size, vl_log, stages_loss[-1].item()))
            print("\n--------------------------------------------------------------\n")

        val_time = time.time() - start
        log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

        metrics = evaluator.compute_metrics()
        print("\n********************************************************************")
        print(" Train Time ....... : {:.4f}".format(train_time))
        tl_log = " | ".join(["L{} {:.4f}".format(i + 1, tl.avg) for i, tl in enumerate(train_losses[:-1])])
        print(" AVG Train Loss ... : [ {} | MAL: {:.4f} ]".format(tl_log, train_losses[-1].avg))
        # val_time > 0.1 distinguishes epochs where validation actually ran.
        if val_time > 0.1:
            print("....................................................................")
            print(" Val Time ......... : {:.4f}".format(val_time))
            vl_log = " | ".join(["L{} {:.4f}".format(i + 1, vl.avg) for i, vl in enumerate(val_losses[:-1])])
            print(" AVG Val Loss: .... : [ {} | MAL: {:.4f} ]".format(vl_log, val_losses[-1].avg))
            print("....................................................................")
            print_val_metrics(metrics, best_metrics)
        print("********************************************************************\n")

        # Checkpoint on the last stage's average validation loss ([-2]; [-1] is the
        # aggregate MAL tracker).
        curr_val_loss = val_losses[-2].avg
        if 0 < curr_val_loss < best_val_loss:
            best_val_loss = curr_val_loss
            best_metrics = evaluator.update_best_metrics()
            print("Saving new best model... \n")
            model.save(os.path.join(path_to_log, "model.pth"))

        log_metrics(train_losses[-1].avg, val_losses[-1].avg, metrics, best_metrics, path_to_metrics_log)
def __init__(self):
    """Create one tracker for the overall loss and one for each of the three stages."""
    self.__loss = LossTracker()
    # Per-stage trackers for the three cascade stages.
    self.__loss1, self.__loss2, self.__loss3 = LossTracker(), LossTracker(), LossTracker()