def test_train_1(self):
    """Constructing the engine with no model must abort training."""
    with pytest.raises(AssertionError) as error:
        engine = TrainEngine(model=None)
        engine.train(options=None)
    self.assertEqual(str(error.value), "Model has not been specified")
def test_train_5(self):
    """A zero ``max_epochs`` option must raise an AssertionError."""
    with pytest.raises(AssertionError) as error:
        net = ProtoNetTUF(encoder=linear(in_features=2, out_features=1))
        engine = TrainEngine(model=net)

        # optimizer driving the learning
        adam = optim.Adam(params=net.parameters(), lr=0.1, weight_decay=0.01)

        # schedule for shrinking the learning rate
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=adam,
                                                    gamma=0.01,
                                                    step_size=0.5,
                                                    verbose=True)

        engine.train(options={"optimizer": adam,
                              "lr_scheduler": scheduler,
                              "max_epochs": 0})
    self.assertEqual(str(error.value), "Invalid number of max epochs")
def test_trainer_7(self):
    """A ``None`` sample loader in the options must raise an AssertionError."""
    with pytest.raises(AssertionError) as error:
        net = ProtoNetTUF(encoder=linear(in_features=2, out_features=3))
        engine = TrainEngine(model=net)

        # optimizer driving the learning
        adam = optim.Adam(params=net.parameters(), lr=0.1, weight_decay=0.01)

        # schedule for shrinking the learning rate
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=adam,
                                                    gamma=0.01,
                                                    step_size=0.5,
                                                    verbose=True)

        engine.train(options={"optimizer": adam,
                              "lr_scheduler": scheduler,
                              "max_epochs": 2,
                              "device": "cpu",
                              "iterations": 1,
                              "sample_loader": None,
                              "num_support_tr": 6})
    self.assertEqual(str(error.value), "Sample loader has not been specified")
def test_train_2(self):
    """Calling ``train`` without options must raise an AssertionError."""
    with pytest.raises(AssertionError) as error:
        engine = TrainEngine(model=ProtoNetTUF(encoder=None))
        engine.train(options=None)
    self.assertEqual(str(error.value), "Training options not specified")
def test_train_3(self):
    """Options with a ``None`` scheduler must raise an AssertionError."""
    with pytest.raises(AssertionError) as error:
        engine = TrainEngine(model=ProtoNetTUF(encoder=None))
        engine.train(options={"lr_scheduler": None})
    self.assertEqual(str(error.value), "Learning scheduler has not been specified")
def test_trainer_8(self):
    """Smoke test: one full training epoch over the bundled CSV dataset."""
    init_seed(options={"seed": 0})

    # episode composition: support points + query points per class
    n_support = 6
    n_query = 12
    n_samples = n_support + n_query

    # random classes drawn per episode; this should be equal or less
    # than the unique number of classes in the dataset
    classes_per_it = 3
    iterations = 10

    # learning rate scheduler step
    scheduler_step = 15

    model = ProtoNetTUF(encoder=linear(in_features=2, out_features=3))
    engine = TrainEngine(model=model)

    # optimizer to be used for learning
    adam = optim.Adam(params=model.parameters(), lr=0.1, weight_decay=0.001)

    # how to reduce the learning rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer=adam,
                                                gamma=0.01,
                                                step_size=scheduler_step,
                                                verbose=True)

    dataset = TUFDataset(filename=Path("./test_data/train_data.csv"),
                         dataset_type="train")
    sampler = BatchSampler(labels=dataset.labels,
                           classes_per_it=classes_per_it,
                           num_samples=n_samples,
                           iterations=iterations,
                           mode="train")
    loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)

    engine.train(options={"optimizer": adam,
                          "lr_scheduler": scheduler,
                          "max_epochs": 1,
                          "device": "cpu",
                          "sample_loader": loader,
                          "iterations": iterations,
                          "num_support_tr": n_support})
def train(configuration: dict) -> None:
    """Train a ProtoNetTUF model according to ``configuration``.

    Creates ``save_model_path/model_name`` (refusing to overwrite an
    existing run), dumps the configuration there as ``config.json``,
    builds the model / optimizer / scheduler / data pipeline and runs
    the TrainEngine. Loss and accuracy curves are saved as PNG files
    in the run directory.

    :param configuration: dictionary holding every training option
        (paths, classes, optimizer/scheduler settings, episode sizes, ...)
    :raises ValueError: if the model output directory already exists
    """
    dirs = os.listdir(configuration["save_model_path"])
    if configuration["model_name"] in dirs:
        raise ValueError(f"Directory {configuration['model_name']} exists")

    # create the output directory if it doesn't exist
    output_path = Path(configuration["save_model_path"]) / configuration["model_name"]
    os.mkdir(path=output_path)
    configuration["save_model_path"] = str(output_path)

    # save the configuration in the output directory
    with open(output_path / "config.json", 'w', newline="\n") as fh:
        json.dump(configuration, fh)

    device = configuration['device']
    if device == 'gpu' and not torch.cuda.is_available():
        print(
            "{0} You specified CUDA as device but PyTorch configuration does not support CUDA"
            .format(WARNING))
        print("{0} Setting device to cpu".format(WARNING))
        configuration['device'] = 'cpu'

    # initialize seed for random generation utilities
    init_seed(options=configuration)

    # the model to train
    model = ProtoNetTUF.build_network(
        encoder=convolution_with_linear_softmax(
            in_channels=2, out_channels=1, kernel_size=1,
            in_features=configuration["in_features"],
            out_features=len(configuration["classes"])),
        options=configuration)

    # initialize the optimizer.
    # NOTE: named "optimizer" (not "optim") so the torch.optim module
    # alias used elsewhere in this file is not shadowed
    optimizer = torch.optim.Adam(
        params=model.parameters(),
        lr=configuration["optimizer"]["lr"],
        weight_decay=configuration["optimizer"]["weight_decay"])

    # initialize scheduler for learning rate decay.
    # Decays the learning rate of each parameter group by gamma every
    # step_size epochs. Notice that such decay can happen simultaneously
    # with other changes to the learning rate from outside this scheduler.
    # When last_epoch=-1, sets initial lr as lr.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer=optimizer,
        gamma=configuration["lr_scheduler"]["gamma"],
        step_size=configuration["lr_scheduler"]["step_size"])

    train_dataset = TUFDataset(filename=Path(configuration["train_dataset"]),
                               dataset_type="train",
                               classes=configuration["classes"])
    print(f"{INFO} Training dataset size {len(train_dataset)} ")

    # number of samples per episode:
    # num_support_tr is the number of support points per class
    # num_query_tr is the number of query points per class
    num_samples = configuration["num_support_tr"] + configuration["num_query_tr"]

    sampler = BatchSampler(labels=train_dataset.labels,
                           classes_per_it=len(configuration["classes"]),
                           num_samples=num_samples,
                           iterations=configuration["iterations"],
                           mode="train")
    dataloader = torch.utils.data.DataLoader(train_dataset,
                                             batch_sampler=sampler)

    # options for the training engine
    options = TrainEngine.build_options(
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        max_epochs=configuration["max_epochs"],
        iterations=configuration["iterations"],
        device=configuration["device"],
        sample_loader=dataloader,
        num_support_tr=configuration["num_support_tr"])
    options = extend_options_from_config(configuration=configuration,
                                         options=options)

    if configuration["validate"]:
        num_support_validation = configuration["num_support_validation"]
        num_query_validation = configuration["num_query_validation"]
        num_samples_validation = num_query_validation + num_support_validation
        print(f"{INFO} Number of samples validation {num_samples_validation}")

        validation_dataset = TUFDataset(
            filename=Path(configuration["validate_dataset"]),
            dataset_type="validate",
            classes=configuration["classes"])
        print(f"{INFO} Validation dataset size {len(validation_dataset)} ")

        val_sampler = BatchSampler(labels=validation_dataset.labels,
                                   classes_per_it=len(configuration["classes"]),
                                   num_samples=num_samples_validation,
                                   iterations=configuration["iterations"],
                                   mode="validate")
        validation_dataloader = torch.utils.data.DataLoader(
            validation_dataset, batch_sampler=val_sampler)
        options["validation_dataloader"] = validation_dataloader
        options["num_support_validation"] = configuration["num_support_validation"]

    # train the model
    engine = TrainEngine(model=model)
    engine.train(options=options)

    engine_state = engine.state
    epochs = list(range(configuration["max_epochs"]))

    # loss curves; raw strings for the titles so the LaTeX "\eta" is not
    # treated as an (invalid) string escape sequence
    plt.plot(epochs, engine_state["average_train_loss"], 'r*',
             label="Train loss")
    plt.plot(epochs, engine_state["average_validation_loss"], 'bo',
             label="Validation loss")
    plt.xlabel("Epoch")
    plt.ylabel("Average Loss")
    plt.legend(loc="upper right")
    plt.title(
        r"Train vs Validation loss. $\eta=${0}, Iterations/epoch {1}".format(
            configuration["optimizer"]["lr"], configuration["iterations"]))
    plt.savefig(Path(configuration["save_model_path"]) / "train_validation_loss.png")
    plt.close()

    # accuracy curves
    plt.plot(epochs, engine_state["average_train_acc"], 'r*',
             label="Train accuracy")
    plt.plot(epochs, engine_state["average_validation_acc"], 'bo',
             label="Validation accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Average Accuracy")
    plt.legend(loc="upper right")
    plt.title(
        r"Train vs Validation accuracy. $\eta=${0}, Iterations/epoch {1}".format(
            configuration["optimizer"]["lr"], configuration["iterations"]))
    plt.savefig(Path(configuration["save_model_path"]) / "train_validation_accuracy.png")
    # close the second figure as well (the original left it open)
    plt.close()
def test_construction(self):
    """A default-constructed TrainEngine must not raise."""
    try:
        TrainEngine()
    # narrow to Exception: a bare ``except:`` would also swallow
    # SystemExit/KeyboardInterrupt and mask a Ctrl-C during the run
    except Exception:
        self.fail("TrainEngine construction failed")
def main():
    """Entry point: parse CLI arguments, build the U-Net backbone and train it.

    Positional arguments are the train/test data directories; optional
    arguments control checkpointing and history plotting.
    """
    parser = argparse.ArgumentParser(
        description="Used to train TensorFlow model")
    parser.add_argument(
        "train_files_path",
        metavar="path",
        help="Path to the training files.",
    )
    parser.add_argument(
        "test_files_path",
        metavar="path",
        help="Path to the test files",
    )
    parser.add_argument(
        "--checkpoints_save_path",
        dest="checkpoints_save_path",
        metavar="path",
        type=str,
        help="Path where the checkpoints should be saved.",
    )
    parser.add_argument(
        "--last_checkpoint_path",
        dest="last_checkpoint_path",
        metavar="path",
        type=str,
        help="Path to the last checkpoint to continue training.",
    )
    # NOTE: the original used ``type=bool``, which is an argparse pitfall:
    # bool("false") is True, so ANY non-empty value enabled the flag.
    # ``store_true`` gives the intended on/off semantics (default False).
    parser.add_argument(
        "--plot_history",
        dest="plot_history",
        action="store_true",
        help="Plots the model training history",
    )

    args = parser.parse_args()
    plot_history = args.plot_history

    setup_environment()

    train_files_path = args.train_files_path
    eval_files_path = args.test_files_path

    input_shape = (240, 240, 1)

    generator_config = ImageGeneratorConfig()
    generator_config.loop_count = 10
    generator_config.horizontal_flip = True
    generator_config.zoom_range = 0.3
    generator_config.width_shift_range = 0.3
    generator_config.height_shift_range = 0.3
    generator_config.rotation_range = 10

    # evaluation data comes from a separate file set, so the eval arrays
    # returned by the first call are discarded instead of being
    # assigned and immediately overwritten
    train_x, train_y, _, _ = load_dataset(train_files_path,
                                          input_shape,
                                          validation_split=0)
    eval_x, eval_y, _, _ = load_dataset(eval_files_path,
                                        input_shape,
                                        validation_split=0)

    backbone = backbones.SegmentationVanillaUnet(input_shape)
    optimizer = Adam(lr=0.00001)

    train_engine = TrainEngine(
        input_shape,
        backbone.model,
        optimizer,
        loss="binary_crossentropy",
        checkpoints_save_path=args.checkpoints_save_path,
        checkpoint_save_period=100,
        last_checkpoint_path=args.last_checkpoint_path,
    )

    loss, acc, val_loss, val_acc = train_engine.train(
        train_x,
        train_y,
        eval_x,
        eval_y,
        epochs=50,
        batch_size=10,
        image_generator_config=generator_config,
    )

    if plot_history:
        plots.plot_history(loss, acc, val_loss, val_acc)

    # visualize predictions for the first couple of evaluation samples
    for idx in range(len(eval_x[:2])):
        predictions = train_engine.model.predict(
            np.array([eval_x[idx]], dtype=np.float32), batch_size=1)
        plots.plot_prediction(predictions, [eval_x[idx]], input_shape)

    K.clear_session()
from data.robotic_dataloader import get_train_dataloader, get_val_dataloader
from utils.options import Options
from data.utils.prepare_data import get_split
from train_engine import TrainEngine


if __name__ == '__main__':
    # parse the run options and resolve the file split for this fold
    opt = Options().opt
    train_files, test_files = get_split(opt.fold)

    # build the train/validation loaders for the split
    train_loader = get_train_dataloader(train_files, opt)
    val_loader = get_val_dataloader(test_files, opt)

    # wire the loaders into the engine and run training
    engine = TrainEngine(opt)
    engine.set_data(train_loader, val_loader)
    engine.train_model()