def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    run_name = os.path.basename(os.getcwd())
    hparams = flatten_omegaconf(cfg)

    cfg.callbacks.model_checkpoint.params.filepath = os.getcwd() + cfg.callbacks.model_checkpoint.params.filepath
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            if 'experiment_name' in logger.params.keys():
                logger.params['experiment_name'] = run_name
            loggers.append(load_obj(logger.class_name)(**logger.params))

    callbacks.append(EarlyStopping(**cfg.callbacks.early_stopping.params))

    trainer = pl.Trainer(
        logger=loggers,
        # early_stop_callback=EarlyStopping(**cfg.callbacks.early_stopping.params),
        checkpoint_callback=ModelCheckpoint(**cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )

    model = load_obj(cfg.training.lightning_module_name)(hparams=hparams, cfg=cfg)
    dm = load_obj(cfg.datamodule.data_module_name)(hparams=hparams, cfg=cfg)
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model and cfg.general.save_best:
        os.makedirs('saved_models', exist_ok=True)
        if os.path.exists(trainer.checkpoint_callback.best_model_path):  # type: ignore
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            # extract file name without folder
            save_name = os.path.basename(os.path.normpath(best_path))
            model = model.load_from_checkpoint(best_path, hparams=hparams, cfg=cfg, strict=False)
            model_name = f'saved_models/best_{save_name}'.replace('.ckpt', '.pth')
            torch.save(model.model.state_dict(), model_name)
        else:
            model_name = 'saved_models/last.pth'
            torch.save(model.model.state_dict(), model_name)

    if cfg.general.convert_to_jit and os.path.exists(trainer.checkpoint_callback.best_model_path):  # type: ignore
        # recompute the name here so it is defined even when the save branch above was skipped
        save_name = os.path.basename(os.path.normpath(trainer.checkpoint_callback.best_model_path))  # type: ignore
        convert_to_jit(model, save_name, cfg)
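# Every snippet in this collection starts by calling a `set_seed` helper whose
# implementation is not shown here and varies by repo. A minimal sketch of such
# a helper, assuming it seeds Python's `random`, NumPy, and PyTorch (CPU and CUDA):
import os
import random

import numpy as np
import torch


def set_seed(seed: int = 42) -> None:
    """Seed every common source of randomness for reproducibility."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # optional: trade cuDNN speed for determinism
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False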
def test_f1score_metric(average: str) -> None:
    set_seed(42)
    labels = torch.randint(1, 10, (4096, 100)).flatten()
    predictions = torch.randint(1, 10, (4096, 100)).flatten()
    labels_numpy = labels.numpy()
    predictions_numpy = predictions.numpy()
    f1_metric = F1Score(average)
    my_pred = f1_metric(predictions, labels)
    f1_pred = f1_score(labels_numpy, predictions_numpy, average=average)
    assert np.isclose(my_pred.item(), f1_pred.item())
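# The F1Score class under test is not shown here. As a reference point, a
# minimal sketch of a macro-averaged F1 in plain torch that the test above
# could exercise (the real class also handles the other `average` modes):
import torch


def macro_f1(predictions: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """Unweighted mean of per-class F1 scores over the classes present."""
    classes = torch.unique(torch.cat([labels, predictions]))
    f1_per_class = []
    for c in classes:
        tp = ((predictions == c) & (labels == c)).sum().float()
        fp = ((predictions == c) & (labels != c)).sum().float()
        fn = ((predictions != c) & (labels == c)).sum().float()
        precision = tp / (tp + fp) if tp + fp > 0 else torch.tensor(0.0)
        recall = tp / (tp + fn) if tp + fn > 0 else torch.tensor(0.0)
        denom = precision + recall
        f1_per_class.append(2 * precision * recall / denom if denom > 0 else torch.tensor(0.0))
    return torch.stack(f1_per_class).mean()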
def get_test_data(self):
    """
    Get the Testing Data Loader

    :return: The Wrapped Data Loader
    """
    set_seed(self.seed)
    data = CustomDataset(self.test_files)
    dl = DataLoader(data, batch_size=self.batch_size, shuffle=True,
                    num_workers=self.workers, worker_init_fn=seed_worker)
    return WrappedDataLoader(dl, self.wrapped_function)
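# The `seed_worker` passed as `worker_init_fn` above is the standard PyTorch
# recipe for seeding DataLoader worker processes (see the PyTorch
# reproducibility docs); a sketch:
import random

import numpy as np
import torch


def seed_worker(worker_id: int) -> None:
    """Derive a per-worker seed from torch's base seed so workers differ but stay reproducible."""
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)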
def make_prediction(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model inference

    Args:
        cfg: hydra config

    Returns:
        None
    """
    set_seed(cfg.training.seed)
    model_name = glob.glob(f'outputs/{cfg.inference.run_name}/saved_models/*')[0]
    lit_model = LitM5NBeats.load_from_checkpoint(checkpoint_path=model_name, cfg=cfg)
    net = lit_model.net

    datasets = get_datasets(cfg)
    loader = torch.utils.data.DataLoader(
        datasets[cfg.inference.mode],
        batch_size=cfg.data.batch_size,
        num_workers=cfg.data.num_workers,
        shuffle=False,
    )

    y_pred: List[np.ndarray] = []
    device = cfg.data.device
    net.to(device)
    net.eval()
    with torch.no_grad():  # no gradients needed at inference time
        for x, y, scales, weights in loader:
            forecast, _ = net(x.float().to(device), y.float().to(device),
                              scales.to(device), weights.to(device))
            y_pred.extend(forecast.cpu().detach().numpy())

    predictions = np.array(y_pred)
    sub = pd.read_csv(f'{cfg.data.folder_path}/data/sample_submission.csv')
    sub.iloc[:30490, 1:] = predictions
    sub.iloc[30490:, 1:] = predictions
    sub.to_csv(f'subs/{cfg.inference.run_name}_{cfg.inference.mode}.csv', index=False)
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    cfg.callbacks.model_checkpoint.params.filepath = os.getcwd() + cfg.callbacks.model_checkpoint.params.filepath
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))

    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=EarlyStopping(**cfg.callbacks.early_stopping.params),
        checkpoint_callback=ModelCheckpoint(**cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )

    model = load_obj(cfg.training.lightning_module_name)(hparams=hparams, cfg=cfg)
    dm = load_obj(cfg.datamodule.data_module_name)(hparams=hparams, cfg=cfg)
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        # save as a simple torch model
        # TODO: save the best checkpoint, not the last one - load that checkpoint and export it
        os.makedirs('saved_models', exist_ok=True)
        model_name = 'saved_models/best.pth'
        print(model_name)
        torch.save(model.model.state_dict(), model_name)
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)
    model = LitM5NBeats(hparams=hparams, cfg=cfg)

    early_stopping = pl.callbacks.EarlyStopping(**cfg.callbacks.early_stopping.params)
    model_checkpoint = pl.callbacks.ModelCheckpoint(**cfg.callbacks.model_checkpoint.params)
    lr_logger = pl.callbacks.LearningRateLogger()

    logger = []
    if cfg.logging.log:
        tb_logger = TensorBoardLogger(save_dir=cfg.general.save_dir)
        comet_logger = CometLogger(
            save_dir=cfg.general.save_dir,
            workspace=cfg.general.workspace,
            project_name=cfg.general.project_name,
            api_key=cfg.private.comet_api,
            experiment_name=os.path.basename(os.getcwd()),  # portable, unlike splitting on '\\'
        )
        # wandb_logger = WandbLogger(name=os.path.basename(os.getcwd()),
        #                            save_dir=cfg.general.save_dir,
        #                            project=cfg.general.project_name)
        logger = [tb_logger, comet_logger]

    trainer = pl.Trainer(
        logger=logger,
        early_stop_callback=early_stopping,
        checkpoint_callback=model_checkpoint,
        callbacks=[lr_logger],
        gradient_clip_val=0.5,
        **cfg.trainer,
    )
    trainer.fit(model)
def predict(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model inference

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    test_dataset = get_test_dataset(cfg)

    path = r'wheat\outputs\2020_05_06_09_32_36\saved_models\_ckpt_epoch_0.ckpt'
    model = LitWheat.load_from_checkpoint(checkpoint_path=path)
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=cfg.data.batch_size,
        num_workers=cfg.data.num_workers,
        shuffle=False,
        collate_fn=collate_fn,
    )

    detection_threshold = 0.5
    results = []

    for images, _, image_ids in test_loader:
        # use a list, not a generator: the images are iterated again below
        images = [image.to(cfg.general.device) for image in images]  # assumes a `device` field under cfg.general
        outputs = model(images)

        for i, _ in enumerate(images):
            boxes = outputs[i]['boxes'].data.cpu().numpy()
            scores = outputs[i]['scores'].data.cpu().numpy()
            boxes = boxes[scores >= detection_threshold].astype(np.int32)
            scores = scores[scores >= detection_threshold]
            image_id = image_ids[i]
            result = {
                'image_id': image_id,
                'PredictionString': format_prediction_string(boxes, scores),
            }
            results.append(result)
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)
    model = LitWheat(hparams=hparams, cfg=cfg)

    early_stopping = pl.callbacks.EarlyStopping(**cfg.callbacks.early_stopping.params)
    model_checkpoint = pl.callbacks.ModelCheckpoint(**cfg.callbacks.model_checkpoint.params)
    lr_logger = pl.callbacks.LearningRateLogger()

    tb_logger = TensorBoardLogger(save_dir=cfg.general.save_dir)
    # comet_logger = CometLogger(save_dir=cfg.general.save_dir,
    #                            workspace=cfg.general.workspace,
    #                            project_name=cfg.general.project_name,
    #                            api_key=cfg.private.comet_api,
    #                            experiment_name=os.path.basename(os.getcwd()))
    json_logger = JsonLogger()

    trainer = pl.Trainer(
        logger=[
            tb_logger,
            # comet_logger,
            json_logger,
        ],
        early_stop_callback=early_stopping,
        checkpoint_callback=model_checkpoint,
        callbacks=[lr_logger],
        **cfg.trainer,
    )
    trainer.fit(model)

    # save as a simple torch model
    model_name = os.path.basename(os.getcwd()) + '.pth'
    print(model_name)
    torch.save(model.model.state_dict(), model_name)
def bootstrap_config(config_id: str, should_make_config_immutable: bool = True) -> ConfigType:
    """Prepare the config object

    Args:
        config_id (str): config_id to load
        should_make_config_immutable (bool, optional): Should the config object be immutable. Defaults to True.

    Returns:
        ConfigType: Config Object
    """
    config = get_config(config_id, should_make_config_immutable=should_make_config_immutable)
    write_debug_message(f"Starting Experiment at {time.asctime(time.localtime(time.time()))}")
    write_debug_message(f"torch version = {torch.__version__}")  # type: ignore
    set_seed(seed=config.general.seed)
    return config
def __init__(self, model, data, opt, loss_func, epochs, dev, logger, checkpointer, verbose=False, seed=42):
    """
    The main class used to actually train models

    :param model: the model to train
    :param data: data to use (of type Data from this package)
    :param opt: optimizer to use
    :param loss_func: loss function to use
    :param epochs: number of epochs to train for
    :param dev: what device to use
    :param logger: a logger to record training
    :param checkpointer: a checkpointer to save the trained (and mid-training) model(s)
    :param verbose: the verbosity of training
    :param seed: the seed to use for reproducibility
    """
    set_seed(seed)
    self.train_dl = data.get_train_data()
    self.val_dl = data.get_val_data()
    self.loss_func = loss_func
    model = model.to(dev)
    self.logger = logger
    self.verbose = verbose

    for epoch in range(epochs):
        self.train_model(epoch, model, opt)
        self.evaluate_model(epoch, model)
        self.logger.print_epoch(epoch)
        checkpointer.save(epoch, model, opt)
        if logger.check_early_stopping():
            break

    checkpointer.save_override(-1, model, add_tag="FINAL")
def run(cfg: DictConfig):
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    model = LitBCDI(hparams=cfg)

    early_stopping = pl.callbacks.EarlyStopping(**cfg.callbacks.early_stopping.params)
    model_checkpoint = pl.callbacks.ModelCheckpoint(**cfg.callbacks.model_checkpoint.params)

    tb_logger = TensorBoardLogger(save_dir=cfg.general.save_dir)
    comet_logger = CometLogger(
        save_dir=cfg.general.save_dir,
        workspace=cfg.general.workspace,
        project_name=cfg.general.project_name,
        # api_key=cfg.private.comet_api,
        experiment_name=os.path.basename(os.getcwd()),
    )
    print(cfg.trainer)

    trainer = pl.Trainer(
        logger=[tb_logger, comet_logger],
        # early_stop_callback=early_stopping,
        checkpoint_callback=model_checkpoint,
        # nb_sanity_val_steps=0,
        gradient_clip_val=0.5,
        **cfg.trainer,
    )
    trainer.fit(model)

    # save as a simple torch model
    model_name = f"{os.path.basename(os.getcwd())}.pth"
    torch.save(model.model.state_dict(), model_name)
def run_multilabel_mnist(args, exp_dir):
    """
    Run the multilabel mnist experiment with the given arguments.

    Args:
        args: Command line args.
        exp_dir: Directory in which the experiment will be stored.
    """
    # Set seed globally
    set_seed(args.seed)

    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:{}".format(args.cuda_device_id) if use_cuda else "cpu")
    logger.info("Main device: %s", device)

    # Get the mnist loader
    train_loader, test_loader = load_multi_mnist(n_labels=args.n_labels,
                                                 canvas_size=args.canvas_size,
                                                 seed=args.seed,
                                                 args=args)

    # Retrieve model
    model = get_model_by_tag(args.net, device, args, args.canvas_size**2, args.n_labels)

    # Disable track_running_stats in batchnorm according to
    # https://discuss.pytorch.org/t/performance-highly-degraded-when-eval-is-activated-in-the-test-phase/3323/12
    for child in model.modules():
        if isinstance(child, (nn.BatchNorm2d, nn.BatchNorm1d)):
            child.track_running_stats = False

    logger.info("Number of parameters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)

    # Scheduler for learning rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    writer = SummaryWriter(log_dir=os.path.join(exp_dir, "tb-log"))

    data = []
    # Run epochs
    for epoch in range(1, args.epochs + 1):
        # Start counting after 20 epochs, that is, the first lr reduction is at epoch 30
        if epoch > 20:
            scheduler.step()

        # Run train
        train_multilabel(model, device, train_loader, optimizer, epoch, args.log_interval)

        # Evaluate model on train/test data
        train_loss, train_acc = evaluate_model_multilabel(model, device, train_loader, "Train", args.n_labels)
        test_loss, test_acc = evaluate_model_multilabel(model, device, test_loader, "Test", args.n_labels)
        data.append([epoch, train_acc, test_acc, train_loss, test_loss])

        # Collect data
        collect_tensorboard_info(writer, model, epoch, train_acc, test_acc, train_loss, test_loss)

    column_names = ["epoch", "train_acc", "test_acc", "train_loss", "test_loss"]
    store_results(result_dir=exp_dir, dataset_name="mnist", column_names=column_names, data=data)
def __init__(self):
    # parameters
    set_seed(2020)
    if torch.cuda.is_available():
        self.device = torch.device("cuda")
    else:
        # extremely slow when training on cpu
        self.device = torch.device("cpu")

    # dataset
    self.train_dataset = UnalignedDataset(osp.join("dataset", FLAGS.dataset, "train"),
                                          image_size=FLAGS.image_size)
    self.train_loader = DataLoader(self.train_dataset,
                                   batch_size=FLAGS.batch_size,
                                   shuffle=True,
                                   num_workers=2)
    self.test_dataset = UnalignedDataset(osp.join("dataset", FLAGS.dataset, "test"),
                                         is_train=True,
                                         image_size=FLAGS.image_size)
    self.test_loader = DataLoader(self.test_dataset,
                                  batch_size=1,  # use 1 for evaluation
                                  shuffle=False)

    # replay buffer
    self.fake_A_pool = ImageClassPool(50)
    self.fake_B_pool = ImageClassPool(50)

    # model
    if FLAGS.model == "cyclegan":
        self.model = CycleGAN().to(self.device)
        init_weights(self.model, init_type="kaiming")
    elif FLAGS.model == "accyclegan":
        self.model = ACCycleGAN().to(self.device)
        init_weights(self.model, init_type="kaiming")

    # loss
    self.criterionGAN = GANLoss("lsgan").to(self.device)
    if FLAGS.loss == "L1":
        self.criterionCycle = torch.nn.L1Loss()
        self.criterionIdt = torch.nn.L1Loss()
    elif FLAGS.loss == "patch":
        self.criterionCycle = PatchLoss().to(self.device)
        self.criterionIdt = PatchLoss().to(self.device)

    # optimizers
    self.optimizer_G = torch.optim.Adam(
        itertools.chain(self.model.netG_A.parameters(), self.model.netG_B.parameters()),
        lr=FLAGS.lr, betas=(0.5, 0.999))
    self.optimizer_D_A = torch.optim.Adam(self.model.netD_A.parameters(),
                                          lr=FLAGS.lr, betas=(0.5, 0.999))
    self.optimizer_D_B = torch.optim.Adam(self.model.netD_B.parameters(),
                                          lr=FLAGS.lr, betas=(0.5, 0.999))
    self.schedulers = [
        self.get_scheduler(opt)
        for opt in [self.optimizer_G, self.optimizer_D_A, self.optimizer_D_B]
    ]

    # logs
    self.run_name = datetime.now().strftime("%Y-%m-%d-%H:%M:%S") + "-" + FLAGS.run_name
    self.log_dir = osp.join(FLAGS.logdir, self.run_name)
    self.writer = SummaryWriter(log_dir=self.log_dir)
    self.save_dir = osp.join(self.log_dir, "model_states")
    os.makedirs(self.save_dir, exist_ok=True)

    # write params to summary
    self.writer.add_text('Text', dict2table(FLAGS.flag_values_dict()), 0)
from src.envs import *
import src.agent.model as model
from src.utils.utils import set_seed
from src.utils.params import ParamScheduler

seed = 735249652
set_seed(seed)

# Environment
env = Snake(num_par_inst=500)
env.set_seed(seed)

hyperparams = {
    # General
    "name": "noisy_25",
    "num_parallel_steps": 1000000,
    "seed": seed,
    "env": env,

    # Training and synchronization
    "learning_rate": ParamScheduler(init_value=0.0005, decay_mode="step",
                                    milestones=[5000000, 50000000],
                                    milestone_factor=0.4),
    "replay_period": 64,
def run_cifar(args, exp_dir):
    """
    Run the cifar experiment with the given arguments.

    Args:
        args: Command line args.
        exp_dir: Directory in which the experiment will be stored.
    """
    # Set seed globally
    set_seed(args.seed)

    cuda_device = "cuda:{}".format(args.cuda_device_id)
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device(cuda_device if use_cuda else "cpu")
    logger.info("Main device: %s", device)
    bs = args.batch_size

    # Get the cifar loader
    train_loader, test_loader = get_cifar_loader(n_labels=args.cifar, use_cuda=use_cuda, args=args)

    # Retrieve model
    model = get_model_by_tag(
        in_features=32 * 32,
        tag=args.net,
        device=device,
        args=args,
        n_labels=args.cifar,
        in_channels=3,
    )

    # Disable track_running_stats in batchnorm according to
    # https://discuss.pytorch.org/t/performance-highly-degraded-when-eval-is-activated-in-the-test-phase/3323/12
    for child in model.modules():
        if isinstance(child, (nn.BatchNorm2d, nn.BatchNorm1d)):
            child.track_running_stats = False

    logger.info("Number of parameters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)

    # Scheduler for learning rate
    gamma = 0.5
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=gamma)

    writer = SummaryWriter(log_dir=os.path.join(exp_dir, "tb-log"))

    data = []
    # Run epochs
    for epoch in range(1, args.epochs + 1):
        # Start counting after 20 epochs, that is, the first lr reduction is at epoch 30
        if epoch > 20:
            scheduler.step()

        # Run train
        train(model, device, train_loader, optimizer, epoch, args.log_interval)

        # Evaluate model on train/test data
        train_loss, train_acc = evaluate(model, device, train_loader, "Train")
        test_loss, test_acc = evaluate(model, device, test_loader, "Test")
        data.append([epoch, train_acc, test_acc, train_loss, test_loss])

        # Collect data
        collect_tensorboard_info(writer, model, epoch, train_acc, test_acc, train_loss, test_loss)

    column_names = ["epoch", "train_acc", "test_acc", "train_loss", "test_loss"]
    store_results(
        result_dir=os.path.join(args.result_dir, args.experiment_name),
        dataset_name="cifar%s" % args.cifar,
        column_names=column_names,
        data=data,
    )
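# The StepLR bookkeeping above is easy to get wrong. A small self-contained
# check of when the learning rate actually drops under this scheme
# (step_size=10, gamma=0.5, stepping only after epoch 20):
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

for epoch in range(1, 41):
    if epoch > 20:
        scheduler.step()
    # lr stays 0.001 through epoch 29 and halves at epoch 30,
    # because the 10th scheduler step happens there
    print(epoch, optimizer.param_groups[0]['lr'])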
def run_for_percentage(percentage: float, args) -> Tuple[float, float, float, float, float]:
    """
    Run the experiment with a given percentage.

    Args:
        percentage (float): Percentage of training data available.
        args: Command line args.

    Returns:
        Tuple[float, float, float, float, float]: Percentage, Train acc, Test acc, Train loss, Test loss.
    """
    use_cuda = args.cuda and torch.cuda.is_available()

    # Set seed globally
    set_seed(args.seed)
    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    bs = int(60000 * percentage / 100 * 1 / 10)
    logger.info("Current percentage: %.2f, Batch size: %s", percentage, bs)

    # Get the mnist loader
    train_loader, test_loader = get_mnist_subset(use_cuda=use_cuda,
                                                 train_bs=bs,
                                                 test_bs=args.test_batch_size,
                                                 p=percentage)

    # Retrieve model
    model = get_model_by_tag(args.net, device)
    # logger.info("Number of samples: {} ({}%)".format(n_samples, p))
    logger.info("Number of parameters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Scheduler for learning rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.5)

    data = []
    # Run epochs
    for epoch in range(1, args.epochs + 1):
        # scheduler.step() runs at the start of the epoch (older PyTorch ordering)
        scheduler.step()

        # Run train
        train(model, device, train_loader, optimizer, epoch, args.log_interval)

        # Evaluate model on train/test data
        # train_loss, train_acc = evaluate_model(model, device, train_loader, "Train")
        # test_loss, test_acc = evaluate_model(model, device, test_loader, "Test")
        # data.append([epoch, train_acc, test_acc, train_loss, test_loss])

    # column_names = ["epoch", "train_acc", "test_acc", "train_loss", "test_loss"]
    # store_results(
    #     result_dir=os.path.join(args.result_dir, args.experiment_name),
    #     dataset_name="mnist-p={0:.2f}".format(percentage),
    #     column_names=column_names,
    #     data=data,
    # )

    # Evaluate model on train/test data
    train_loss, train_acc = evaluate_model(model, device, train_loader, "Train")
    test_loss, test_acc = evaluate_model(model, device, test_loader, "Test")
    return percentage, train_acc, test_acc, train_loss, test_loss
help="automata path") parser.add_argument('--model_type', type=str, default='FSARNN', help='baseline MarryUp or FSARNN') args = parser.parse_args() args_bak = deepcopy(args) assert args.farnn in [0, 1] results = {} loggers = {} seed = args.seed set_seed(args.seed) if args.model_type == 'FSARNN': automata_path_forward, automata_path_backward = get_automata_from_seed( args_bak, seed) paths = (automata_path_forward, automata_path_backward) args.automata_path_forward = automata_path_forward args.automata_path_backward = automata_path_backward train_fsa_rnn(args, paths) elif args.model_type == 'Onehot': automata_path_forward, automata_path_backward = get_automata_from_seed( args_bak, seed) paths = (automata_path_forward, automata_path_backward) args.automata_path_forward = automata_path_forward args.automata_path_backward = automata_path_backward train_onehot(args, paths)
def run_multilabel_mnist(args):
    """
    Run the multilabel mnist experiment.

    Args:
        args: Command line args.

    Returns:
        Tuple[float, float, float, float]: Train acc, Test acc, Train loss, Test loss.
    """
    use_cuda = args.cuda and torch.cuda.is_available()

    # Set seed globally
    set_seed(args.seed)
    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    bs = ARGS.batch_size

    # Get the mnist loader
    train_loader, test_loader = get_multilabel_mnist_loader(n_labels=ARGS.n_labels,
                                                            use_cuda=use_cuda,
                                                            args=args)

    # Retrieve model
    model = get_model_by_tag(args.net, device)
    logger.info("Number of parameters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Scheduler for learning rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    data = []
    # Run epochs
    for epoch in tqdm(range(1, args.epochs + 1)):
        # Start counting after 10 epochs, that is, the first lr reduction is at epoch 20
        if epoch > 10:
            scheduler.step()

        # Run train
        train_multilabel(model, device, train_loader, optimizer, epoch, args.log_interval)

        # Evaluate model on train/test data
        train_loss, train_acc = evaluate_model_multilabel(model, device, train_loader, "Train", ARGS.n_labels)
        test_loss, test_acc = evaluate_model_multilabel(model, device, test_loader, "Test", ARGS.n_labels)
        data.append([epoch, train_acc, test_acc, train_loss, test_loss])

    column_names = ["epoch", "train_acc", "test_acc", "train_loss", "test_loss"]
    store_results(
        result_dir=os.path.join(args.result_dir, args.experiment_name),
        dataset_name="mnist",
        column_names=column_names,
        data=data,
    )

    # Evaluate model on train/test data and return the result promised in the docstring
    train_loss, train_acc = evaluate_model_multilabel(model, device, train_loader, "Train", ARGS.n_labels)
    test_loss, test_acc = evaluate_model_multilabel(model, device, test_loader, "Test", ARGS.n_labels)
    return train_acc, test_acc, train_loss, test_loss
def __init__(self, path, augmented, total_amt=16384, val_percent=0.25, test_amt=768,
             wrapped_function=None, workers=0, device=torch.device('cpu'),
             batch_size=64, verbose=False, seed=42):
    """
    An object to take data from a path and convert it to tensors

    :param path: path to the data
    :param augmented: whether the data is augmented or not
    :param total_amt: the total amount of data to use for training and validation
    :param val_percent: the percentage of data to use for validation
    :param test_amt: the amount of data to set aside for testing
    :param wrapped_function: a function to apply to the data if required
    :param workers: the number of workers to use
    :param device: the device to use
    :param batch_size: the batch size to use
    :param verbose: the verbosity to use
    :param seed: the seed to use for reproducibility
    """
    self.device = device
    self.batch_size = batch_size
    self.workers = workers
    self.verbose = verbose
    self.seed = seed
    set_seed(self.seed)

    self.wrapped_function = lambda x, y: mount_to_device(x, y, self.device)
    if wrapped_function is not None:
        self.wrapped_function = lambda x, y: mount_to_device(*wrapped_function(x, y), self.device)

    self.train_files, self.val_files, self.test_files = [], [], []
    if augmented:
        self.get_augmented_sets(path, total_amt, val_percent, test_amt)
    else:
        self.get_non_augmented_sets(path, total_amt, val_percent, test_amt)

    total = self.train_files + self.val_files + self.test_files
    if verbose:
        pt, nt = self.calc_distribution(self.train_files)
        pv, nv = self.calc_distribution(self.val_files)
        pte, nte = self.calc_distribution(self.test_files)
        print(f"Total Size = {len(total)}")
        print(f"Total size of Train = {len(self.train_files)} (pos = {pt}, neg = {nt})")
        print(f"Total size of Validation = {len(self.val_files)} (pos = {pv}, neg = {nv})")
        print(f"Total size of Test = {len(self.test_files)} (pos = {pte}, neg = {nte})")
        if augmented:
            # `aug_type` avoids shadowing the builtin `type`
            for aug_type in ["autocontrast", "equalize", "invert", "resized", "rotated"]:
                ltr = len([i for i in self.train_files if aug_type in i])
                print(f"# of {aug_type} in Train = {ltr}")
                lv = len([i for i in self.val_files if aug_type in i])
                print(f"# of {aug_type} in Validation = {lv}")
                lte = len([i for i in self.test_files if aug_type in i])
                print(f"# of {aug_type} in Test = {lte}")
        print("Checking for duplicates...")

    if len(total) != len(set(total)):
        raise RuntimeError("Something has gone wrong! There are duplicates in the data")
    else:
        if verbose:
            print("There are no duplicates in the data!")
import os
from copy import deepcopy

# os.system('pip3 install -U torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html')
# os.system('pip install numpy cython==0.29.24')
# os.system('pip install POT==0.7.0')
# os.system('pip install dill==0.3.4')
# os.system('pip install tqdm==4.62.2 lightgbm==3.2.1')
# os.system('pip install timm')

import tensorflow as tf

from src.utils import get_logger
from src.utils.utils import set_seed

set_seed(1234)
LOGGER = get_logger('GLOBAL')

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

current_path = os.path.abspath(__file__)
config_file_path = os.path.abspath(
    os.path.join(os.path.dirname(current_path), 'config.json'))
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    run_name = os.path.basename(os.getcwd())

    cfg.callbacks.model_checkpoint.params.dirpath = Path(
        os.getcwd(), cfg.callbacks.model_checkpoint.params.dirpath).as_posix()
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            if 'experiment_name' in logger.params.keys():
                logger.params['experiment_name'] = run_name
            loggers.append(load_obj(logger.class_name)(**logger.params))

    callbacks.append(EarlyStopping(**cfg.callbacks.early_stopping.params))
    callbacks.append(ModelCheckpoint(**cfg.callbacks.model_checkpoint.params))

    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        **cfg.trainer,
    )

    dm = load_obj(cfg.datamodule.data_module_name)(cfg=cfg)
    dm.setup()
    model = load_obj(cfg.training.lightning_module_name)(cfg=cfg, tag_to_idx=dm.tag_to_idx)
    model._vectorizer = dm._vectorizer
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        if cfg.general.save_best:
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            # extract file name without folder
            save_name = os.path.basename(os.path.normpath(best_path))
            model = model.load_from_checkpoint(best_path, cfg=cfg, tag_to_idx=dm.tag_to_idx, strict=False)
            model_name = Path(cfg.callbacks.model_checkpoint.params.dirpath,
                              f'best_{save_name}'.replace('.ckpt', '.pth')).as_posix()
            torch.save(model.model.state_dict(), model_name)
        else:
            os.makedirs('saved_models', exist_ok=True)
            model_name = 'saved_models/last.pth'
            torch.save(model.model.state_dict(), model_name)
def run_trial(self, LR, BATCH_SIZE, OPTIM, LOSS):
    """
    Initialise everything again for training with the given learning rate, batch size,
    optimizer and loss function, then hand it all to FitModel.

    :param LR: Learning Rate
    :param BATCH_SIZE: Batch Size
    :param OPTIM: Optimiser
    :param LOSS: Loss Function
    :return: The final metric to optimise score from the training
    """
    set_seed(self.seed)

    NAME = f"{self.name}_{str(LR).replace('.', '_')}"
    NAME += f"_{BATCH_SIZE}"

    model = self.model_class(**self.model_kwargs)
    opt = OPTIM(model.parameters(), lr=LR)
    opt_str = str(type(opt)).split("'")[-2].split(".")[-1]
    NAME += f"_{opt_str}"

    loss_func = LOSS()
    loss_func_str = str(type(loss_func)).split("'")[-2].split(".")[-1]
    NAME += f"_{loss_func_str}"

    data = Data(self.DATA_DIR, self.augmented, batch_size=BATCH_SIZE,
                total_amt=self.total_amt, val_percent=self.val_percent,
                test_amt=self.test_amt, wrapped_function=self.wrapped_function,
                workers=self.workers, device=self.device,
                verbose=self.verbose, seed=self.seed)
    logger = Logger(NAME, self.LOG_DIR, self.metrics_to_use,
                    train_early_stopping=self.tres, test_early_stopping=self.tes,
                    stopping_attention=self.es_attn, overwrite=self.overwrite,
                    verbose=self.verbose)
    checkpointer = Checkpoint(NAME, self.CKP_DIR, self.save_every, overwrite=self.overwrite)

    FitModel(model, data, opt, loss_func, self.epochs, self.device,
             logger, checkpointer, verbose=self.verbose, seed=self.seed)

    met_final = logger.test_history[self.metric_to_optimise][-1]
    return NAME, met_final
    :param batch_sizes: a list of batch sizes to try, length >= 1
    :param optimisers: a list of optimizers to try, length >= 1
    :param losses: a list of loss functions to try, length >= 1
    :param SAVE_EVERY: how often to save the models and optimizers
    :param EPOCHS: the number of epochs to train for
    :param DEVICE: the device to use
    :param wrapped_function: a wrapped function for data loading if needed
    :param WORKERS: the number of workers for dataloading to use
    :param verbose: whether to print the status of what's happening
    :param overwrite: whether to overwrite previous experiments with the same name
    :param seed: a seed for reproducibility
    """
    self.augmented = augmented
    self.seed = seed
    set_seed(self.seed)
    self.name = Name
    self.model_class = model_class
    self.epochs = EPOCHS
    self.device = DEVICE
    self.workers = WORKERS
    self.wrapped_function = wrapped_function
    self.verbose = verbose
    self.overwrite = overwrite
    self.save_every = SAVE_EVERY
    self.metrics_to_use = metrics_to_use
    self.metric_to_optimise = metric_to_optimise
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    cfg.callbacks.model_checkpoint.params.filepath = os.getcwd() + cfg.callbacks.model_checkpoint.params.filepath
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))

    callbacks.append(EarlyStopping(**cfg.callbacks.early_stopping.params))

    trainer = pl.Trainer(
        logger=loggers,
        checkpoint_callback=ModelCheckpoint(**cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )

    dm = load_obj(cfg.datamodule.data_module_name)(hparams=hparams, cfg=cfg)
    dm.setup()
    model = load_obj(cfg.training.lightning_module_name)(hparams=hparams, cfg=cfg, tag_to_idx=dm.tag_to_idx)
    model._vectorizer = dm._vectorizer
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        # ensure the output dir exists for both branches below
        os.makedirs('saved_models', exist_ok=True)
        if cfg.general.save_best:
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            # extract file name without folder and extension
            save_name = os.path.splitext(os.path.basename(best_path))[0]
            model = model.load_from_checkpoint(best_path, hparams=hparams, cfg=cfg,
                                               tag_to_idx=dm.tag_to_idx, strict=False)
            model_name = f'saved_models/{save_name}.pth'
            torch.save(model.model.state_dict(), model_name)
        else:
            model_name = 'saved_models/last.pth'
            torch.save(model.model.state_dict(), model_name)
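# Once exported this way, the plain state_dict can be reloaded without
# Lightning. A sketch, where `MyNet` is a hypothetical stand-in for whatever
# architecture `model.model` actually is:
import torch

net = MyNet()  # hypothetical: must match the saved architecture
state_dict = torch.load('saved_models/last.pth', map_location='cpu')
net.load_state_dict(state_dict)
net.eval()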
import os
import pickle

import torch
from sklearn.metrics import classification_report

# customized modules
from src.utils.utils import set_seed
from src.config.config import set_arguments
from src.models.models import *
from src.training.training import *
from src.data.get_dataloaders import *

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if __name__ == "__main__":
    # 1. setting
    config = set_arguments()
    set_seed(config.seeds)
    print("Use CUDA: {}".format(torch.cuda.is_available()))

    # 2. read data
    loader_dict = create_dataloaders(data_name=config.data_name,
                                     data_dir=os.path.join(config.root, "data"),
                                     batch_size=config.batch_size,
                                     class_a_size=config.class_a_size,
                                     class_a_index=config.class_a_index,
                                     class_b_size=config.class_b_size,
                                     class_b_index=config.class_b_index,
                                     seeds=config.seeds,
                                     download_cifar10=config.download_cifar10)
    train_loader = loader_dict["train_loader"]
    test_ab_loader = loader_dict["test_ab_loader"]