def init_fn(worker_id):
    # Intended as a DataLoader ``worker_init_fn``: derive each worker's seed
    # from the worker id plus an externally tracked counter, so augmentation
    # randomness differs across workers but stays reproducible per epoch.
    # Note: ``self.cntr`` implies this closure is defined inside a method of
    # the class that owns the counter.
    set_seed(worker_id + self.cntr)
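# Usage sketch (assumption, not from the source): a function with this
# signature is typically handed to a DataLoader as ``worker_init_fn`` so that
# every worker process is seeded deterministically. ``dataset`` below is
# hypothetical.
#
#     loader = torch.utils.data.DataLoader(
#         dataset, batch_size=32, num_workers=4, worker_init_fn=init_fn)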
def __init__(self,
             config=None,
             name=None,
             n_epochs=None,
             seed=None,
             base_dir=None,
             globs=None,
             resume=None,
             ignore_resume_config=False,
             resume_save_types=("model", "optimizer", "simple", "th_vars", "results"),
             resume_reset_epochs=True,
             parse_sys_argv=False,
             parse_config_sys_argv=True,
             checkpoint_to_cpu=True,
             safe_checkpoint_every_epoch=1,
             use_visdomlogger=True,
             visdomlogger_kwargs=None,
             visdomlogger_c_freq=1,
             use_explogger=True,
             explogger_kwargs=None,
             explogger_c_freq=100,
             use_telegrammessagelogger=False,
             telegrammessagelogger_kwargs=None,
             telegrammessagelogger_c_freq=1000,
             append_rnd_to_name=False):

    # super(PytorchExperiment, self).__init__()
    Experiment.__init__(self)

    if parse_sys_argv:
        config_path, resume_path = get_vars_from_sys_argv()
        if config_path:
            config = config_path
        if resume_path:
            resume = resume_path

    self._config_raw = None
    if isinstance(config, str):
        self._config_raw = Config(file_=config,
                                  update_from_argv=parse_config_sys_argv)
    elif isinstance(config, Config):
        self._config_raw = Config(config=config,
                                  update_from_argv=parse_config_sys_argv)
    elif isinstance(config, dict):
        self._config_raw = Config(config=config,
                                  update_from_argv=parse_config_sys_argv)
    else:
        self._config_raw = Config(update_from_argv=parse_config_sys_argv)

    self.n_epochs = n_epochs
    if 'n_epochs' in self._config_raw:
        self.n_epochs = self._config_raw["n_epochs"]
    if self.n_epochs is None:
        self.n_epochs = 0

    self._seed = seed
    if 'seed' in self._config_raw:
        self._seed = self._config_raw.seed
    if self._seed is None:
        random_data = os.urandom(4)
        seed = int.from_bytes(random_data, byteorder="big")
        self._config_raw.seed = seed
        self._seed = seed

    self.exp_name = name
    if 'name' in self._config_raw:
        self.exp_name = self._config_raw["name"]
    if append_rnd_to_name:
        rnd_str = ''.join(random.choice(string.ascii_letters + string.digits)
                          for _ in range(5))
        self.exp_name += "_" + rnd_str

    if 'base_dir' in self._config_raw:
        base_dir = self._config_raw["base_dir"]

    self._checkpoint_to_cpu = checkpoint_to_cpu
    self._safe_checkpoint_every_epoch = safe_checkpoint_every_epoch

    self.results = dict()

    # Init loggers
    logger_list = []

    self.vlog = None
    if use_visdomlogger:
        if visdomlogger_kwargs is None:
            visdomlogger_kwargs = {}
        self.vlog = PytorchVisdomLogger(name=self.exp_name,
                                        **visdomlogger_kwargs)
        if visdomlogger_c_freq is not None and visdomlogger_c_freq > 0:
            logger_list.append((self.vlog, visdomlogger_c_freq))

    self.elog = None
    if use_explogger:
        if explogger_kwargs is None:
            explogger_kwargs = {}
        self.elog = PytorchExperimentLogger(base_dir=base_dir,
                                            experiment_name=self.exp_name,
                                            **explogger_kwargs)
        if explogger_c_freq is not None and explogger_c_freq > 0:
            logger_list.append((self.elog, explogger_c_freq))
        # Set results log dict to the right path
        self.results = ResultLogDict("results-log.json",
                                     base_dir=self.elog.result_dir)

    self.tlog = None
    if use_telegrammessagelogger:
        if telegrammessagelogger_kwargs is None:
            telegrammessagelogger_kwargs = {}
        self.tlog = TelegramMessageLogger(**telegrammessagelogger_kwargs,
                                          exp_name=self.exp_name)
        if telegrammessagelogger_c_freq is not None and telegrammessagelogger_c_freq > 0:
            logger_list.append((self.tlog, telegrammessagelogger_c_freq))

    self.clog = CombinedLogger(*logger_list)

    set_seed(self._seed)

    # Do the resume stuff
    self._resume_path = None
    self._resume_save_types = resume_save_types
    self._ignore_resume_config = ignore_resume_config
    self._resume_reset_epochs = resume_reset_epochs
    if resume is not None:
        if isinstance(resume, str):
            if resume == "last":
                self._resume_path = os.path.join(
                    base_dir, sorted(os.listdir(base_dir))[-1])
            else:
                self._resume_path = resume
        elif isinstance(resume, PytorchExperiment):
            self._resume_path = resume.elog.base_dir

    if self._resume_path is not None and not self._ignore_resume_config:
        self._config_raw.update(
            Config(file_=os.path.join(self._resume_path, "config",
                                      "config.json")),
            ignore=list(map(lambda x: re.sub("^-+", "", x), sys.argv)))

    # self.elog.save_config(self.config, "config_pre")
    if globs is not None:
        zip_name = os.path.join(self.elog.save_dir, "sources.zip")
        SourcePacker.zip_sources(globs, zip_name)

    # Init objects in config
    self.config = Config.init_objects(self._config_raw)

    atexit.register(self.at_exit_func)
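# Usage sketch (assumption; config values and paths are illustrative only):
# constructing the experiment from a plain dict. ``run()`` is inherited from
# the trixi ``Experiment`` base class.
#
#     exp = PytorchExperiment(
#         config={"n_epochs": 10, "seed": 42, "name": "my_exp"},
#         base_dir="experiment_logs/",
#         use_visdomlogger=False)
#     exp.run()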
def model_run(patch_size, batch_size, odd_class, z, seed=123, log_var_std=0,
              n_epochs=5, model_h_size=(16, 32, 64, 256), exp_name="exp",
              folder_name="exp"):
    set_seed(seed)

    config = Config(
        patch_size=patch_size, batch_size=batch_size, odd_class=odd_class,
        z=z, seed=seed, log_var_std=log_var_std, n_epochs=n_epochs
    )

    device = torch.device("cuda")

    datasets_common_args = {
        "batch_size": batch_size,
        "target_size": patch_size,
        "input_slice": [1, ],
        "add_noise": True,
        "mask_type": "gaussian",  # 0.0, ## TODO
        "elastic_deform": False,
        "rnd_crop": True,
        "rotate": True,
        "color_augment": True,
        "add_slices": 0,
    }

    input_shape = (datasets_common_args["batch_size"], 1,
                   datasets_common_args["target_size"],
                   datasets_common_args["target_size"])

    train_set_args = {
        "base_dir": "hcp/",
        # "num_batches": 500,
        "slice_offset": 20,
        "num_processes": 8,
    }
    test_set_normal_args = {
        "base_dir": "brats17/",
        # "num_batches": 100,
        "do_reshuffle": False,
        "mode": "val",
        "num_processes": 2,
        "slice_offset": 20,
        "label_slice": 2,
        "only_labeled_slices": False,
    }
    test_set_unormal_args = {
        "base_dir": "brats17/",
        # "num_batches": 100,
        "do_reshuffle": False,
        "mode": "val",
        "num_processes": 2,
        "slice_offset": 20,
        "label_slice": 2,
        "only_labeled_slices": True,
        "labeled_threshold": 10,
    }
    test_set_all_args = {
        "base_dir": "brats17_test/",
        # "num_batches": 50,
        "do_reshuffle": False,
        "mode": "val",
        "num_processes": 2,
        "slice_offset": 20,
        "label_slice": 2,
    }

    train_loader = BrainDataSet(**datasets_common_args, **train_set_args)
    test_loader_normal = BrainDataSet(**datasets_common_args,
                                      **test_set_normal_args)
    test_loader_abnorm = BrainDataSet(**datasets_common_args,
                                      **test_set_unormal_args)
    test_loader_all = BrainDataSet(**datasets_common_args, **test_set_all_args)

    model = VAE(input_size=input_shape[1:], h_size=model_h_size,
                z_dim=z).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    lr_scheduler = StepLR(optimizer, step_size=1)

    vlog = PytorchVisdomLogger(exp_name=exp_name)
    elog = PytorchExperimentLogger(base_dir=folder_name, exp_name=exp_name)

    elog.save_config(config, "config")

    for epoch in range(1, n_epochs + 1):
        train(epoch, model, optimizer, train_loader, device, vlog, elog,
              log_var_std)
        kl_roc, rec_roc, loss_roc, kl_pr, rec_pr, loss_pr, test_loss = test_slice(
            model, test_loader_normal, test_loader_abnorm, device, vlog, elog,
            input_shape, batch_size, log_var_std)

        with open(os.path.join(elog.result_dir, "results.json"), "w") as file_:
            json.dump({
                "kl_roc": kl_roc, "rec_roc": rec_roc, "loss_roc": loss_roc,
                "kl_pr": kl_pr, "rec_pr": rec_pr, "loss_pr": loss_pr,
            }, file_, indent=4)

        elog.save_model(model, "vae")

    test_pixel(model, test_loader_all, device, vlog, elog, input_shape,
               batch_size, log_var_std)

    print("All done....")
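# Invocation sketch (assumption; the argument values are illustrative only,
# and the data directories "hcp/" and "brats17/" must already exist):
#
#     if __name__ == "__main__":
#         model_run(patch_size=64, batch_size=64, odd_class=0, z=256,
#                   n_epochs=5, exp_name="brain_vae",
#                   folder_name="logs/brain_vae")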
def model_run(scaling, batch_size, odd_class, z, resize_data: bool,
              transform_image_range: bool, gpu_id: int, seed=123,
              log_var_std=0.0, n_epochs=25):
    set_seed(seed)

    # Both branches target a 64x64 input: either rescale the whole image or
    # take a center crop of that size.
    transformation_list = []
    if resize_data:
        size_of_image_side = 64
        transformation_list.append(
            transforms.Resize((size_of_image_side, size_of_image_side)))
    else:
        size_of_image_side = 64
        transformation_list.append(
            transforms.CenterCrop((size_of_image_side, size_of_image_side)))

    transformation_list.append(transforms.ToTensor())

    if transform_image_range:
        # Map pixel values from [0, 1] to [-1, 1] to match the Tanh output.
        transformation_list.append(
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]))

    transform_functions = transforms.Compose(transformation_list)

    config = Config(scaling=scaling, batch_size=batch_size,
                    odd_class=odd_class, z=z, resize_data=resize_data,
                    transform_image_range=transform_image_range, seed=seed,
                    log_var_std=log_var_std, n_epochs=n_epochs,
                    size_of_image_side=size_of_image_side)

    if gpu_id is not None:
        device = torch.device("cuda:{}".format(gpu_id))
    else:
        # Use CPU
        device = torch.device("cpu")

    image_size = size_of_image_side * size_of_image_side * 3
    input_shape = (batch_size, 3, size_of_image_side, size_of_image_side)
    model_h_size = (16, 32, 64, 256)

    output_activation = None
    if transform_image_range:
        output_activation = torch.nn.Tanh

    model = VAEConv(input_size=input_shape[1:], h_size=model_h_size, z_dim=z,
                    output_activation=output_activation).to(device)
    # model = VAE(z=z, input_size=input_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    lr_scheduler = StepLR(optimizer, step_size=1)

    kwargs = {'num_workers': 0, 'pin_memory': True}

    # train_set = ConcreteCracksDataset(
    #     root_dir="/home/pdeubel/PycharmProjects/data/Concrete-Crack-Images",
    #     train=True, transform=transform_functions)
    # test_set = ConcreteCracksDataset(
    #     root_dir="/home/pdeubel/PycharmProjects/data/Concrete-Crack-Images",
    #     train=False, transform=transform_functions)

    train_set = ConcreteCracksDataset(
        root_dir="/cvhci/data/construction/Concrete-Cracks",
        train=True, transform=transform_functions)
    test_set = ConcreteCracksDataset(
        root_dir="/cvhci/data/construction/Concrete-Cracks",
        train=False, transform=transform_functions)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=False, **kwargs)
    # test_loader_abnorm = torch.utils.data.DataLoader(
    #     test_set, sampler=test_one_set, batch_size=batch_size,
    #     shuffle=False, **kwargs)

    # vlog = PytorchVisdomLogger(exp_name="vae-concrete-cracks")
    vlog = None
    elog = PytorchExperimentLogger(base_dir="logs/concrete-cracks",
                                   exp_name="concrete-cracks_vae")

    elog.save_config(config, "config")

    for epoch in range(1, n_epochs + 1):
        train(epoch, model, optimizer, train_loader, device, scaling, vlog,
              elog, log_var_std)
        elog.save_model(model, "vae_concrete_crack")

    # kl_roc, rec_roc, loss_roc, kl_pr, rec_pr, loss_pr = test(
    #     model, test_loader, device, scaling, vlog, elog,
    #     image_size, batch_size, log_var_std)
    # with open(os.path.join(elog.result_dir, "results.json"), "w") as file_:
    #     json.dump({
    #         "kl_roc": kl_roc, "rec_roc": rec_roc, "loss_roc": loss_roc,
    #         "kl_pr": kl_pr, "rec_pr": rec_pr, "loss_pr": loss_pr,
    #     }, file_, indent=4)

    test_loss, kl_loss, rec_loss = test(model, test_loader, device, scaling,
                                        vlog, elog, image_size, batch_size,
                                        log_var_std, size_of_image_side)

    with open(os.path.join(elog.result_dir, "results.json"), "w") as file_:
        json.dump({
            "test_loss": test_loss, "kl_loss": kl_loss, "rec_loss": rec_loss
        }, file_, indent=4)
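# Invocation sketch (assumption; values are illustrative only and the
# Concrete-Cracks root_dir hardcoded above must be reachable):
#
#     if __name__ == "__main__":
#         model_run(scaling=1, batch_size=64, odd_class=0, z=256,
#                   resize_data=True, transform_image_range=True,
#                   gpu_id=0, n_epochs=25)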
def __init__(self,
             config=None,
             name=None,
             n_epochs=None,
             seed=None,
             base_dir=None,
             globs=None,
             resume=None,
             ignore_resume_config=False,
             resume_save_types=("model", "optimizer", "simple", "th_vars", "results"),
             resume_reset_epochs=True,
             parse_sys_argv=False,
             checkpoint_to_cpu=True,
             save_checkpoint_every_epoch=1,
             explogger_kwargs=None,
             explogger_freq=1,
             loggers=None,
             append_rnd_to_name=False,
             default_save_types=("model", "optimizer", "simple", "th_vars", "results")):

    # super(PytorchExperiment, self).__init__()
    Experiment.__init__(self)

    # Check for command line inputs for config_path and resume_path;
    # these are prioritized over the config and resume arguments!
    config_path_from_argv = None
    if parse_sys_argv:
        config_path_from_argv, resume_path_from_argv = get_vars_from_sys_argv()
        if resume_path_from_argv:
            resume = resume_path_from_argv

    # Construct _config_raw
    if config_path_from_argv is None:
        self._config_raw = self._config_raw_from_input(
            config, name, n_epochs, seed, append_rnd_to_name)
    else:
        self._config_raw = Config(file_=config_path_from_argv)
    update_from_sys_argv(self._config_raw)

    # Set a few experiment attributes
    self.n_epochs = self._config_raw["n_epochs"]
    self._seed = self._config_raw['seed']
    set_seed(self._seed)
    self.exp_name = self._config_raw["name"]
    self._checkpoint_to_cpu = checkpoint_to_cpu
    self._save_checkpoint_every_epoch = save_checkpoint_every_epoch
    # Use the parameter instead of a hardcoded tuple, so callers can actually
    # override the default save types.
    self._default_save_types = default_save_types
    self.results = dict()

    # Get base_dir from _config_raw or store it there
    if base_dir is not None:
        self._config_raw["base_dir"] = base_dir
    base_dir = self._config_raw["base_dir"]

    # Construct experiment logger (automatically activated if base_dir is given)
    self.loggers = {}
    logger_list = []
    if base_dir is not None:
        if explogger_kwargs is None:
            explogger_kwargs = {}
        self.elog = PytorchExperimentLogger(base_dir=base_dir,
                                            exp_name=self.exp_name,
                                            **explogger_kwargs)
        if explogger_freq is not None and explogger_freq > 0:
            logger_list.append((self.elog, explogger_freq))
        self.results = ResultLogDict("results-log.json",
                                     base_dir=self.elog.result_dir)
    else:
        self.elog = None

    # Construct other loggers
    if loggers is not None:
        for logger_name, logger_cfg in loggers.items():
            _logger, log_freq = self._make_logger(logger_name, logger_cfg)
            self.loggers[logger_name] = _logger
            if log_freq is not None and log_freq > 0:
                logger_list.append((_logger, log_freq))

    self.clog = CombinedLogger(*logger_list)

    # Set resume attributes and update _config_raw;
    # actual resuming is done automatically after setup in _setup_internal
    self._resume_path = None
    self._resume_save_types = resume_save_types
    self._ignore_resume_config = ignore_resume_config
    self._resume_reset_epochs = resume_reset_epochs
    if resume is not None:
        if isinstance(resume, str):
            if resume == "last":
                if base_dir is None:
                    raise ValueError("resume='last' requires base_dir.")
                self._resume_path = os.path.join(
                    base_dir, sorted(os.listdir(base_dir))[-1])
            else:
                self._resume_path = resume
        elif isinstance(resume, PytorchExperiment):
            self._resume_path = resume.elog.base_dir

    if self._resume_path is not None and not self._ignore_resume_config:
        self._config_raw.update(
            Config(file_=os.path.join(self._resume_path, "config",
                                      "config.json")),
            ignore=list(map(lambda x: re.sub("^-+", "", x), sys.argv)))

    # Save everything we need to reproduce the experiment
    if globs is not None and self.elog is not None:
        zip_name = os.path.join(self.elog.save_dir, "sources.zip")
        SourcePacker.zip_sources(globs, zip_name)

    # Init objects in config
    self.config = Config.init_objects(self._config_raw)

    atexit.register(self.at_exit_func)
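# Usage sketch (assumption; the exact logger spec accepted by ``_make_logger``
# is not shown here, so the ``loggers`` value below is hypothetical):
#
#     exp = PytorchExperiment(
#         config={"n_epochs": 5, "seed": 1, "name": "demo"},
#         base_dir="experiment_logs/",
#         loggers={"visdom": "visdom"})
#     exp.run()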
def model_run(scaling, batch_size, odd_class, z, seed=123, log_var_std=0,
              n_epochs=25):
    set_seed(seed)

    config = Config(scaling=scaling, batch_size=batch_size,
                    odd_class=odd_class, z=z, seed=seed,
                    log_var_std=log_var_std, n_epochs=n_epochs)

    image_size = 784
    input_size = image_size * scaling
    device = torch.device("cuda")

    def get_same_index(ds, label, invert=False):
        # Collect the dataset indices whose target matches (or, with
        # invert=True, does not match) the given label.
        label_indices = []
        for i in range(len(ds)):
            if invert:
                if ds[i][1] != label:
                    label_indices.append(i)
            if not invert:
                if ds[i][1] == label:
                    label_indices.append(i)
        return label_indices

    kwargs = {'num_workers': 1, 'pin_memory': True}

    train_set = datasets.FashionMNIST(
        '/home/david/data/datasets/fashion_mnist', train=True, download=True,
        transform=transforms.ToTensor())
    test_set = datasets.FashionMNIST(
        '/home/david/data/datasets/fashion_mnist', train=False,
        transform=transforms.ToTensor())

    # Train and test on every class except odd_class; the held-out class
    # forms the "abnormal" test set.
    train_indices_zero = get_same_index(train_set, odd_class, invert=True)
    train_zero_set = torch.utils.data.sampler.SubsetRandomSampler(
        train_indices_zero)
    test_indices_zero = get_same_index(test_set, odd_class, invert=True)
    test_zero_set = torch.utils.data.sampler.SubsetRandomSampler(
        test_indices_zero)
    test_indices_ones = get_same_index(test_set, odd_class)
    test_one_set = torch.utils.data.sampler.SubsetRandomSampler(
        test_indices_ones)

    train_loader = torch.utils.data.DataLoader(
        train_set, sampler=train_zero_set, batch_size=batch_size,
        shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_set, sampler=test_zero_set, batch_size=batch_size,
        shuffle=False, **kwargs)
    test_loader_abnorm = torch.utils.data.DataLoader(
        test_set, sampler=test_one_set, batch_size=batch_size,
        shuffle=False, **kwargs)

    model = VAE(z=z, input_size=input_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    vlog = PytorchVisdomLogger(exp_name="vae-fmnist")
    elog = PytorchExperimentLogger(
        base_dir="/home/david/data/logs/mnist_exp_fin",
        exp_name="fashion-mnist_vae")

    elog.save_config(config, "config")

    for epoch in range(1, n_epochs + 1):
        train(epoch, model, optimizer, train_loader, device, scaling, vlog,
              elog, log_var_std)
        kl_roc, rec_roc, loss_roc, kl_pr, rec_pr, loss_pr = test(
            model, test_loader, test_loader_abnorm, device, scaling, vlog,
            elog, image_size, batch_size, log_var_std)

        with open(os.path.join(elog.result_dir, "results.json"), "w") as file_:
            json.dump({
                "kl_roc": kl_roc, "rec_roc": rec_roc, "loss_roc": loss_roc,
                "kl_pr": kl_pr, "rec_pr": rec_pr, "loss_pr": loss_pr,
            }, file_, indent=4)
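# Invocation sketch (assumption; values are illustrative only; class 0 of
# Fashion-MNIST ("T-shirt/top") is held out here as the anomalous class):
#
#     if __name__ == "__main__":
#         model_run(scaling=1, batch_size=128, odd_class=0, z=20, n_epochs=25)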