def __init__(
    self,
    model: Model,
    train_loader: DataLoader,
    val_loader: DataLoader,
    max_epoch: int = 100,
    save_dir: str = "swa_trainer",
    checkpoint_path: str = None,
    device="cpu",
    config: dict = None,
    **kwargs,
) -> None:
    super().__init__(
        model,
        train_loader,
        val_loader,
        max_epoch,
        save_dir,
        checkpoint_path,
        device,
        config,
        **kwargs,
    )
    # initialize swa_model from the current weights of the training model.
    self.swa_model = Model.initialize_from_state_dict(model.state_dict())
    self.swa_model.to(self.device)
    self.swa_n = 0  # number of snapshots averaged into swa_model so far
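
# A minimal sketch of the running-average update that `swa_n` supports,
# following the stochastic weight averaging (SWA) rule of Izmailov et al.
# The method name `_update_swa` and the `self.model` attribute are
# assumptions for illustration; neither is shown in the trainer above.
def _update_swa(self) -> None:
    # Equal-weight running mean over snapshots:
    # swa_w <- (swa_w * n + w) / (n + 1)
    for swa_param, param in zip(
        self.swa_model.torchnet.parameters(), self.model.torchnet.parameters()
    ):
        swa_param.data.mul_(self.swa_n / (self.swa_n + 1.0)).add_(
            param.data / (self.swa_n + 1.0)
        )
    self.swa_n += 1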
class TestTrainer(TestCase):
    def setUp(self) -> None:
        super().setUp()
        arch_dict = {
            "name": "clusternet6cTwoHead",
            "input_size": 24,
            "num_channel": 1,
            "output_k_A": 50,
            "output_k_B": 10,
            "num_sub_heads": 5,
        }
        optim_dict = {"name": "Adam"}
        scheduler_dict = {
            "name": "MultiStepLR",
            "milestones": [10, 20, 30, 40, 50, 60, 70, 80, 90],
            "gamma": 1,
        }
        self.model = Model(arch_dict, optim_dict, scheduler_dict)
        self.scheduler = RampScheduler(100, 500, 10, 1, -5)

    def test_save_trainer(self):
        for epoch in range(50):
            self.model.step()
            self.scheduler.step()
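
# RampScheduler's positional arguments are not documented in these tests. A
# minimal sketch of a Mean-Teacher-style exponential ramp that such a
# scheduler could implement; the class name and parameter order below are
# assumptions, not the library's actual signature.
import math

class ExpRampSketch:
    def __init__(self, begin_epoch, max_epoch, max_value, min_value, ramp_mult):
        self.begin_epoch, self.max_epoch = begin_epoch, max_epoch
        self.max_value, self.min_value = max_value, min_value
        self.ramp_mult = ramp_mult  # e.g. -5, giving exp(-5 * (1 - t) ** 2)
        self.epoch = 0

    @property
    def value(self) -> float:
        if self.epoch <= self.begin_epoch:
            return self.min_value
        t = min(
            (self.epoch - self.begin_epoch) / (self.max_epoch - self.begin_epoch),
            1.0,
        )
        # Sigmoid-shaped ramp-up used in Mean Teacher / temporal ensembling.
        return self.min_value + (self.max_value - self.min_value) * math.exp(
            self.ramp_mult * (1.0 - t) ** 2
        )

    def step(self) -> None:
        self.epoch += 1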
class TestEMA(TestCase):
    def setUp(self) -> None:
        super().setUp()
        self._model = Model(arch=deeplabv3_resnet101(pretrained=True))
        self._model_ema = EMA_Model(
            Model(deeplabv3_resnet101(False)), alpha=0.9, weight_decay=1e-4
        )
        # self._model_ema._model.load_state_dict(self._model.state_dict())
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self._img = (
            ToTensor()(Image.open("img1.jpg").convert("RGB"))
            .unsqueeze(0)
            .to(self._device)
        )
        self._model.to(self._device)
        self._model_ema.to(self._device)

    def test_1(self):
        self._model.eval()
        self._model_ema.eval()
        with torch.no_grad():
            student_prediction = self._model(self._img)["out"]
        plt.figure(1)
        plt.imshow(self._img[0].cpu().numpy().transpose(1, 2, 0))
        plt.contour(student_prediction.max(1)[1].cpu().detach().numpy()[0])
        plt.show(block=False)
        self._model_ema._global_step += 1
        for i in range(1000):
            teacher_prediction = self._model_ema(self._img)["out"]
            plt.figure(2)
            plt.clf()
            plt.imshow(self._img[0].cpu().numpy().transpose(1, 2, 0))
            plt.contour(teacher_prediction.max(1)[1].cpu().detach().numpy()[0])
            plt.show(block=False)
            plt.pause(0.00000003)
            self._model_ema.step(self._model)
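
# A minimal sketch of the update `EMA_Model.step` presumably performs: an
# exponential moving average of the student weights, with the effective alpha
# warmed up from 0 as in Mean Teacher. The function below is an illustration,
# not the library's implementation.
import torch

def ema_step(
    teacher: torch.nn.Module,
    student: torch.nn.Module,
    alpha: float,
    global_step: int,
) -> None:
    # Keep the teacher close to the student early on, then let alpha dominate.
    alpha = min(1.0 - 1.0 / (global_step + 1), alpha)
    with torch.no_grad():
        for t_param, s_param in zip(teacher.parameters(), student.parameters()):
            # t <- alpha * t + (1 - alpha) * s
            t_param.mul_(alpha).add_(s_param, alpha=1.0 - alpha)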
def _gaussian_regularization(
    self,
    model: Model,
    tf1_images,
    tf1_pred_simplex: List[Tensor],
    head_name="B",
) -> Tensor:
    """
    Calculate prediction simplexes on Gaussian-noised tf1 images and the KL
    divergence against the original prediction simplexes.
    :param tf1_images: tf1-transformed images
    :param tf1_pred_simplex: simplex list of tf1-transformed image predictions
    :return: loss
    """
    _tf1_images_gaussian = self.gaussian_adder(tf1_images)
    _tf1_gaussian_simplex = model.torchnet(_tf1_images_gaussian, head=head_name)
    assert assert_list(simplex, tf1_pred_simplex)
    assert assert_list(simplex, _tf1_gaussian_simplex)
    assert len(tf1_pred_simplex) == len(_tf1_gaussian_simplex)
    reg_loss = []
    for __tf1_simplex, __tf1_gaussian_simplex in zip(
        tf1_pred_simplex, _tf1_gaussian_simplex
    ):
        reg_loss.append(
            self.kl_div(__tf1_gaussian_simplex, __tf1_simplex.detach())
        )
    return sum(reg_loss) / len(reg_loss)  # type: ignore
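
# `self.kl_div` above is called as kl_div(prediction_on_noisy_input, target).
# A minimal sketch of a batch-averaged KL divergence over simplex rows,
# assuming KL(target || pred) with an epsilon for numerical stability; the
# library's KL_div may differ in argument order and reduction.
from torch import Tensor

def kl_div_sketch(pred: Tensor, target: Tensor, eps: float = 1e-16) -> Tensor:
    # Both inputs are (batch, classes) rows that each sum to 1.
    return (target * ((target + eps).log() - (pred + eps).log())).sum(1).mean()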
assert Trainer, config.get("Trainer").get("name") return Trainer DEFAULT_CONFIG = "config_CIFAR.yaml" merged_config = ConfigManger(DEFAULT_CONFIG_PATH=DEFAULT_CONFIG, verbose=True, integrality_check=True).config train_loader_A, train_loader_B, val_loader = get_dataloader( **merged_config["DataLoader"]) # create model: model = Model( arch_dict=merged_config["Arch"], optim_dict=merged_config["Optim"], scheduler_dict=merged_config["Scheduler"], ) model = to_Apex(model, opt_level=None, verbosity=0) Trainer = get_trainer(merged_config) clusteringTrainer = Trainer(model=model, train_loader_A=train_loader_A, train_loader_B=train_loader_B, val_loader=val_loader, config=merged_config, **merged_config["Trainer"]) clusteringTrainer.start_training() clusteringTrainer.clean_up()
    gdl_loss = self.abs_loss(orig_gradient_x, pred_gradient_x) + self.abs_loss(
        orig_gradient_y, pred_gradient_y
    )
    return self.mse(pred, gt) + self.gdl_weight * gdl_loss


img_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # transforms.Normalize((0.5,), (0.5,))
    ]
)
dataset = MNIST(DATA_PATH, transform=img_transform)
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)
model = Model()
model.torchnet = autoencoder()
model.optimizer = torch.optim.Adam(
    model.torchnet.parameters(), lr=1e-3, weight_decay=1e-5
)
config = ConfigManger().parsed_args
if config["loss"] == "mse":
    criterion = nn.MSELoss()
elif config["loss"] == "gdl":
    criterion = gradient_difference_loss(config["weight"])
trainer = MNISTTrainer(
    model=model,
    train_loader=dataloader,
    val_loader=dataloader,
**config["DataLoader"]) datainterface.drop_last = True train_loader = datainterface.ParallelDataLoader( default_mnist_img_transform["tf1"], default_mnist_img_transform["tf2"], default_mnist_img_transform["tf2"], default_mnist_img_transform["tf2"], default_mnist_img_transform["tf2"], default_mnist_img_transform["tf2"], ) datainterface.split_partitions = ["val"] datainterface.drop_last = False val_loader = datainterface.ParallelDataLoader( default_mnist_img_transform["tf3"]) model = Model(config["Arch"], config["Optim"], config["Scheduler"]) assert config["Trainer"]["name"] in ( "IIC", "IIC_enhance", "IIC_adv_enhance", "IMSAT", "IMSAT_enhance", ) if config["Trainer"]["name"] == "IMSAT": # MI(x,p) + CE(p,adv(p)) or MI(x,p) + CE(p,geom(p)) Trainer = IMSAT_Trainer elif config["Trainer"]["name"] == "IMSAT_enhance": # MI(x,p) + CE(p,adv(p)) + CE(p,geom(p)) Trainer = IMSAT_Enhanced_Trainer elif config["Trainer"]["name"] == "IIC":
img_transforms["tf1"], img_transforms["tf2"], img_transforms["tf2"], img_transforms["tf2"], img_transforms["tf2"], ) val_loader = DatasetInterface( split_partitions=val_split_partition, **merged_config["DataLoader"]).ParallelDataLoader( img_transforms["tf3"]) return train_loader_A, train_loader_B, val_loader train_loader_A, train_loader_B, val_loader = get_dataloader(merged_config) # create model: model = Model( arch_dict=merged_config.get("Arch"), optim_dict=merged_config.get("Optim"), scheduler_dict=merged_config.get("Scheduler"), ) trainer = MixUpTrainer(model=model, train_loader_A=train_loader_A, train_loader_B=train_loader_B, val_loader=val_loader, config=merged_config, **merged_config.get("Trainer")) trainer.start_training() trainer.clean_up()
    val_transform=val_transform,
    dataloader_params=dataloader_params,
)
prior = get_prior_from_dataset(unlabeled_loader.dataset)
print("prior for unlabeled dataset", prior)

# network part
import warnings

fix_all_seed(int(config.get("Seed", 0)))
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    net = SimpleNet(1, len(unlabeled_class_sample_nums))
optim = RAdam(net.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = MultiStepLR(optim, milestones=[50, 80], gamma=0.2)
model = Model()
model.torchnet = net
model.optimizer = optim
model.scheduler = scheduler

# trainer part
Trainer = {
    "SemiTrainer": SemiTrainer,
    "SemiEntropyTrainer": SemiEntropyTrainer,
    "SemiPrimalDualTrainer": SemiPrimalDualTrainer,
    "SemiWeightedIICTrainer": SemiWeightedIICTrainer,
    "SemiUDATrainer": SemiUDATrainer,
}.get(config["Trainer"]["name"])
assert Trainer
trainer = Trainer(
from deepclustering.manager import ConfigManger
from deepclustering.model import Model, to_Apex

from arch import _register_arch
from data import get_dataloader
from scheduler import CustomScheduler
from trainer import AdaNetTrainer, VAT_Trainer

_ = _register_arch  # to enable the network registration

DEFAULT_CONFIG_PATH = "config.yaml"
config = ConfigManger(
    DEFAULT_CONFIG_PATH, verbose=True, integrality_check=False
).config
model = Model(config.get("Arch"), config.get("Optim"), config.get("Scheduler"))
model = to_Apex(model, opt_level=None)
label_loader, unlabel_loader, val_loader = get_dataloader(
    config["DataLoader"].get("name"),
    config["DataLoader"].get("aug", False),
    config.get("DataLoader"),
)
scheduler = CustomScheduler(max_epoch=config["Trainer"]["max_epoch"])
assert config["Trainer"].get("name") in ("vat", "ada")
Trainer = VAT_Trainer if config["Trainer"]["name"].lower() == "vat" else AdaNetTrainer
trainer = Trainer(
    model=model,
    labeled_loader=label_loader,
    unlabeled_loader=unlabel_loader,
    val_loader=val_loader,