def test_pruning_callback() -> None:
    """Smoke-test model quantization during a one-epoch MNIST run.

    NOTE(review): the function name mentions *pruning* but the callback used
    is ``QuantizationCallback`` — confirm the name is intentional.
    """
    # Both loaders use the MNIST test split (train=False) — presumably to
    # keep this smoke test fast; confirm that is intended.
    to_tensor = ToTensor()
    train_loader = DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=to_tensor),
        batch_size=32,
    )
    valid_loader = DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=to_tensor),
        batch_size=32,
    )
    loaders = {"train": train_loader, "valid": valid_loader}

    # Simple 784 -> 512 -> 10 MLP classifier.
    model = nn.Sequential(
        Flatten(), nn.Linear(784, 512), nn.ReLU(), nn.Linear(512, 10)
    )
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        callbacks=[dl.QuantizationCallback(logdir="./logs")],
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=1,
        logdir="./logs",
        check=True,
    )
    # The callback is expected to dump the quantized model at this path.
    assert os.path.isfile("./logs/quantized.pth")
def main():
    """Train a simple MNIST GAN with separate generator/discriminator optimizers."""
    # Generator: 128-d latent vector -> 7x7x128 map -> upsampled 28x28 image.
    generator = nn.Sequential(
        # We want to generate 128 coefficients to reshape into a 7x7x128 map
        nn.Linear(128, 128 * 7 * 7),
        nn.LeakyReLU(0.2, inplace=True),
        Lambda(lambda z: z.view(z.size(0), 128, 7, 7)),
        nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
        nn.LeakyReLU(0.2, inplace=True),
        nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(128, 1, (7, 7), padding=3),
        nn.Sigmoid(),
    )
    # Discriminator: 28x28 image -> single real/fake logit.
    discriminator = nn.Sequential(
        nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1),
        nn.LeakyReLU(0.2, inplace=True),
        GlobalMaxPool2d(),
        Flatten(),
        nn.Linear(128, 1),
    )
    model = {"generator": generator, "discriminator": discriminator}

    def _adam(net):
        # Shared optimizer recipe for both players.
        return torch.optim.Adam(net.parameters(), lr=0.0003, betas=(0.5, 0.999))

    optimizer = {
        "generator": _adam(generator),
        "discriminator": _adam(discriminator),
    }

    train_data = MNIST(
        os.getcwd(), train=True, download=True, transform=ToTensor(),
    )
    loaders = {"train": DataLoader(train_data, batch_size=32)}

    runner = CustomRunner()
    runner.train(
        model=model,
        optimizer=optimizer,
        loaders=loaders,
        callbacks=[
            # One optimizer step per player, each driven by its own loss metric.
            dl.OptimizerCallback(
                optimizer_key="generator", metric_key="loss_generator"
            ),
            dl.OptimizerCallback(
                optimizer_key="discriminator", metric_key="loss_discriminator"
            ),
        ],
        main_metric="loss_generator",
        num_epochs=20,
        verbose=True,
        logdir="./logs_gan",
        check=True,
    )
def __init__(
    self,
    # state_net: StateNet,
    head_net: ValueHead,
):
    """Build the observation -> aggregation -> head network stack.

    Args:
        head_net: value-head module applied after the aggregation net.
    """
    super().__init__()
    # self.state_net = state_net
    # Conv feature extractor; input has 12 channels (source of that count is
    # outside this view — presumably stacked observation frames, TODO confirm).
    self.observation_net = nn.Sequential(
        nn.Conv2d(12, 16, kernel_size=3),
        nn.Dropout2d(p=0.1),
        nn.LeakyReLU(),
        nn.Conv2d(16, 32, kernel_size=3, groups=4),
        nn.Dropout2d(p=0.1),
        nn.LeakyReLU(),
        nn.Conv2d(32, 64, kernel_size=3, groups=4),
        # Flatten()
    )
    # Initialize conv weights to match the LeakyReLU nonlinearity.
    self.observation_net.apply(
        utils.create_optimal_inner_init(nn.LeakyReLU))

    # Flatten + project to a 64-d aggregated representation.
    self.aggregation_net = nn.Sequential(
        Flatten(),
        nn.Linear(64, 64),
        nn.LayerNorm(64),
        nn.Dropout(p=0.1),
        nn.LeakyReLU(),
    )
    self.aggregation_net.apply(
        utils.create_optimal_inner_init(nn.LeakyReLU))
    self.head_net = head_net
def __init__(
    self,
    arch: str = "resnet18",
    pretrained: bool = True,
    frozen: bool = True,
    pooling: str = None,
    pooling_kwargs: dict = None,
    cut_layers: int = 2,
    state_dict: Union[dict, str, Path] = None,
):
    """Build a torchvision-ResNet encoder with optional custom pooling.

    Args:
        arch: Name for resnet. Have to be one of
            resnet18, resnet34, resnet50, resnet101, resnet152
        pretrained: If True, returns a model pre-trained on ImageNet
        frozen: If frozen, sets requires_grad to False
        pooling: name of the pooling layer to append (looked up in ``MODULE``);
            if None, a plain ``Flatten`` is used instead
        pooling_kwargs: params for pooling
        cut_layers: how many trailing ResNet children to drop (default 2:
            avgpool + fc)
        state_dict (Union[dict, str, Path]): Path to ``torch.Model``
            or a dict containing parameters and persistent buffers.
    """
    super().__init__()
    resnet = torchvision.models.__dict__[arch](pretrained=pretrained)
    if state_dict is not None:
        # Accept either an in-memory state dict or a checkpoint path.
        if isinstance(state_dict, (Path, str)):
            state_dict = torch.load(str(state_dict))
        resnet.load_state_dict(state_dict)

    modules = list(resnet.children())[:-cut_layers]  # delete last layers

    if frozen:
        # Freeze the backbone so only downstream layers train.
        for module in modules:
            utils.set_requires_grad(module, requires_grad=False)

    if pooling is not None:
        pooling_kwargs = pooling_kwargs or {}
        pooling_layer_fn = MODULE.get(pooling)
        # Attention-style poolings need the incoming feature width;
        # others are constructed from kwargs alone.
        pooling_layer = (
            pooling_layer_fn(
                in_features=resnet.fc.in_features, **pooling_kwargs
            )
            if "attn" in pooling.lower()
            else pooling_layer_fn(**pooling_kwargs)
        )
        modules += [pooling_layer]

        # Some pooling layers can report their output width; if not,
        # out_features stays None (callers must handle that).
        if hasattr(pooling_layer, "out_features"):
            out_features = pooling_layer.out_features(
                in_features=resnet.fc.in_features
            )
        else:
            out_features = None
    else:
        # No custom pooling: keep the backbone width and just flatten.
        out_features = resnet.fc.in_features
        modules += [Flatten()]

    self.out_features = out_features
    self.encoder = nn.Sequential(*modules)
def test_mnist():
    """Run a short Optuna learning-rate study over an MNIST classifier."""
    # NOTE(review): both "trainset" and "testset" use train=False — presumably
    # to keep the test fast; confirm this is intentional.
    to_tensor = ToTensor()
    trainset = MNIST("./data", train=False, download=True, transform=to_tensor)
    testset = MNIST("./data", train=False, download=True, transform=to_tensor)
    loaders = {
        "train": DataLoader(trainset, batch_size=32),
        "valid": DataLoader(testset, batch_size=64),
    }
    # 784 -> 128 -> 10 MLP, shared across trials.
    model = nn.Sequential(
        Flatten(), nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10)
    )

    def objective(trial):
        # Single tunable hyperparameter: the learning rate (log-uniform).
        lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        runner = dl.SupervisedRunner()
        runner.train(
            model=model,
            loaders=loaders,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=[
                # Lets Optuna stop unpromising trials early.
                OptunaPruningCallback(trial),
                AccuracyCallback(num_classes=10),
            ],
            num_epochs=10,
            main_metric="accuracy01",
            minimize_metric=False,
        )
        return runner.best_valid_metrics[runner.main_metric]

    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=1, n_warmup_steps=0, interval_steps=1
    )
    study = optuna.create_study(direction="maximize", pruner=pruner)
    study.optimize(objective, n_trials=5, timeout=300)
    assert True
def __init__(self, features_dim: int):
    """
    Args:
        features_dim: size of the output tensor
    """
    super().__init__()
    # Convolutional feature extractor over 1-channel input.
    conv_stack = [
        nn.Conv2d(1, 32, 3, 1),
        nn.ReLU(),
        nn.Conv2d(32, 64, 3, 1),
        nn.ReLU(),
        nn.MaxPool2d(2),
    ]
    # Flatten + MLP projection to the requested embedding size.
    head = [
        Flatten(),
        nn.Linear(9216, 128),
        nn.ReLU(),
        nn.Linear(128, features_dim),
        Normalize(),
    ]
    self._net = nn.Sequential(*conv_stack, *head)
def __init__(self, out_features: int, normalize: bool = True):
    """
    Args:
        out_features: size of the output tensor
        normalize: if True, a ``Normalize()`` layer is appended to the stack
    """
    super().__init__()
    modules = [
        nn.Conv2d(1, 32, 3, 1),
        nn.ReLU(),
        nn.Conv2d(32, 64, 3, 1),
        nn.ReLU(),
        nn.MaxPool2d(2),
        Flatten(),
        nn.Linear(9216, 128),
        nn.ReLU(),
        nn.Linear(128, out_features),
    ] + ([Normalize()] if normalize else [])
    self._net = nn.Sequential(*modules)
def __init__(self, num_hidden1=128, num_hidden2=64):
    """
    Args:
        num_hidden1: size of the first hidden representation
        num_hidden2: size of the second hidden representation
    """
    super().__init__()
    # Convolutional feature extractor, exposed as its own attribute.
    conv_layers = [
        nn.Conv2d(1, 32, 3, 1),
        nn.ReLU(),
        nn.Conv2d(32, 64, 3, 1),
        nn.ReLU(),
        nn.MaxPool2d(2),
        Flatten(),
    ]
    self.conv_net = nn.Sequential(*conv_layers)
    # Two-layer projection head ending with a Normalize() layer.
    linear_layers = [
        nn.Linear(9216, num_hidden1),
        nn.ReLU(),
        nn.Linear(num_hidden1, num_hidden2),
        Normalize(),
    ]
    self.linear_net = nn.Sequential(*linear_layers)
    # Full pipeline: conv extractor followed by the projection head.
    self._net = nn.Sequential(self.conv_net, self.linear_net)
def __init__(self, out_features: int):
    """
    Args:
        out_features: size of the output tensor
    """
    super().__init__()
    # Conv + batch-norm feature extractor followed by a normalized MLP head.
    self._net = nn.Sequential(
        nn.Conv2d(1, 32, 3, 1),
        nn.LeakyReLU(),
        nn.BatchNorm2d(32),
        nn.Conv2d(32, 64, 3, 1),
        nn.LeakyReLU(),
        nn.MaxPool2d(2),
        Flatten(),
        nn.BatchNorm1d(9216),
        nn.Linear(9216, 128),
        nn.LeakyReLU(),
        nn.Linear(128, out_features),
        nn.BatchNorm1d(out_features),
    )
def train_experiment(device, engine=None):
    """Train a minimal MNIST GAN for one epoch inside a temporary logdir.

    Args:
        device: target device, used to build ``dl.DeviceEngine`` when no
            engine is supplied.
        engine: optional catalyst engine; overrides the device-based default.
    """
    with TemporaryDirectory() as logdir:
        # Original full-size conv GAN, kept for reference:
        # latent_dim = 128
        # generator = nn.Sequential(
        #     # We want to generate 128 coefficients to reshape into a 7x7x128 map
        #     nn.Linear(128, 128 * 7 * 7),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     Lambda(lambda x: x.view(x.size(0), 128, 7, 7)),
        #     nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     nn.Conv2d(128, 1, (7, 7), padding=3),
        #     nn.Sigmoid(),
        # )
        # discriminator = nn.Sequential(
        #     nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     GlobalMaxPool2d(),
        #     Flatten(),
        #     nn.Linear(128, 1),
        # )
        # Tiny linear stand-in used instead (note the ``_ddp_hack`` lambda —
        # presumably needed for DistributedDataParallel; confirm with its def).
        latent_dim = 32
        generator = nn.Sequential(
            nn.Linear(latent_dim, 28 * 28), Lambda(_ddp_hack), nn.Sigmoid(),
        )
        discriminator = nn.Sequential(Flatten(), nn.Linear(28 * 28, 1))
        model = {"generator": generator, "discriminator": discriminator}
        # Separate BCE-with-logits criteria, one per player.
        criterion = {
            "generator": nn.BCEWithLogitsLoss(),
            "discriminator": nn.BCEWithLogitsLoss()
        }
        optimizer = {
            "generator": torch.optim.Adam(
                generator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
            "discriminator": torch.optim.Adam(
                discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
        }
        # MNIST test split (train=False) — presumably for test speed; confirm.
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True,
                      transform=ToTensor()), batch_size=32),
        }

        runner = CustomRunner(latent_dim)
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                # Discriminator loss on the combined real+fake batch.
                dl.CriterionCallback(
                    input_key="combined_predictions",
                    target_key="labels",
                    metric_key="loss_discriminator",
                    criterion_key="discriminator",
                ),
                # Generator loss: fool the discriminator via misleading labels.
                dl.CriterionCallback(
                    input_key="generated_predictions",
                    target_key="misleading_labels",
                    metric_key="loss_generator",
                    criterion_key="generator",
                ),
                dl.OptimizerCallback(
                    model_key="generator",
                    optimizer_key="generator",
                    metric_key="loss_generator",
                ),
                dl.OptimizerCallback(
                    model_key="discriminator",
                    optimizer_key="discriminator",
                    metric_key="loss_discriminator",
                ),
            ],
            valid_loader="train",
            valid_metric="loss_generator",
            minimize_valid_metric=True,
            num_epochs=1,
            verbose=False,
            logdir=logdir,
        )
        # Smoke-check the prediction path (skipped under DDP — presumably
        # ad-hoc prediction is unsupported across workers; confirm).
        if not isinstance(engine, dl.DistributedDataParallelEngine):
            runner.predict_batch(None)[0, 0].cpu().numpy()
def train_experiment(device):
    """Train the full conv MNIST GAN for one epoch inside a temporary logdir.

    Args:
        device: target device passed to ``dl.DeviceEngine``.
    """
    with TemporaryDirectory() as logdir:
        latent_dim = 128
        # Generator: 128-d latent -> 7x7x128 map -> upsampled 28x28 image.
        generator = nn.Sequential(
            # We want to generate 128 coefficients to reshape into a 7x7x128 map
            nn.Linear(128, 128 * 7 * 7),
            nn.LeakyReLU(0.2, inplace=True),
            Lambda(lambda x: x.view(x.size(0), 128, 7, 7)),
            nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 1, (7, 7), padding=3),
            nn.Sigmoid(),
        )
        # Discriminator: 28x28 image -> single real/fake logit.
        discriminator = nn.Sequential(
            nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            GlobalMaxPool2d(),
            Flatten(),
            nn.Linear(128, 1),
        )
        model = {"generator": generator, "discriminator": discriminator}
        criterion = {
            "generator": nn.BCEWithLogitsLoss(),
            "discriminator": nn.BCEWithLogitsLoss()
        }
        optimizer = {
            "generator": torch.optim.Adam(
                generator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
            "discriminator": torch.optim.Adam(
                discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
        }
        # MNIST test split (train=False) — presumably for test speed; confirm.
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True,
                      transform=ToTensor()), batch_size=32),
        }

        class CustomRunner(dl.Runner):
            """Runner implementing the two-player GAN batch logic."""

            def predict_batch(self, batch):
                # ``batch`` is unused: prediction just decodes one random latent.
                batch_size = 1
                # Sample random points in the latent space
                random_latent_vectors = torch.randn(
                    batch_size, latent_dim).to(self.device)
                # Decode them to fake images
                generated_images = self.model["generator"](
                    random_latent_vectors).detach()
                return generated_images

            def handle_batch(self, batch):
                real_images, _ = batch
                batch_size = real_images.shape[0]

                # Sample random points in the latent space
                random_latent_vectors = torch.randn(
                    batch_size, latent_dim).to(self.device)
                # Decode them to fake images (detached: no generator grads here)
                generated_images = self.model["generator"](
                    random_latent_vectors).detach()
                # Combine them with real images
                combined_images = torch.cat([generated_images, real_images])

                # Assemble labels discriminating real from fake images
                # (1 = generated, 0 = real, matching the concat order above)
                labels = torch.cat([
                    torch.ones((batch_size, 1)), torch.zeros((batch_size, 1))
                ]).to(self.device)
                # Add random noise to the labels - important trick!
                labels += 0.05 * torch.rand(labels.shape).to(self.device)

                # Discriminator forward
                combined_predictions = self.model["discriminator"](
                    combined_images)

                # Sample random points in the latent space
                random_latent_vectors = torch.randn(
                    batch_size, latent_dim).to(self.device)
                # Assemble labels that say "all real images"
                misleading_labels = torch.zeros(
                    (batch_size, 1)).to(self.device)

                # Generator forward (not detached: generator learns from this)
                generated_images = self.model["generator"](
                    random_latent_vectors)
                generated_predictions = self.model["discriminator"](
                    generated_images)

                self.batch = {
                    "combined_predictions": combined_predictions,
                    "labels": labels,
                    "generated_predictions": generated_predictions,
                    "misleading_labels": misleading_labels,
                }

        runner = CustomRunner()
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                # Discriminator loss on the combined real+fake batch.
                dl.CriterionCallback(
                    input_key="combined_predictions",
                    target_key="labels",
                    metric_key="loss_discriminator",
                    criterion_key="discriminator",
                ),
                # Generator loss: fool the discriminator via misleading labels.
                dl.CriterionCallback(
                    input_key="generated_predictions",
                    target_key="misleading_labels",
                    metric_key="loss_generator",
                    criterion_key="generator",
                ),
                dl.OptimizerCallback(
                    model_key="generator",
                    optimizer_key="generator",
                    metric_key="loss_generator",
                ),
                dl.OptimizerCallback(
                    model_key="discriminator",
                    optimizer_key="discriminator",
                    metric_key="loss_discriminator",
                ),
            ],
            valid_loader="train",
            valid_metric="loss_generator",
            minimize_valid_metric=True,
            num_epochs=1,
            verbose=False,
            logdir=logdir,
        )
        # Smoke-check the prediction path: decode one image to numpy.
        runner.predict_batch(None)[0, 0].cpu().numpy()
nn.LeakyReLU(0.2, inplace=True), Lambda(lambda x: x.view(x.size(0), 128, 7, 7)), nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), nn.Conv2d(128, 1, (7, 7), padding=3), nn.Sigmoid(), ) discriminator = nn.Sequential( nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), GlobalMaxPool2d(), Flatten(), nn.Linear(128, 1), ) model = {"generator": generator, "discriminator": discriminator} optimizer = { "generator": torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.5, 0.999)), "discriminator": torch.optim.Adam(discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)), } loaders = { "train": DataLoader( MNIST(
# We want to generate 128 coefficients to reshape into a 7x7x128 map nn.Linear(128, 128 * 7 * 7), nn.LeakyReLU(0.2, inplace=True), Lambda(lambda x: x.view(x.size(0), 128, 7, 7)), nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), nn.Conv2d(128, 1, (7, 7), padding=3), nn.Sigmoid(), ) discriminator = nn.Sequential( nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1), nn.LeakyReLU(0.2, inplace=True), GlobalMaxPool2d(), Flatten(), nn.Linear(128, 1)) model = {"generator": generator, "discriminator": discriminator} optimizer = { "generator": torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.5, 0.999)), "discriminator": torch.optim.Adam(discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)), } loaders = { "train": DataLoader(MNIST(os.getcwd(), train=True, download=True,