def pck_callback(root, data_in, data_out, config):
    """Run Alphapose inference and compute the PCK for one or more thresholds.

    The settings are read from ``config["alphapose_pck_callback"]``:
    ``true_poses_file``, ``distance_threshold`` (a single number or a
    list/tuple of numbers), ``keypoints_to_use`` and ``joint_order``.

    Returns:
        dict: maps ``"pck@<threshold>"`` to the value returned by
        ``pck_from_posefiles`` for that threshold.

    Raises:
        TypeError: if ``distance_threshold`` is neither a number nor a
            list/tuple.
    """
    callback_config = config.get("alphapose_pck_callback")
    true_poses_file = callback_config["true_poses_file"]
    distance_threshold = callback_config["distance_threshold"]
    keypoints_to_use = callback_config["keypoints_to_use"]
    joint_order = callback_config["joint_order"]

    # Normalise to a list so scalar and multi-threshold configs share one path.
    if isinstance(distance_threshold, (int, float)):
        thresholds = [distance_threshold]
    elif isinstance(distance_threshold, (list, tuple)):
        thresholds = list(distance_threshold)
    else:
        # Fail before the expensive inference run instead of hitting an
        # unbound result variable afterwards.
        raise TypeError(
            "distance_threshold must be a number or a list of numbers, got "
            f"{type(distance_threshold).__name__}"
        )

    predicted_poses_file = inference_callback(root, data_in, data_out, config)

    logger = get_logger("Alphapose PCK Callback")
    out = {}
    for threshold in thresholds:
        # Every threshold is evaluated on the same keypoint selection; the
        # list variant previously dropped keypoints_to_use / joint_order.
        pck = pck_from_posefiles(
            true_poses_file,
            predicted_poses_file,
            threshold,
            keypoints_to_use,
            joint_order,
        )
        logger.info(f"PCK@{threshold} : {pck * 100: .02f}%")
        out[f"pck@{threshold}"] = pck
    return out
def __init__(self, config):
    """Build the WGAN generator/critic pair and their Adam optimizers.

    Args:
        config (dict): must provide ``model_type`` ("face" or "sketch"),
            ``learning_rate``, ``batch_size`` and ``num_steps``; an optional
            truthy ``debug_log_level`` switches logging to debug.

    Raises:
        ValueError: if ``model_type`` is neither "face" nor "sketch".
    """
    super(WGAN, self).__init__()
    if "debug_log_level" in config and config["debug_log_level"]:
        LogSingleton.set_log_level("debug")
    # get logger and config
    self.logger = get_logger("VAE_Model")
    self.config = config
    set_random_state(self.config)
    self.logger.info("WGAN_GradientPenalty init model.")

    # Number of image channels depends on the data domain.
    model_type = self.config["model_type"]
    if model_type == "face":
        self.C = 3
    elif model_type == "sketch":
        self.C = 1
    else:
        # Previously self.C stayed unset and the failure surfaced later as
        # an unrelated attribute error.
        raise ValueError(
            f"model_type must be 'face' or 'sketch', got {model_type!r}")

    self.netG = WGAN_Generator(self.C)
    self.netD = WGAN_Discriminator(input_channels=self.C)

    # WGAN values from paper
    self.b1 = 0.5
    self.b2 = 0.999
    self.learning_rate = config["learning_rate"]
    self.batch_size = self.config["batch_size"]

    # WGAN_gradient penalty uses ADAM.
    # The networks are stored as netD / netG; the optimizers used to look
    # up the non-existent attributes D / G.
    self.d_optimizer = optim.Adam(self.netD.parameters(),
                                  lr=self.learning_rate,
                                  betas=(self.b1, self.b2))
    self.g_optimizer = optim.Adam(self.netG.parameters(),
                                  lr=self.learning_rate,
                                  betas=(self.b1, self.b2))

    self.generator_iters = self.config["num_steps"]
    self.critic_iter = 5
    self.lambda_term = 10
def __init__(self, config, root, model, *args, **kwargs):
    """Set up the DCGAN iterator: device, model, optimizers and label tensors.

    Only ``config["model"] == "model.gan.DCGAN"`` is accepted. The
    discriminator learning rate is the base learning rate scaled by the
    optional ``config["optimization"]["D_lr_factor"]`` (1 when absent).
    """
    super().__init__(config, root, model, *args, **kwargs)
    assert config["model"] == "model.gan.DCGAN", \
        "This iterator only supports the model: model.gan.DCGAN"

    self.logger = get_logger("Iterator")

    # Move everything to the GPU selected in the config (if any).
    self.device = set_gpu(config)
    self.logger.debug(f"Model will pushed to the device: {self.device}")

    self.config = config
    set_random_state(self.config["random_seed"])
    self.batch_size = config['batch_size']

    # Dump the architecture to the debug log before moving the model.
    self.logger.debug(f"{model}")
    self.model = model.to(self.device)

    adam_betas = (.5, .999)
    self.optimizer_G = torch.optim.Adam(
        self.model.netG.parameters(),
        lr=self.config["learning_rate"],
        betas=adam_betas,
    )

    D_lr_factor = self.config["optimization"].get("D_lr_factor", 1)
    self.optimizer_D = torch.optim.Adam(
        self.model.netD.parameters(),
        lr=D_lr_factor * self.config["learning_rate"],
        betas=adam_betas,
    )

    # Constant target tensors for real and generated samples.
    self.real_labels = torch.ones(self.batch_size, device=self.device)
    self.fake_labels = torch.zeros(self.batch_size, device=self.device)

    adversarial_loss = self.config["losses"]['adversarial_loss']
    self.wasserstein = bool(adversarial_loss == 'wasserstein')
def __init__(self, config):
    """Create encoder, mean/log-variance heads and decoder from ``config``.

    Reads ``Model/n_down``, ``Model/z_dim`` and ``Model/in_channels``
    (required) plus ``Model/mid_channels`` (defaults to ``in_channels``),
    ``Model/use_bn`` and ``Model/be_deterministic`` (both default False).
    """
    super().__init__()
    self.config = config
    self.logger = get_logger(self.__class__.__name__)

    n_down = retrieve(config, "Model/n_down")
    z_dim = retrieve(config, "Model/z_dim")
    in_channels = retrieve(config, "Model/in_channels")
    mid_channels = retrieve(config, "Model/mid_channels",
                            default=in_channels)
    use_bn = retrieve(config, "Model/use_bn", default=False)
    self.be_deterministic = retrieve(config, "Model/be_deterministic",
                                     default=False)

    # Keyword arguments shared by all four sub-networks.
    common = dict(hidden_dim=mid_channels, use_bn=use_bn)

    self.encoder = BasicFullyConnectedNet(
        dim=in_channels, depth=n_down, out_dim=in_channels, **common)
    self.mu_layer = BasicFullyConnectedNet(
        in_channels, depth=n_down, out_dim=z_dim, **common)
    self.logvar_layer = BasicFullyConnectedNet(
        in_channels, depth=n_down, out_dim=z_dim, **common)
    # The decoder is one layer deeper than the encoder.
    self.decoder = BasicFullyConnectedNet(
        dim=z_dim, depth=n_down + 1, out_dim=in_channels, **common)
def inference_callback(root, data_in, data_out, config):
    """Run the Alphapose inference script on a directory of images.

    All settings come from ``config["alphapose_callback"]``: ``config``,
    ``checkpoint``, ``alphapose_dir`` (working directory of the
    subprocess), ``alphapose_python`` (interpreter), ``infer_script``,
    ``indir`` and ``outdir`` (both resolved relative to ``root``).

    Returns:
        str: path ``<outdir>/alphapose-results.json``. The path is returned
        even when the subprocess failed; a warning is logged in that case.
    """
    logger = get_logger("Alphapose Callback")
    callback_config = config.get("alphapose_callback")
    alphapose_config = callback_config.get("config")
    checkpoint = callback_config.get("checkpoint")
    cwd = callback_config.get("alphapose_dir")
    pythonpath = callback_config.get("alphapose_python")
    infer_script = callback_config.get("infer_script")
    outdir = callback_config.get("outdir")
    indir = callback_config.get("indir")

    outdir = os.path.abspath(os.path.join(root, outdir))
    indir = os.path.abspath(os.path.join(root, indir))
    os.makedirs(outdir, exist_ok=True)

    # input_files = REGEX
    command_string = f"{pythonpath} {infer_script} --cfg {alphapose_config} --checkpoint {checkpoint} --indir {indir} --outdir {outdir} --save_img --detector yolo --vis_fast"

    logger.info("Running command")
    for c in command_string.split(" "):
        logger.info(c)

    # NOTE(review): the command is built from config values and executed
    # through the shell; paths containing spaces or shell metacharacters
    # will break it. Only use with trusted configs.
    return_code = subprocess.call(command_string, shell=True, cwd=cwd)
    if return_code != 0:
        # Do not fail silently: the results file may be missing or stale.
        logger.warning(
            "Alphapose inference exited with return code %s", return_code)

    # outdir is already absolute, so joining it with root again is a no-op.
    results_file = os.path.join(outdir, "alphapose-results.json")
    return results_file
def __init__(self, config):
    """Unroll the prepared model into a flat ``nn.ModuleList`` of layers.

    The list consists of ``model.features``, ``model.avgpool``, a
    ``Flatten(1)``, ``model.classifier`` and, unless the config sets
    ``append_softmax`` to a falsy value, a final softmax.
    """
    super().__init__(config=config)
    self.logger = get_logger(self.__class__.__name__)
    model = self.prepare(model_as_str=self.model_config["model"],
                         checkpoint=retrieve(self.model_config, "checkpoint", default="none"),
                         ).model

    normalize = torchvision.transforms.Normalize(mean=self.mean, std=self.std)
    self.image_transform = torchvision.transforms.Compose([
        torchvision.transforms.Lambda(lambda image: F.interpolate(image, size=(227, 227), mode="bilinear")),
        torchvision.transforms.Lambda(lambda image: torch.stack([normalize(rescale(x)) for x in image]))
    ])

    # prepare the model for analysis with variable layer indices. This needs to be handcrafted for any
    # other model you may want to choose -- see the ResNet and SqueezeNet models for more examples
    self.layers = nn.ModuleList()
    for layer in model.features:
        self.layers.append(layer)
    self.layers.append(model.avgpool)
    self.layers.append(Flatten(1))
    for layer in model.classifier:
        self.layers.append(layer)

    append_softmax = bool(retrieve(config, "append_softmax", default=True))
    if append_softmax:
        # The classifier output is (batch, classes); name the class axis
        # explicitly instead of relying on the deprecated implicit dim.
        self.layers.append(nn.Softmax(dim=1))

    # 22 layers without the softmax, 23 with it. The previous fixed check
    # against 23 rejected every configuration with append_softmax=False.
    expected_layers = 22 + int(append_softmax)
    assert len(self.layers) == expected_layers
    #self.logger.info("Layer Information: \n {}".format(self.layers))
    del model  # don't need this hanging around
def __init__(self, min_channels, max_channels, in_channels, block_activation=nn.ReLU(), batch_norm=False, drop_rate=None, bias=True):
    """This is the constructor for a custom content encoder.

    Args:
        min_channels (int): Channel dimension after the first convolution is applied.
        max_channels (int): Channel dimension is double after every convolutional block up to the value 'max_channels'.
        in_channels (int): Channel dimension of the input image.
        block_activation (torch.nn module, optional): Activation function used in the convolutional blocks. Defaults to nn.ReLU().
        batch_norm (bool, optional): Normalize over the batch size. Defaults to False.
        drop_rate (float, optional): Dropout rate for the convolutions. Defaults to None, corresponding to no dropout.
        bias (bool, optional): If the convolutions use a bias. Defaults to True.
    """
    super(Content_Encoder, self).__init__()
    self.logger = get_logger("Content_Encoder")
    # create a list with all channel dimensions throughout the encoder.
    layers = []
    # Powers of two from min_channels up to max_channels. The builtin int
    # replaces the np.int alias, which no longer exists in current NumPy.
    channel_numbers = [in_channels] + list(2 ** np.arange(np.log2(min_channels), np.log2(max_channels+1)).astype(int))
    # get all convolutional blocks with corresponding parameters
    for i in range(len(channel_numbers)-1):
        # the first block keeps the resolution, every later one uses stride 2
        stride = 1 if i == 0 else 2
        in_ch = channel_numbers[i]
        out_ch = channel_numbers[i+1]
        # add convolution
        layers.append(Conv2dBlock(in_ch, out_ch, block_activation, batch_norm, drop_rate, bias, stride=stride))
        layers.append(nn.InstanceNorm2d(out_ch))
    # save all blocks to the class instance
    self.main = nn.Sequential(*layers)
    self.logger.debug("Content Encoder channel sizes: {}".format(channel_numbers))
def __init__(self, config, root, model, *args, **kwargs):
    """Set up the VAE-GAN iterator: device, model, optimizers, label tensors.

    Accepts only ``model.vae_gan.VAE_GAN`` and ``model.vae_gan.VAE_WGAN``
    as ``config["model"]``. The discriminator learning rate is the base
    rate times the optional ``config["optimization"]["D_lr_factor"]``.
    """
    super().__init__(config, root, model, *args, **kwargs)
    self.logger = get_logger("Iterator")

    supported_models = ["model.vae_gan.VAE_GAN", "model.vae_gan.VAE_WGAN"]
    assert config["model"] in supported_models, \
        "This Iterator only supports the VAE GAN models: VAE_GAN and VAE_WGAN."

    # Select the GPU named in the config, if any.
    self.device = set_gpu(config)
    self.logger.debug(f"Model will pushed to the device: {self.device}")

    self.config = config
    set_random_state(random_seed=self.config["random_seed"])
    self.batch_size = config['batch_size']

    # The config itself is validated inside the model class.
    self.logger.debug(f"{model}")
    self.model = model.to(self.device)

    generator_params = itertools.chain(self.model.netG.parameters())
    self.optimizer_G = torch.optim.Adam(
        generator_params, lr=self.config["learning_rate"])

    D_lr_factor = self.config["optimization"].get("D_lr_factor", 1)
    self.optimizer_D = torch.optim.Adam(
        self.model.netD.parameters(),
        lr=D_lr_factor * self.config["learning_rate"])

    # Constant target tensors for real and generated samples.
    self.real_labels = torch.ones(self.batch_size, device=self.device)
    self.fake_labels = torch.zeros(self.batch_size, device=self.device)
def __init__(self, config):
    """Unroll the prepared ResNet-style model into an indexable layer list.

    Index 0 denotes the input; the appended modules occupy indices 1..11,
    plus an optional softmax at index 12 when ``append_softmax`` is truthy
    in the config (the default).
    """
    super().__init__(config=config)
    self.logger = get_logger(self.__class__.__name__)

    normalize = torchvision.transforms.Normalize(mean=self.mean, std=self.std)
    self.image_transform = torchvision.transforms.Compose([
        torchvision.transforms.Lambda(lambda image: F.interpolate(image, size=(224, 224), mode="bilinear")),
        torchvision.transforms.Lambda(lambda image: torch.stack([normalize(rescale(x)) for x in image]))
    ])

    model = self.prepare(model_as_str=self.model_config["model"],
                         checkpoint=retrieve(self.model_config, "checkpoint", default="none"),
                         pretrained_key=retrieve(self.model_config, "pretrained_key", default="none")
                         ).model

    self.layers = nn.ModuleList()
    # input: index 0
    self.layers.append(model.conv1)    # 1
    self.layers.append(model.bn1)      # 2
    self.layers.append(model.relu)     # 3
    self.layers.append(model.maxpool)  # 4
    self.layers.append(model.layer1)   # 5
    self.layers.append(model.layer2)   # 6
    self.layers.append(model.layer3)   # 7
    self.layers.append(model.layer4)   # 8
    self.layers.append(model.avgpool)  # 9
    self.layers.append(Flatten(1))     # 10
    self.layers.append(model.fc)       # 11
    if retrieve(config, "append_softmax", default=True):
        self.logger.info("Note: Appending Softmax as last layer in classifier.")
        # The fc output is (batch, classes); name the class axis explicitly
        # instead of relying on the deprecated implicit dim.
        self.layers.append(nn.Softmax(dim=1))  # 12
def __init__(self, config, train=False):
    """Prepare the dataset for training or validation images.

    :param config: Configuration (loaded from config.yaml) holding all
        hyperparameters needed by the dataset.
    :param train: Forwarded to ``load_indices`` to select the split.
    """
    # Switch to debug logging when requested by the config.
    if config.get("debug_log_level"):
        LogSingleton.set_log_level("debug")
    self.logger = get_logger("Dataset")
    self.config = config

    self.data_types = self.setup_data_types()
    self.data_roots = self.get_data_roots()

    # Everything below is derived from the config.
    self.set_image_transforms()
    self.set_random_state()

    # Yet a bit sloppy but ok
    encoder_free_models = ["model.gan.DCGAN", "model.gan.WGAN"]
    self.no_encoder = self.config["model"] in encoder_free_models

    self.sketch_data = self.load_sketch_data()
    self.indices = self.load_indices(train)
def __init__(self, config):
    """Initialise the parent with ``config["dataroot"]`` and store metadata.

    The animal name is taken from the second path component of the data
    root, up to its first underscore.
    """
    super().__init__(config["dataroot"])
    self.config = config
    # NOTE(review): `crop` is not defined inside this method; it has to
    # resolve to a module-level name -- confirm that one exists.
    self.crop = crop
    self.logger = get_logger(self)

    # works if dataroot like "VOC2011/cats_meta"
    second_component = config["dataroot"].split("/")[1]
    self.animal = second_component.split("_")[0]
def __init__(self, config, root, model, *args, **kwargs):
    """Set up the VAE-WGAN iterator: device, model, constants and optimizers.

    Rejects ``model_type == "sketch2face"`` and every model other than
    ``model.vae_gan.VAE_WGAN``. The number of critic updates comes from
    ``config["losses"]["update_disc"]`` (5 when absent).
    """
    super().__init__(config, root, model, *args, **kwargs)
    assert config["model_type"] != "sketch2face", \
        "This iterator does not support sketch2face models only single GAN models supported."
    assert config["model"] == "model.vae_gan.VAE_WGAN", \
        "This iterator only supports the model: model.vae_gan.VAE_WGAN"

    self.config = config
    self.logger = get_logger("Iterator")
    set_random_state(self.config["random_seed"])

    # Pick the device (CUDA if configured and available).
    self.device = set_gpu(self.config)
    self.logger.debug(f"Model will pushed to the device: {self.device}")

    # Dump the architecture to the debug log before moving the model.
    self.logger.debug(f"{model}")
    self.model = model.to(self.device)

    # Frequently used constants.
    self.learning_rate = self.config["learning_rate"]
    self.batch_size = self.config["batch_size"]
    self.critic_iter = self.config["losses"].get("update_disc", 5)

    # WGAN values from paper
    adam_betas = (0.5, 0.999)

    self.optimizer_G = torch.optim.Adam(
        self.model.netG.parameters(),
        lr=self.learning_rate,
        betas=adam_betas,
    )

    # The discriminator may train with a scaled learning rate.
    D_lr_factor = self.config["optimization"].get("D_lr_factor", 1)
    self.optimizer_D = torch.optim.Adam(
        self.model.netD.parameters(),
        lr=self.learning_rate * D_lr_factor,
        betas=adam_betas,
    )
def __init__(self, *args, **kwargs):
    """Create the optimizer, the loss modules and the device settings.

    Reads ``lr``, ``cuda`` and the ``losses`` section (``vgg``,
    ``vgg_l1_alpha``, ``vgg_alpha``, ``perceptual``,
    ``perceptual_network``) from ``self.config``.
    """
    super().__init__(*args, **kwargs)
    self.logger = get_logger(self)

    # Optimizer over all model parameters.
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.config["lr"])

    # Loss functions.
    self.mse_loss = torch.nn.MSELoss()
    # self.mse_instance = MSELossInstances()
    # self.l1_instance = L1LossInstances()

    # CUDA is used only if requested in the config and actually available.
    self.cuda = bool(self.config["cuda"] and torch.cuda.is_available())
    if self.cuda:
        self.device = "cuda"
    else:
        self.device = "cpu"

    # Imagenet Mean
    self.mean = [0.485, 0.456, 0.406]
    self.std = [0.229, 0.224, 0.225]
    self.normalize = transforms.Normalize(mean=list(self.mean),
                                          std=list(self.std))
    # self.normalize = torchvision.transforms.Normalize(mean=self.mean, std=self.std)

    if self.cuda:
        self.model.cuda()

    self.freeze_encoder()

    loss_config = self.config["losses"]

    # vgg loss
    if loss_config["vgg"]:
        vgg_l1 = VGGLossWithL1(gpu_ids=[0],
                               l1_alpha=self.config["losses"]["vgg_l1_alpha"],
                               vgg_alpha=self.config["losses"]["vgg_alpha"])
        self.vggL1 = vgg_l1.to(self.device)

    # initialize perceptual loss if requested
    if self.config["losses"]["perceptual"]:
        net = self.config["losses"]["perceptual_network"]
        assert net in ["alex", "squeeze", "vgg"], f"Perceptual network needs to be 'alex', 'squeeze' or 'vgg', got {net}"
        perceptual = PerceptualLoss(model='net-lin', net=net,
                                    use_gpu=self.cuda, spatial=False)
        self.perceptual_loss = perceptual.to(self.device)
def __init__(self, config=None):
    """Initialise an empty container; both models start out as ``None``.

    Args:
        config: Optional configuration object, stored unchanged.
    """
    # The original call passed the arguments to super() in the wrong order
    # (instance first, class second), which raises a TypeError.
    super().__init__()
    self.logger = get_logger(self)
    self.config = config
    # Prefer CUDA whenever it is available.
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    self.latent_model = None
    self.amortized_model = None
def __init__(self, config):
    """Read the training-sample settings from ``config["BigGANData"]``.

    Stores ``n_train_samples``, ``z_shape``, ``n_classes`` and the optional
    ``truncation`` level (0 when absent), and logs the level if positive.
    """
    data_config = config["BigGANData"]
    self.config = data_config
    self.logger = get_logger(self.__class__.__name__)

    self.n_samples = data_config["n_train_samples"]
    self.z_shape = data_config["z_shape"]
    self.n_classes = data_config["n_classes"]

    self.truncation_threshold = retrieve(self.config, "truncation", default=0)
    if self.truncation_threshold > 0:
        message = "Applying truncation at level {}".format(self.truncation_threshold)
        self.logger.info(message)
def __init__(self, config, root, model, *args, **kwargs):
    """Set up the CycleGAN iterator: device, model, optimizers, label tensors.

    Accepts only ``model.cycle_gan.Cycle_GAN`` and refuses the
    ``wasserstein`` adversarial loss. When
    ``config["optimization"]["only_latent_layer"]`` is truthy, an extra
    optimizer over the latent layers of both generators is created.
    """
    super().__init__(config, root, model, *args, **kwargs)
    assert config["model"] == "model.cycle_gan.Cycle_GAN", \
        "This CycleGAN iterator only works with with the Cycle_GAN model."
    assert config["losses"]["adversarial_loss"] != "wasserstein", \
        "This CycleGAN does not support an adversarial wasserstein loss"

    self.logger = get_logger("Iterator")

    # Select the GPU named in the config, if any.
    self.device = set_gpu(config)
    self.logger.debug(f"Model will pushed to the device: {self.device}")

    self.config = config
    set_random_state(self.config["random_seed"])
    self.batch_size = config['batch_size']

    # The config itself is validated inside the model class.
    self.logger.debug(f"{model}")
    self.model = model.to(self.device)

    # Optionally initialise from pretrained VAEs, as specified in the config.
    self.model, log_string = load_pretrained_vaes(config=self.config,
                                                  model=self.model)
    self.logger.debug(log_string)

    # One optimizer for both generators ...
    generator_params = itertools.chain(self.model.netG_A.parameters(),
                                       self.model.netG_B.parameters())
    self.optimizer_G = torch.optim.Adam(
        generator_params, lr=self.config["learning_rate"])

    # ... and one per discriminator, with an optionally scaled rate.
    D_lr_factor = self.config["optimization"].get("D_lr_factor", 1)
    self.optimizer_D_A = torch.optim.Adam(
        self.model.netD_A.parameters(),
        lr=D_lr_factor * self.config["learning_rate"])
    self.optimizer_D_B = torch.optim.Adam(
        self.model.netD_B.parameters(),
        lr=D_lr_factor * self.config["learning_rate"])

    variational_cfg = self.config['variational']
    self.add_latent_layer = bool(
        'num_latent_layer' in variational_cfg
        and self.config['variational']['num_latent_layer'] > 0)

    optimization_cfg = self.config['optimization']
    self.only_latent_layer = bool(
        'only_latent_layer' in optimization_cfg
        and self.config['optimization']['only_latent_layer'])

    if self.only_latent_layer:
        latent_params = itertools.chain(
            self.model.netG_A.latent_layer.parameters(),
            self.model.netG_B.latent_layer.parameters())
        self.optimizer_Lin = torch.optim.Adam(
            latent_params, lr=self.config["learning_rate"])
        self.logger.debug(
            "Only latent layers are optimized\nNumber of latent layers: {}"
            .format(self.config['variational']['num_latent_layer']))

    # Constant target tensors for real and generated samples.
    self.real_labels = torch.ones(self.batch_size, device=self.device)
    self.fake_labels = torch.zeros(self.batch_size, device=self.device)
def __init__(self, min_channels, max_channels, out_channels, style_dim, num_res_blocks=9, block_activation=nn.ReLU(), final_activation=nn.Tanh(), batch_norm=False, drop_rate=None, bias=True):
    """This is the constructor for a custom decoder.

    Args:
        min_channels (int): Channel dimension before the last convolution is applied.
        max_channels (int): Channel dimension after the first convolution is applied. The channel dimension is cut in half after every convolutional block.
        out_channels (int): Channel dimension of the output image.
        style_dim (int): Dimension of the style vector.
        num_res_blocks (int): Number of residual blocks. Currently only
            reported in the debug log: six style residual blocks
            (``res1`` .. ``res6``) are always created.
        block_activation (torch.nn module, optional): Activation function used in the convolutional blocks. Defaults to nn.ReLU().
        final_activation (torch.nn module, optional): Activation function used in the last convolution for the output image. Defaults to nn.Tanh().
        batch_norm (bool, optional): Normalize over the batch size. Defaults to False.
        drop_rate (float, optional): Dropout rate for the convolutions. Defaults to None, corresponding to no dropout.
        bias (bool, optional): If the convolutions use a bias. Defaults to True.
    """
    super(Decoder, self).__init__()
    self.logger = get_logger("Decoder")
    # create a list with all channel dimensions throughout the decoder.
    res_layers = []
    self.res1 = StyleResidualBlock(max_channels, style_dim)
    self.res2 = StyleResidualBlock(max_channels, style_dim)
    self.res3 = StyleResidualBlock(max_channels, style_dim)
    self.res4 = StyleResidualBlock(max_channels, style_dim)
    self.res5 = StyleResidualBlock(max_channels, style_dim)
    self.res6 = StyleResidualBlock(max_channels, style_dim)
    # res_layers is never filled, so this Sequential is empty; it is kept
    # so the attribute continues to exist.
    self.res_layers = nn.Sequential(*res_layers)
    self.logger.debug("Added {} residual blocks.".format(num_res_blocks))

    conv_layers = []
    # Powers of two from max_channels down to min_channels, followed by the
    # output channels. The builtin int replaces the np.int alias, which no
    # longer exists in current NumPy.
    channel_numbers = list(2 ** np.arange(np.log2(min_channels), np.log2(max_channels+1)).astype(int)[::-1]) + [out_channels]
    stride = 2
    padding = 1
    # get all convolutional blocks with corresponding parameters
    for i in range(len(channel_numbers)-2):
        in_ch = channel_numbers[i]
        out_ch = channel_numbers[i+1]
        conv_layers.append(Transpose2dBlock(in_ch, out_ch, block_activation, batch_norm, drop_rate, bias, stride=stride, padding=padding))
        conv_layers.append(nn.InstanceNorm2d(out_ch))
    # final convolution mapping min_channels to the output image channels
    conv_layers.append(Conv2dBlock(min_channels, out_channels, final_activation, stride=1))
    # save all blocks to the class instance
    self.conv_layers = nn.Sequential(*conv_layers)
    self.logger.debug("Decoder channel sizes: {}".format(channel_numbers))
def __init__(self, out_channels, out_size, min_channels, max_channels, num_classes, lin_layer_size=128, block_activation=nn.LeakyReLU(), batch_norm=False, drop_rate=None, bias=True):
    """This is the constructor for the discriminator of the style transfer model

    Args:
        out_channels (int): Channel dimension of the output image.
        out_size (int): Size of the output image.
        min_channels (int): Channel dimension before the last convolution is applied.
        max_channels (int): Channel dimension after the first convolution is applied. The channel dimension is cut in half after every convolutional block.
        num_classes (int): Number of classes; added to the input width of the first linear layer.
        lin_layer_size (int): Size of last linear layer in the Discriminator. A value <= 0 skips the hidden linear layer.
        block_activation (torch.nn module, optional): Activation function of the convolution. Defaults to nn.LeakyReLU().
        batch_norm (bool, optional): Normalize over the batch size. Defaults to False.
        drop_rate (float, optional): Dropout rate for the convolutions. Defaults to None.
        bias (bool, optional): If the convolutions use a bias. Defaults to True.
    """
    super(Discriminator, self).__init__()
    self.logger = get_logger("Discrimnator")
    conv_layers = []
    # Image channels followed by powers of two from min_channels up to
    # max_channels. The builtin int replaces the np.int alias, which no
    # longer exists in current NumPy.
    channel_numbers = [out_channels] + list(2 ** np.arange(np.log2(min_channels), np.log2(max_channels)+1).astype(int))
    linear_nodes = int((out_size/2)**2 * min_channels * (1/2)**((len(channel_numbers)-2)))
    for i in range(len(channel_numbers)-1):
        in_ch = channel_numbers[i]
        out_ch = channel_numbers[i+1]
        # add convolution
        conv_layers.append(Conv2dBlock(in_ch, out_ch, block_activation, batch_norm, drop_rate, bias))
    conv_layers.append(nn.Flatten())
    self.conv = nn.Sequential(*conv_layers)

    lin_layers = []
    # the flattened features are widened by num_classes entries
    linear_nodes = linear_nodes + num_classes
    if lin_layer_size > 0:
        lin_layers.append(nn.Linear(linear_nodes, lin_layer_size))
        lin_layers.append(block_activation)
        linear_nodes = lin_layer_size
    lin_layers.append(nn.Linear(linear_nodes, 1))
    lin_layers.append(nn.Sigmoid())
    # save all blocks to the class instance
    self.lin = nn.Sequential(*lin_layers)
    self.logger.debug("Discriminator channel sizes: {}".format(channel_numbers))
    self.logger.debug("Linear layer size: {}".format(linear_nodes + num_classes))
def triplet_mse_error(root, data_in, data_out, config):
    """Compute the per-example MSE between ``x_target`` and ``x_out``.

    The per-example values (mean over axes 1, 2, 3) are pickled to
    ``<root>/per_example_mse.p``; their mean and standard deviation are
    logged.

    Returns:
        dict: ``{"scalars": {"mse": <mean of the per-example MSE>}}``.
    """
    residual = data_out.labels["x_target"] - data_out.labels["x_out"]
    per_example_mse = np.mean(np.square(residual), axis=(1, 2, 3))

    # Persist the raw per-example values next to the evaluation outputs.
    fpath = os.path.join(root, "per_example_mse.p")
    with open(fpath, "wb") as handle:
        pickle.dump(per_example_mse, handle)

    logger = edflow.get_logger("triplet_mse_error")
    logger.info(fpath)

    mse = np.mean(per_example_mse)
    std = np.std(per_example_mse)
    summary = "mse: {:.4} +- {:.2}".format(mse, std)
    logger.info(summary)

    scalars = {"mse": mse}
    return {"scalars": scalars}
def __init__(self, config):
    """Build the VAE generator and the matching discriminator.

    ``config["model_type"]`` must contain exactly one of "sketch" and
    "face", and ``config["iterator"]`` must be
    ``iterator.vae_gan.VAE_GAN``. ``self.sigma`` is taken from
    ``config["variational"]["sigma"]`` when present, otherwise ``False``.
    """
    super(VAE_GAN, self).__init__()
    self.config = config
    if "debug_log_level" in config and config["debug_log_level"]:
        LogSingleton.set_log_level("debug")
    self.logger = get_logger("VAE_GAN")

    has_sketch = bool("sketch" in self.config["model_type"])
    has_face = bool("face" in self.config["model_type"])
    assert has_sketch != has_face, "The model_type for this VAE GAN model can only be 'sketch' or 'face' but not 'sketch2face'."
    assert config["iterator"] == "iterator.vae_gan.VAE_GAN", "This model supports only the VAE_GAN iterator."

    set_random_state(self.config)

    if "variational" in self.config and "sigma" in self.config["variational"]:
        self.sigma = self.config["variational"]["sigma"]
    else:
        self.sigma = False

    sketch = "sketch" in self.config["model_type"]

    self.netG = VAE_config(self.config)
    if sketch:
        self.netD = Discriminator_sketch()
    else:
        self.netD = Discriminator_face()
def __init__(self, config):
    """Build a torchvision ResNet classifier, optionally with stylized weights.

    Config keys (read via ``retrieve``): ``Model/n_classes`` (required),
    ``Model/type`` (default ``'resnet50'``), ``Model/custom_head`` (default
    True; replaces ``fc`` with an ``n_classes`` output layer),
    ``Model/imagenet_pretrained`` (default True) and, for
    ``resnet50stylized``, ``Model/whichstyle``.

    Raises:
        KeyError: if ``Model/type`` names an unsupported network.
        AssertionError: if ``resnet50stylized`` is combined with a custom head.
    """
    super().__init__()
    possible_resnets = {
        'resnet18': models.resnet18,
        'resnet34': models.resnet34,
        'resnet50': models.resnet50,
        'resnet101': models.resnet101,
        'resnet50stylized': models.resnet50,
    }
    from torch.utils import model_zoo
    self.logger = get_logger(self.__class__.__name__)
    self.n_out = retrieve(config, "Model/n_classes")
    self.type = retrieve(config, "Model/type", default='resnet50')
    custom_head = retrieve(config, "Model/custom_head", default=True)
    self.model = possible_resnets[self.type](pretrained=retrieve(
        config, "Model/imagenet_pretrained", default=True))
    if custom_head:
        self.model.fc = nn.Linear(self.model.fc.in_features, self.n_out)

    if self.type in ["resnet50stylized"]:
        self.logger.info(
            "Loading pretrained Resnet-50 trained on stylized ImageNet")
        which_stylized = retrieve(config,
                                  "Model/whichstyle",
                                  default="resnet50_trained_on_SIN")
        self.logger.info("Loading {} from url {}".format(
            which_stylized, STYLE_MODEL_URLS[which_stylized]))
        # the downloaded weights only fit the original classification head
        assert not custom_head
        url = STYLE_MODEL_URLS[which_stylized]
        state = model_zoo.load_url(url)
        # remove the .module in keys of state dict (from DataParallel);
        # keys without the prefix are kept as they are instead of being
        # truncated by a fixed seven characters
        prefix = "module."
        state_unboxed = dict()
        for k in tqdm(state["state_dict"].keys(), desc="StateDict"):
            new_key = k[len(prefix):] if k.startswith(prefix) else k
            state_unboxed[new_key] = state["state_dict"][k]
        self.model.load_state_dict(state_unboxed)
        self.logger.info(
            "Loaded resnet50 trained on stylized ImageNet, version {}".
            format(which_stylized))

    normalize = torchvision.transforms.Normalize(mean=self.mean,
                                                 std=self.std)
    self.image_transform = torchvision.transforms.Compose([
        torchvision.transforms.Lambda(lambda image: F.interpolate(
            image, size=(224, 224), mode="bilinear")),
        torchvision.transforms.Lambda(lambda image: torch.stack(
            [normalize(rescale(x)) for x in image]))
    ])
def __init__(self, config, img_shape, code_dim):
    """Create a ResNet backbone with a linear head producing ``code_dim`` values.

    Args:
        config (dict): ``resnet_type`` selects the entry of ``resnet_spec``
            (defaults to 50 when absent; strings are converted to int).
        img_shape: Not used by this constructor.
        code_dim (int): Output size of the linear layer appended to
            ``layer4`` of the backbone.
    """
    # NOTE(review): no parent __init__ is invoked here -- confirm that the
    # enclosing class does not require one before attributes are assigned.
    # Resolve the type once so the default and the int conversion also
    # apply to the comparison below (it previously re-read the raw value).
    resnet_type = int(config.get("resnet_type", "50"))
    block_type, layers, _channels, _name = resnet_spec[resnet_type]
    self.logger = get_logger(self)
    self.backbone = ResNetBackbone(block_type, layers)

    # resnet 18 / 34 end with 512 feature channels, resnet 50/101/152 with 2048
    final_channels = 512 if resnet_type <= 38 else 2048
    self.backbone.layer4.add_module(
        "fc",
        nn.Sequential(Flatten(),
                      nn.Linear(final_channels * 4 * 4, code_dim)))
def __init__(self, config):
    """Build the VAE generator and the Wasserstein critic.

    ``config["model_type"]`` must contain exactly one of "sketch" and
    "face", and ``config["iterator"]`` must be
    ``iterator.vae_wgan.VAE_WGAN``. The face critic receives the input
    resolution from ``config["data"]["transform"]["resolution"]``.
    """
    super(VAE_WGAN, self).__init__()
    if "debug_log_level" in config and config["debug_log_level"]:
        LogSingleton.set_log_level("debug")

    self.logger = get_logger("CycleWGAN")
    self.config = config
    set_random_state(self.config)

    has_sketch = bool("sketch" in self.config["model_type"])
    has_face = bool("face" in self.config["model_type"])
    assert has_sketch != has_face, "The model_type for this VAE GAN model can only be 'sketch' or 'face' but not 'sketch2face'."
    assert config["iterator"] == "iterator.vae_wgan.VAE_WGAN", "This model supports only the VAE_WGAN iterator."
    self.logger.info("VAE WGAN init model.")

    if "variational" in self.config and "sigma" in self.config["variational"]:
        self.sigma = self.config['variational']['sigma']
    else:
        self.sigma = False

    self.netG = VAE_config(self.config)
    if "sketch" in self.config["model_type"]:
        self.netD = WGAN_Discriminator_sketch()
    else:
        resolution = config["data"]["transform"]["resolution"]
        self.netD = WGAN_Discriminator_face(input_resolution=resolution)
def __init__(self, config, root, model, *args, **kwargs):
    """Set up the VAE iterator: device, Adam optimizer and model placement.

    The optimizer is created with ``config["learning_rate"]`` and betas
    (0.5, 0.999) before the model is moved to the selected device.
    """
    super().__init__(config, root, model, *args, **kwargs)
    self.logger = get_logger("Iterator")

    # Select the GPU named in the config, if any.
    self.device = set_gpu(config)
    self.logger.debug(f"Model will pushed to the device: {self.device}")

    self.config = config
    set_random_state(random_seed=self.config["random_seed"])

    # The config itself is validated inside the model class.
    self.logger.debug(f"{model}")
    self.vae = model

    adam_betas = (0.5, 0.999)
    self.optimizer = torch.optim.Adam(
        self.vae.parameters(),
        lr=self.config["learning_rate"],
        betas=adam_betas,
    )
    self.vae.to(self.device)
def __init__(self, config, train=False):
    """Prepare the style/content dataset for training or validation.

    :param config: Configuration (loaded from config.yaml); the style and
        content roots are read from ``config["data"]["style_path"]`` and
        ``config["data"]["content_path"]``.
    :param train: Forwarded to ``load_content_indices`` to select the split.
    """
    # Switch to debug logging when requested by the config.
    if config.get("debug_log_level"):
        LogSingleton.set_log_level("debug")
    self.logger = get_logger("Dataset")
    self.config = config

    data_config = self.config["data"]
    self.style_root = data_config["style_path"]
    self.content_root = self.config["data"]["content_path"]

    self.set_image_transform()
    self.set_random_state()

    self.indices = self.load_content_indices(train)
    self.art_list = self.load_art_list()
def __init__(self, min_channels, max_channels, in_channels, out_channels, in_size, num_classes, style_dim, num_res_blocks=9, lin_layer_size=0, block_activation=nn.ReLU(), final_activation=nn.Tanh(), batch_norm=False, drop_rate=None, bias=True):
    """Assemble the full style transfer model from its four sub-networks.

    Args:
        min_channels (int): Channel dimension after the first convolution is applied.
        max_channels (int): Upper bound for the channel dimension, which doubles after every convolutional block.
        in_channels (int): Channel dimension of the input image.
        out_channels (int): Channel dimension of the output image.
        in_size (int): Passed to the Discriminator as its image size.
        num_classes (int): Passed to the Discriminator as its number of classes.
        style_dim (int): Dimension of the style vector.
        num_res_blocks (int): Number of residual blocks handed to the Decoder. Defaults to 9.
        lin_layer_size (int): Size of the last linear layer in the Discriminator. Defaults to 0.
        block_activation (torch.nn module, optional): Activation used in the encoder and decoder blocks. Defaults to nn.ReLU().
        final_activation (torch.nn module, optional): Activation used in the last convolution of the Decoder. Defaults to nn.Tanh().
        batch_norm (bool, optional): Normalize over the batch size. Defaults to False.
        drop_rate (float, optional): Dropout rate for the convolutions. Defaults to None, corresponding to no dropout.
        bias (bool, optional): If the convolutions use a bias. Defaults to True.
    """
    super(Style_Transfer_Model, self).__init__()
    self.logger = get_logger("Style_Transfer_Model")

    # Trailing positional arguments shared by every sub-network.
    conv_options = (batch_norm, drop_rate, bias)

    self.c_enc = Content_Encoder(
        min_channels, max_channels, in_channels,
        block_activation, *conv_options)
    self.s_enc = Style_Encoder(
        min_channels, max_channels, in_channels, style_dim,
        block_activation, *conv_options)
    self.dec = Decoder(
        min_channels, max_channels, out_channels, style_dim,
        num_res_blocks, block_activation, final_activation, *conv_options)
    # The discriminator always uses a LeakyReLU with slope 0.2.
    self.disc = Discriminator(
        out_channels, in_size, min_channels, max_channels, num_classes,
        lin_layer_size, nn.LeakyReLU(0.2), *conv_options)

    self.logger.info("Initialized.")
def __init__(self, config):
    """Build the DCGAN generator and discriminator from ``config``.

    ``config["model_type"]`` must contain exactly one of "sketch" and
    "face"; it selects the image shape, the number of extra convolutions
    and the discriminator class.
    """
    super(DCGAN, self).__init__()
    if "debug_log_level" in config and config["debug_log_level"]:
        LogSingleton.set_log_level("debug")
    self.config = config
    self.logger = get_logger("DCGAN")
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")

    has_sketch = bool("sketch" in self.config["model_type"])
    has_face = bool("face" in self.config["model_type"])
    assert has_sketch != has_face, "The model_type for this DCGAN model can only be 'sketch' or 'face' but not 'sketch2face'."

    self.sketch = "sketch" in self.config["model_type"]
    self.wasserstein = bool(
        self.config["losses"]['adversarial_loss'] == 'wasserstein')

    # Generator hyperparameters.
    latent_dim = self.config['latent_dim']
    conv_config = self.config['conv']
    min_channels = conv_config['n_channel_start']
    max_channels = self.config['conv']['n_channel_max']

    # [resolution, channels] for both domains
    sketch_shape = [32, 1]
    face_shape = [self.config['data']['transform']['resolution'], 3]
    num_extra_conv_sketch = self.config['conv']['sketch_extra_conv']
    num_extra_conv_face = self.config['conv']['face_extra_conv']

    block_activation = nn.ReLU()
    final_activation = nn.Tanh()
    batch_norm_dec = self.config['batch_norm']
    drop_rate_dec = self.config['dropout']['dec_rate']
    drop_rate_disc = self.config['dropout']['disc_rate']
    bias_dec = self.config['bias']['dec']

    # Pick the domain-specific values.
    if "sketch" in self.config["model_type"]:
        shapes = sketch_shape
    else:
        shapes = face_shape
    if "sketch" in self.config["model_type"]:
        num_extra_conv = num_extra_conv_sketch
    else:
        num_extra_conv = num_extra_conv_face

    self.netG = VAE_Decoder(latent_dim, min_channels, max_channels,
                            *shapes, num_extra_conv, block_activation,
                            final_activation, batch_norm_dec,
                            drop_rate_dec, bias_dec)

    if self.sketch:
        self.netD = Discriminator_sketch(droprate=drop_rate_disc,
                                         wasserstein=self.wasserstein)
    else:
        self.netD = Discriminator_face(droprate=drop_rate_disc,
                                       wasserstein=self.wasserstein)
def __init__(self, config):
    """Create a fixed, seeded set of latent codes and class labels.

    Reads ``n_test_samples``, ``z_shape``, ``n_classes`` and the optional
    ``truncation`` level (0 when absent) from ``config["BigGANData"]``.
    All random numbers come from ``RandomState(1)``. When the truncation
    level is positive, every latent entry whose absolute value exceeds it
    is redrawn until it lies within the level.
    """
    self.prng = np.random.RandomState(1)
    self.config = config["BigGANData"]
    self.logger = get_logger(self.__class__.__name__)
    self.n_samples = self.config["n_test_samples"]
    self.z_shape = self.config["z_shape"]
    self.n_classes = self.config["n_classes"]
    self.truncation_threshold = retrieve(self.config, "truncation", default=0)
    self.zs = self.prng.randn(self.n_samples, *self.z_shape)
    if self.truncation_threshold > 0:
        self.logger.info("Applying truncation at level {}".format(self.truncation_threshold))
        for z in tqdm(self.zs, desc="Truncation:"):
            # z is a view into self.zs, so writing into its flattened view
            # updates the stored samples in place; flattening also covers
            # latent shapes with more than one axis.
            flat = z.reshape(-1)
            for k in range(flat.size):
                zi = flat[k]
                while abs(zi) > self.truncation_threshold:
                    # Same single draw as before, but unpacked to a scalar
                    # so no one-element array is assigned to a scalar slot.
                    zi = self.prng.randn(1)[0]
                flat[k] = zi
        self.logger.info("Created truncated test data.")
    # Labels are drawn after the latents to keep the random stream order.
    self.clss = self.prng.randint(self.n_classes, size=(self.n_samples,))
def __init__(self, config, mode="all"):
    """Wrap ``MPII_Sequence`` and optionally restrict it to one split.

    Args:
        config (dict): reads ``dataroot``, ``train_size``, ``sigma``,
            ``augmentation`` and ``resize_to``; ``random_state`` is also
            read when ``mode`` is not "all".
        mode (str): "train", "validation" or "all". For "train" and
            "validation" the data is split with
            ``sklearn.model_selection.train_test_split``.
    """
    assert mode in ["train", "validation", "all"], \
        f"Should be train, validation or all, got {mode}"
    self.config = config
    self.sequence_length = 2  # if config.get("sequence_length", False) == False else config["sequence_length"]
    # self.sc = Animal_Sequence(config)
    self.sc = MPII_Sequence(config)
    # works if dataroot like "VOC2011/cats_meta"
    # TODO PROBABLY NOT CORRECT HERE
    self.animal = config["dataroot"].split("/")[1].split("_")[0]

    # Number of training examples, and the remaining examples for testing.
    # The test count used to be computed as 1 - self.train, which mixed a
    # count with a fraction and produced a negative number.
    self.train = int(config["train_size"] * len(self.sc))
    self.test = len(self.sc) - self.train

    self.sigma = config["sigma"]
    self.augmentation = config["augmentation"]
    self.logger = get_logger(self)

    self.resize = iaa.Resize(self.config["resize_to"])
    self.aug_factor = 0.5
    self.seq = iaa.Sequential([
        iaa.Sometimes(self.aug_factor + 0.2, iaa.Fliplr()),
        iaa.Sometimes(self.aug_factor, iaa.Flipud()),
    ], random_order=True)

    if mode != "all":
        # split_indices = np.arange(self.train) if mode == "train" else np.arange(self.train + 1, len(self.sc))
        dset_indices = np.arange(len(self.sc))
        train_indices, test_indices = sklearn.model_selection.train_test_split(
            dset_indices,
            train_size=float(config["train_size"]),
            random_state=int(config["random_state"]))
        if mode == "train":
            self.data = SubDataset(self.sc, train_indices)
        else:
            self.data = SubDataset(self.sc, test_indices)
    else:
        self.data = self.sc
def dim_callback(root, data_in, data_out, config):
    """Estimate and log how many latent dimensions each factor occupies.

    For every factor except ``data_in.residual_index`` a score is computed
    from the paired codes ``example1``/``example2`` of that factor,
    normalised by the total variance of all codes; the residual factor
    gets a score of 1.0. The softmax of the scores is scaled by the code
    dimensionality and truncated to integers, with the last entry taking
    the remainder. The result is only logged; nothing is returned.
    """
    logger = edflow.get_logger("dim_callback")
    labels = data_out.labels
    factors = labels["factor"]
    za = labels["example1"].squeeze()
    zb = labels["example2"].squeeze()

    # Total variance over both sets of codes, summed across dimensions.
    zall = np.concatenate([za, zb], 0)
    mean = np.mean(zall, 0, keepdims=True)
    centered = zall - mean
    var = np.sum(np.mean(centered * centered, 0))

    score_list = []
    for f in range(data_in.n_factors):
        if f == data_in.residual_index:
            score_list.append(1.0)
            continue
        indices = np.where(factors == f)[0]
        codes_a = za[indices]
        codes_b = zb[indices]
        # Mean shared by both halves of the pairs for this factor.
        factor_mean = 0.5 * (
            np.mean(codes_a, 0, keepdims=True) +
            np.mean(codes_b, 0, keepdims=True))
        cross = np.sum(
            np.mean((codes_a - factor_mean) * (codes_b - factor_mean), 0))
        score_list.append(cross / var)

    scores = np.array(score_list)

    # Softmax, shifted by the maximum before exponentiation.
    m = np.max(scores)
    e = np.exp(scores - m)
    softmaxed = e / np.sum(e)

    dim = za.shape[1]
    dims = []
    for share in softmaxed:
        dims.append(int(share * dim))
    # The last factor absorbs whatever the truncation left over.
    dims[-1] = dim - sum(dims[:-1])
    logger.info("estimated factor dimensionalities: {}".format(dims))