Example #1
    def forward(self, ref_im, seed, loss_str, eps, noise_type,
                num_trainable_noise_layers, tile_latent, bad_noise_layers,
                opt_name, learning_rate, steps, lr_schedule, save_intermediate,
                **kwargs):

        if seed:
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            torch.backends.cudnn.deterministic = True

        batch_size = ref_im.shape[0]

        # Generate latent tensor
        if (tile_latent):
            latent = torch.randn((batch_size, 1, 512),
                                 dtype=torch.float,
                                 requires_grad=True,
                                 device='cuda')
        else:
            latent = torch.randn((batch_size, 18, 512),
                                 dtype=torch.float,
                                 requires_grad=True,
                                 device='cuda')

        # Generate list of noise tensors
        noise = []  # stores all of the noise tensors
        noise_vars = []  # stores the noise tensors that we want to optimize on

        for i in range(18):
            # dimension of the ith noise tensor
            res = (batch_size, 1, 2**(i // 2 + 2), 2**(i // 2 + 2))
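            # (i // 2 + 2) yields paired resolutions 4, 4, 8, 8, ..., 1024, 1024,
            # matching the 18 noise inputs of a 1024x1024 StyleGAN synthesis network.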

            if (noise_type == 'zero' or i
                    in [int(layer) for layer in bad_noise_layers.split('.')]):
                new_noise = torch.zeros(res, dtype=torch.float, device='cuda')
                new_noise.requires_grad = False
            elif (noise_type == 'fixed'):
                new_noise = torch.randn(res, dtype=torch.float, device='cuda')
                new_noise.requires_grad = False
            elif (noise_type == 'trainable'):
                new_noise = torch.randn(res, dtype=torch.float, device='cuda')
                if (i < num_trainable_noise_layers):
                    new_noise.requires_grad = True
                    noise_vars.append(new_noise)
                else:
                    new_noise.requires_grad = False
            else:
                raise Exception("unknown noise type")

            noise.append(new_noise)

        var_list = [latent] + noise_vars

        opt_dict = {
            'sgd': torch.optim.SGD,
            'adam': torch.optim.Adam,
            'sgdm': partial(torch.optim.SGD, momentum=0.9),
            'adamax': torch.optim.Adamax
        }
        opt_func = opt_dict[opt_name]
        opt = SphericalOptimizer(opt_func, var_list, lr=learning_rate)

        schedule_dict = {
            'fixed':
            lambda x: 1,
            'linear1cycle':
            lambda x: (9 * (1 - np.abs(x / steps - 1 / 2) * 2) + 1) / 10,
            'linear1cycledrop':
            lambda x: (9 * (1 - np.abs(x /
                                       (0.9 * steps) - 1 / 2) * 2) + 1) / 10
            if x < 0.9 * steps else 1 / 10 + (x - 0.9 * steps) /
            (0.1 * steps) * (1 / 1000 - 1 / 10),
        }
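        # 'fixed' keeps the learning-rate multiplier at 1. 'linear1cycle' ramps it
        # linearly from 0.1 up to 1.0 at the midpoint of the run and back to 0.1.
        # 'linear1cycledrop' runs the same triangle over the first 90% of the steps,
        # then decays linearly from 0.1 down to 0.001 over the final 10%.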
        schedule_func = schedule_dict[lr_schedule]
        scheduler = torch.optim.lr_scheduler.LambdaLR(opt.opt, schedule_func)

        loss_builder = LossBuilder(ref_im, loss_str, eps).cuda()

        min_loss = np.inf
        min_l2 = np.inf
        best_summary = ""
        start_t = time.time()
        gen_im = None

        if self.verbose: print("Optimizing")
        for j in range(steps):
            opt.opt.zero_grad()

            # Duplicate latent in case tile_latent = True
            if (tile_latent):
                latent_in = latent.expand(-1, 18, -1)
            else:
                latent_in = latent

            # Apply learned linear mapping to match latent distribution to that of the mapping network
            latent_in = self.lrelu(latent_in * self.gaussian_fit["std"] +
                                   self.gaussian_fit["mean"])

            # Normalize image to [0,1] instead of [-1,1]
            gen_im = (self.synthesis(latent_in, noise) + 1) / 2

            # Calculate Losses
            loss, loss_dict = loss_builder(latent_in, gen_im)
            loss_dict['TOTAL'] = loss

            # Save best summary for log
            if (loss < min_loss):
                min_loss = loss
                best_summary = f'BEST ({j+1}) | ' + ' | '.join(
                    [f'{x}: {y:.4f}' for x, y in loss_dict.items()])
                best_im = gen_im.clone()

            loss_l2 = loss_dict['L2']

            if (loss_l2 < min_l2):
                min_l2 = loss_l2

            # Save intermediate HR and LR images
            if (save_intermediate):
                yield (best_im.cpu().detach().clamp(0, 1),
                       loss_builder.D(best_im).cpu().detach().clamp(0, 1))

            loss.backward()
            opt.step()
            scheduler.step()

        total_t = time.time() - start_t
        current_info = f' | time: {total_t:.1f} | it/s: {(j+1)/total_t:.2f} | batchsize: {batch_size}'
        if self.verbose: print(best_summary + current_info)
        if (min_l2 <= eps):
            yield (gen_im.clone().cpu().detach().clamp(0, 1),
                   loss_builder.D(best_im).cpu().detach().clamp(0, 1))
        else:
            print(
                "Could not find a face that downscales correctly within epsilon"
            )
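
Because this forward() is a generator (it yields (HR, LR) image pairs rather than returning once), callers iterate over it. Below is a minimal consumption sketch, assuming `model` is an instance of the PULSE-style module that defines the method above and `ref_im` is a batch of reference images already on the GPU; every keyword value is illustrative rather than a canonical default.

from torchvision.utils import save_image  # assumes torchvision is installed

for k, (hr, lr) in enumerate(
        model(ref_im, seed=0, loss_str='100*L2+0.05*GEOCROSS', eps=2e-3,
              noise_type='trainable', num_trainable_noise_layers=5,
              tile_latent=False, bad_noise_layers='17', opt_name='adam',
              learning_rate=0.4, steps=100, lr_schedule='linear1cycledrop',
              save_intermediate=True)):
    # Each yield is a (high-res image, downscaled image) pair clamped to [0, 1].
    save_image(hr, f'hr_step{k:04d}.png')
    save_image(lr, f'lr_step{k:04d}.png')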
Example #2
    def forward(
        self,
        ref_im,
        loss_str,
        eps,
        noise_type,
        num_trainable_noise_layers,
        tile_latent,
        bad_noise_layers,
        opt_name,
        learning_rate,
        steps,
        lr_schedule,
        save_intermediate,
        seed=0,
        var_list_initial_values=None,
        step_postprocess=None,
        psi=1.0,
        **kwargs,
    ):

        if seed:
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            torch.backends.cudnn.deterministic = True

        batch_size = ref_im.shape[0]

        # Generate latent tensor
        if tile_latent:
            latent = torch.randn(
                (batch_size, 1, 512),
                dtype=torch.float,
                requires_grad=True,
                device="cuda",
            )
        else:
            latent = torch.randn(
                (batch_size, 18, 512),
                dtype=torch.float,
                requires_grad=True,
                device="cuda",
            )
        with torch.no_grad():
            latent *= psi

        # Generate list of noise tensors
        noise = []  # stores all of the noise tensors
        noise_vars = []  # stores the noise tensors that we want to optimize on

        for i in range(18):
            # dimension of the ith noise tensor
            res = (batch_size, 1, 2**(i // 2 + 2), 2**(i // 2 + 2))

            if noise_type == "zero" or i in [
                    int(layer) for layer in bad_noise_layers.split(".")
            ]:
                new_noise = torch.zeros(res, dtype=torch.float, device="cuda")
                new_noise.requires_grad = False
            elif noise_type == "fixed":
                new_noise = torch.randn(res, dtype=torch.float, device="cuda")
                new_noise.requires_grad = False
            elif noise_type == "trainable":
                new_noise = torch.randn(res, dtype=torch.float, device="cuda")
                if i < num_trainable_noise_layers:
                    new_noise.requires_grad = True
                    noise_vars.append(new_noise)
                else:
                    new_noise.requires_grad = False
            else:
                raise Exception("unknown noise type")

            noise.append(new_noise)

        var_list = [latent] + noise_vars

        if var_list_initial_values is not None:
            assert len(var_list) == len(var_list_initial_values)
            with torch.no_grad():
                for var, initial_value in zip(var_list,
                                              var_list_initial_values):
                    var.copy_(initial_value)

        opt_dict = {
            "sgd": torch.optim.SGD,
            "adam": torch.optim.Adam,
            "sgdm": partial(torch.optim.SGD, momentum=0.9),
            "adamax": torch.optim.Adamax,
            "custom": partial(torch.optim.AdamW, betas=(0.9, 0.99)),
        }
        opt_func = opt_dict[opt_name]
        if step_postprocess is None:
            opt = SphericalOptimizer(opt_func, var_list, lr=learning_rate)
        else:
            opt = StepPostProcessOptimizer(opt_func,
                                           var_list,
                                           step_postprocess=step_postprocess,
                                           lr=learning_rate)

        schedule_dict = {
            "fixed":
            lambda x: 1,
            "linear1cycle":
            lambda x: (9 * (1 - np.abs(x / steps - 1 / 2) * 2) + 1) / 10,
            "linear1cycledrop":
            lambda x: (9 * (1 - np.abs(x /
                                       (0.9 * steps) - 1 / 2) * 2) + 1) / 10
            if x < 0.9 * steps else 1 / 10 + (x - 0.9 * steps) /
            (0.1 * steps) * (1 / 1000 - 1 / 10),
        }
        schedule_func = schedule_dict[lr_schedule]
        scheduler = torch.optim.lr_scheduler.LambdaLR(opt.opt, schedule_func)

        loss_builder = LossBuilder(ref_im, loss_str, eps).cuda()

        min_loss = np.inf
        min_l2 = np.inf
        best_summary = ""
        start_t = time.time()
        gen_im = None

        if self.verbose:
            print("Optimizing")
        for j in range(steps):
            opt.opt.zero_grad()

            # Duplicate latent in case tile_latent = True
            if tile_latent:
                latent_in = latent.expand(-1, 18, -1)
            else:
                latent_in = latent

            # Apply learned linear mapping to match latent distribution to that of the mapping network
            latent_in = self.lrelu(latent_in * self.gaussian_fit["std"] +
                                   self.gaussian_fit["mean"])

            # Normalize image to [0,1] instead of [-1,1]
            gen_im = (self.synthesis(latent_in, noise) + 1) / 2

            # Calculate Losses
            loss, loss_dict = loss_builder(latent_in, gen_im)
            loss_dict["TOTAL"] = loss

            # Save best summary for log
            if loss < min_loss:
                min_loss = loss
                best_summary = f"BEST ({j+1}) | " + " | ".join(
                    [f"{x}: {y:.4f}" for x, y in loss_dict.items()])
                best_im = gen_im.clone()

            loss_l2 = loss_dict["L2"]

            if loss_l2 < min_l2:
                min_l2 = loss_l2

            # Save intermediate HR and LR images
            if save_intermediate:
                yield dict(
                    final=False,
                    min_l2=min_l2,
                    HR=best_im.cpu().detach().clamp(0, 1),
                    LR=loss_builder.D(best_im).cpu().detach().clamp(0, 1),
                )

            loss.backward()
            opt.step()
            scheduler.step()

        total_t = time.time() - start_t
        current_info = f" | time: {total_t:.1f} | it/s: {(j+1)/total_t:.2f} | batchsize: {batch_size}"
        if self.verbose:
            print(best_summary + current_info)
        yield dict(
            final=True,
            min_l2=min_l2,
            success=min_l2 <= eps,
            HR=gen_im.clone().cpu().detach().clamp(0, 1),
            LR=loss_builder.D(best_im).cpu().detach().clamp(0, 1),
            var_list=var_list,
            loss_dict=loss_dict,
        )
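
Unlike Example #1, this variant yields dicts and always finishes with a final=True dict that exposes the optimized var_list, so one run can warm-start another through var_list_initial_values. A minimal sketch of that pattern, assuming `model` is an instance of the defining module and `ref_im` is a CUDA image batch; the argument values are illustrative.

common = dict(loss_str='100*L2+0.05*GEOCROSS', eps=2e-3, noise_type='trainable',
              num_trainable_noise_layers=5, tile_latent=False,
              bad_noise_layers='17', opt_name='adam', learning_rate=0.4,
              steps=100, lr_schedule='linear1cycledrop', save_intermediate=False)

# First pass: with save_intermediate=False only the final dict is yielded.
first = next(out for out in model(ref_im, seed=1, psi=0.8, **common) if out['final'])

# Second pass: resume from the latent and noise tensors of the first pass.
for out in model(ref_im, seed=1, psi=0.8,
                 var_list_initial_values=first['var_list'], **common):
    if out['final']:
        print('success:', out['success'], '| best L2:', float(out['min_l2']))
        hr, lr = out['HR'], out['LR']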
Example #3
    def forward(self, ref_im, loss_str, eps, tile_latent, opt_name, steps,
                learning_rate, lr_schedule, save_intermediate):

        gaussian_fit = self.Gaussian_fit()

        batch_size = ref_im.shape[0]

        if (tile_latent):
            latent = torch.randn((batch_size, 1, 512),
                                 dtype=torch.float,
                                 requires_grad=True,
                                 device='cuda')
        else:
            latent = torch.randn((batch_size, 18, 512),
                                 dtype=torch.float,
                                 requires_grad=True,
                                 device='cuda')

        var_list = [latent]

        opt_dict = {
            'sgd': torch.optim.SGD,
            'adam': torch.optim.Adam,
            'sgdm': partial(torch.optim.SGD, momentum=0.9),
            'adamax': torch.optim.Adamax
        }
        opt_func = opt_dict[opt_name]
        opt = SphericalOptimizer(opt_func, var_list, lr=learning_rate)

        schedule_dict = {
            'fixed':
            lambda x: 1,
            'linear1cycle':
            lambda x: (9 * (1 - np.abs(x / steps - 1 / 2) * 2) + 1) / 10,
            'linear1cycledrop':
            lambda x: (9 * (1 - np.abs(x /
                                       (0.9 * steps) - 1 / 2) * 2) + 1) / 10
            if x < 0.9 * steps else 1 / 10 + (x - 0.9 * steps) /
            (0.1 * steps) * (1 / 1000 - 1 / 10),
        }
        schedule_func = schedule_dict[lr_schedule]
        scheduler = torch.optim.lr_scheduler.LambdaLR(opt.opt, schedule_func)

        ref_im = ref_im.cuda()

        loss_builder = LossBuilder(ref_im, loss_str, eps).cuda()

        min_loss = np.inf
        min_l2 = np.inf
        best_summary = ""
        start_t = time.time()
        gen_im = None

        print("Optimizing")

        for j in range(steps):
            opt.opt.zero_grad()

            # Duplicate latent in case tile_latent = True
            if (tile_latent):
                latent_in = latent.expand(-1, 18, -1)
            else:
                latent_in = latent

            # Apply learned linear mapping to match latent distribution to that of the mapping network
            latent_in = self.lrelu(latent_in * gaussian_fit["std"] +
                                   gaussian_fit["mean"])

            # Normalize image to [0,1] instead of [-1,1]
            gen_im = (self.synthesizer(latent_in) + 1) / 2

            # Calculate Losses
            loss, loss_dict = loss_builder(latent_in, gen_im)
            loss_dict['TOTAL'] = loss

            # Save best summary for log
            if (loss < min_loss):
                min_loss = loss
                best_summary = f'BEST ({j+1}) | ' + ' | '.join(
                    [f'{x}: {y:.4f}' for x, y in loss_dict.items()])
                best_im = gen_im.clone()

            loss_l2 = loss_dict['L2']

            if (loss_l2 < min_l2):
                min_l2 = loss_l2

            # Save intermediate HR and LR images
            if (save_intermediate):
                yield (best_im.cpu().detach().clamp(0, 1),
                       loss_builder.D(best_im).cpu().detach().clamp(0, 1))

            loss.backward()
            opt.step()
            scheduler.step()

        total_t = time.time() - start_t
        current_info = f' | time: {total_t:.1f} | it/s: {(j+1)/total_t:.2f} | batchsize: {batch_size}'
        print(best_summary + current_info)
        if (min_l2 <= eps):
            yield (gen_im.clone().cpu().detach().clamp(0, 1),
                   loss_builder.D(best_im).cpu().detach().clamp(0, 1))
        else:
            print(
                "Could not find a face that downscales correctly within epsilon"
            )
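Example #4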
class Projector(nn.Module):
    """
    Projects data to latent space and noise tensors.
    Arguments:
        G (Generator)
        dlatent_avg_samples (int): Number of dlatent samples
            to collect to find the mean and std.
            Default value is 10 000.
        dlatent_avg_label (int, torch.Tensor, optional): The label to
            use when gathering dlatent statistics.
        dlatent_device (int, str, torch.device, optional): Device to use
            for gathering statistics of dlatents. By default uses
            the same device as parameters of `G` reside on.
        dlatent_batch_size (int): The batch size to sample
            dlatents with. Default value is 1024.
        lpips_model (nn.Module): A model that returns the feature distance
            between two inputs. Default value is the LPIPS VGG16 model.
        lpips_size (int, optional): Resize any data fed to `lpips_model` so that
            its smallest side matches this argument. Defaults to 256 only if
            `lpips_model` is unspecified.
        verbose (bool): Write progress of dlatent statistics gathering to stdout.
            Default value is True.
    """
    def __init__(self,
                 G,
                 dlatent_avg_samples=10000,
                 dlatent_avg_label=None,
                 dlatent_device=None,
                 dlatent_batch_size=1024,
                 lpips_model=None,
                 lpips_size=None,
                 verbose=True):
        super(Projector, self).__init__()
        assert isinstance(G, models.Generator)
        G.eval().requires_grad_(False)

        self.G_synthesis = G.G_synthesis

        G_mapping = G.G_mapping

        dlatent_batch_size = min(dlatent_batch_size, dlatent_avg_samples)

        if dlatent_device is None:
            dlatent_device = next(G_mapping.parameters()).device
        else:
            dlatent_device = torch.device(dlatent_device)

        G_mapping.to(dlatent_device)

        latents = torch.empty(
            dlatent_avg_samples, G_mapping.latent_size).normal_()
        dlatents = []

        labels = None
        if dlatent_avg_label is not None:
            labels = torch.tensor(dlatent_avg_label).to(dlatent_device).long().view(-1).repeat(dlatent_batch_size)

        if verbose:
            progress = utils.ProgressWriter(np.ceil(dlatent_avg_samples / dlatent_batch_size))
            progress.write('Gathering dlatents...', step=False)

        for i in range(0, dlatent_avg_samples, dlatent_batch_size):
            batch_latents = latents[i: i + dlatent_batch_size].to(dlatent_device)
            batch_labels = None
            if labels is not None:
                batch_labels = labels[:len(batch_latents)]
            with torch.no_grad():
                dlatents.append(G_mapping(batch_latents, labels=batch_labels).cpu())
            if verbose:
                progress.step()

        if verbose:
            progress.write('Done!', step=False)
            progress.close()

        dlatents = torch.cat(dlatents, dim=0)

        self.register_buffer(
            '_dlatent_avg',
            dlatents.mean(dim=0).view(1, 1, -1)
        )
        self.register_buffer(
            '_dlatent_std',
            torch.sqrt(
                torch.sum((dlatents - self._dlatent_avg) ** 2) / dlatent_avg_samples + 1e-8
            ).view(1, 1, 1)
        )

        if lpips_model is None:
            warnings.warn(
                'Using default LPIPS distance metric based on VGG 16. ' + \
                'This metric will only work on image data where values are in ' + \
                'the range [-1, 1], please specify an lpips module if you want ' + \
                'to use other kinds of data formats.'
            )
            lpips_model = lpips.LPIPS_VGG16(pixel_min=-1, pixel_max=1)
            lpips_size = 256
        self.lpips_model = lpips_model.eval().requires_grad_(False)
        self.lpips_size = lpips_size

        self.to(dlatent_device)

    def _scale_for_lpips(self, data):
        if not self.lpips_size:
            return data
        scale_factor = self.lpips_size / min(data.size()[2:])
        if scale_factor == 1:
            return data
        mode = 'nearest'
        if scale_factor < 1:
            mode = 'area'
        return F.interpolate(data, scale_factor=scale_factor, mode=mode)

    def _check_job(self):
        assert self._job is not None, 'Call `start()` first to set up target.'
        # device of dlatent param will not change with the rest of the models
        # and buffers of this class as it was never registered as a buffer or
        # parameter. Same goes for optimizer. Make sure it is on the correct device.
        if self._job.dlatent_param.device != self._dlatent_avg.device:
            self._job.dlatent_param = self._job.dlatent_param.to(self._dlatent_avg)
            self._job.opt.load_state_dict(
                utils.move_to_device(self._job.opt.state_dict(), self._dlatent_avg.device)[0])

    def generate(self):
        """
        Generate an output with the current dlatent and noise values.
        Returns:
            output (torch.Tensor)
        """
        self._check_job()
        with torch.no_grad():
            return self.G_synthesis(self._job.dlatent_param)

    def get_dlatent(self):
        """
        Get a copy of the current dlatent values.
        Returns:
            dlatents (torch.Tensor)
        """
        self._check_job()
        return self._job.dlatent_param.data.clone()

    def get_noise(self):
        """
        Get a copy of the current noise values.
        Returns:
            noise_tensors (list)
        """
        self._check_job()
        return [noise.data.clone() for noise in self._job.noise_params]

    def start(self,
              target,
              num_steps=1000,
              initial_learning_rate=0.1,
              initial_noise_factor=0.05,
              lr_rampdown_length=0.25,
              lr_rampup_length=0.05,
              noise_ramp_length=0.75,
              regularize_noise_weight=1e5,
              verbose=True,
              verbose_prefix='',
              noise_layers=5):
        """
        Set up a target and its projection parameters.
        Arguments:
            target (torch.Tensor): The data target. This should
                already be preprocessed (scaled to correct value range).
            num_steps (int): Number of optimization steps. Default
                value is 1000.
            initial_learning_rate (float): Default value is 0.1.
            initial_noise_factor (float): Default value is 0.05.
            lr_rampdown_length (float): Default value is 0.25.
            lr_rampup_length (float): Default value is 0.05.
            noise_ramp_length (float): Default value is 0.75.
            regularize_noise_weight (float): Default value is 1e5.
            verbose (bool): Write progress to stdout every time
                `step()` is called.
            verbose_prefix (str, optional): This is written before
                any other output to stdout.
        """
        if target.dim() == self.G_synthesis.dim + 1:
            target = target.unsqueeze(0)
        assert target.dim() == self.G_synthesis.dim + 2, \
            'Number of dimensions of target data is incorrect.'

        target = target.to(self._dlatent_avg)
        target_scaled = self._scale_for_lpips(target)

        dlatent_param = nn.Parameter(self._dlatent_avg.clone().repeat(target.size(0), len(self.G_synthesis), 1))
        noise_params = self.G_synthesis.static_noise(trainable=True)
        for i_n, n in enumerate(noise_params):
            if i_n > noise_layers:
                # Freeze noise tensors beyond the first `noise_layers` layers;
                # static_noise(trainable=True) made every layer trainable above.
                n.requires_grad = False
        params = [dlatent_param] + noise_params
        # opt = torch.optim.Adam(params)
        opt = SphericalOptimizer(torch.optim.Adam, params, lr=initial_learning_rate)
        schedule_func = lambda x: (
            (9 * (1 - np.abs(x / (0.9 * num_steps) - 1 / 2) * 2) + 1) / 10
            if x < 0.9 * num_steps
            else 1 / 10 + (x - 0.9 * num_steps) / (0.1 * num_steps) * (1 / 1000 - 1 / 10))

        loss_builder = LossBuilder(target, "100*L2+0.1*GEOCROSS", 1e-3).cuda()
        scheduler = torch.optim.lr_scheduler.LambdaLR(opt.opt, schedule_func)

        noise_tensor = torch.empty_like(dlatent_param)

        if verbose:
            progress = utils.ProgressWriter(num_steps)
            value_tracker = utils.ValueTracker()

        self._job = utils.AttributeDict(**locals())
        self._job.current_step = 0
        self._job.min_loss = np.inf
        self._job.min_l2 = np.inf
        self._job.best_output = None
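
A minimal usage sketch for Projector, assuming a `models.Generator` instance `G` from the same package and a target image tensor already scaled to [-1, 1]; the step() call is the per-iteration method implied by the start() docstring and _check_job(), and its exact signature is an assumption here.

# Assumes `G` is a models.Generator and `target_img` is an image tensor
# preprocessed to [-1, 1]; step() is assumed to take no required arguments.
proj = Projector(G, dlatent_avg_samples=10000, verbose=True)
proj.start(target_img, num_steps=1000, initial_learning_rate=0.1)
for _ in range(1000):
    proj.step()                      # one optimization step over dlatents and noise
projected = proj.generate()          # synthesis output for the current dlatents
dlatents = proj.get_dlatent()        # copy of the optimized dlatent tensor
noise_tensors = proj.get_noise()     # copies of the optimized noise tensors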
Example #5
def main(config, mode, weights):
    # Generate configuration
    cfg = kaptan.Kaptan(handler='yaml')
    config = cfg.import_config(config)

    # Generate logger
    MODEL_SAVE_NAME, MODEL_SAVE_FOLDER, LOGGER_SAVE_NAME, CHECKPOINT_DIRECTORY = utils.generate_save_names(
        config)
    os.makedirs(MODEL_SAVE_FOLDER, exist_ok=True)
    logger = utils.generate_logger(MODEL_SAVE_FOLDER, LOGGER_SAVE_NAME)

    logger.info("*" * 40)
    logger.info("")
    logger.info("")
    logger.info("Using the following configuration:")
    logger.info(config.export("yaml", indent=4))
    logger.info("")
    logger.info("")
    logger.info("*" * 40)

    NORMALIZATION_MEAN, NORMALIZATION_STD, RANDOM_ERASE_VALUE = utils.fix_generator_arguments(
        config)
    TRAINDATA_KWARGS = {
        "rea_value": config.get("TRANSFORMATION.RANDOM_ERASE_VALUE")
    }
    """ MODEL PARAMS """
    from utils import model_weights

    MODEL_WEIGHTS = None
    if config.get("MODEL.MODEL_BASE") in model_weights:
        if mode == "train":
            if not os.path.exists(
                    model_weights[config.get("MODEL.MODEL_BASE")][1]):
                logger.info(
                    "Model weights file {} does not exist. Downloading.".
                    format(model_weights[config.get("MODEL.MODEL_BASE")][1]))
                utils.web.download(
                    model_weights[config.get("MODEL.MODEL_BASE")][1],
                    model_weights[config.get("MODEL.MODEL_BASE")][0])
            MODEL_WEIGHTS = model_weights[config.get("MODEL.MODEL_BASE")][1]
    else:
        raise NotImplementedError(
            "Model %s is not available. Please choose one of the following: %s"
            % (config.get("MODEL.MODEL_BASE"), str(model_weights.keys())))

    # ------------------ LOAD SAVED LOGGER IF EXISTS ----------------------------
    DRIVE_BACKUP = config.get("SAVE.DRIVE_BACKUP")
    if DRIVE_BACKUP:  # Find backed-up log file,  if it exists, and copy to local
        backup_logger = os.path.join(CHECKPOINT_DIRECTORY, LOGGER_SAVE_NAME)
        logger_save_path = os.path.join(MODEL_SAVE_FOLDER, LOGGER_SAVE_NAME)
        if os.path.exists(backup_logger):
            shutil.copy2(backup_logger, logger_save_path)
    else:
        backup_logger = None

    NUM_GPUS = torch.cuda.device_count()
    if NUM_GPUS > 1:
        raise RuntimeError(
            "Not built for multi-GPU. Please start with single-GPU.")
    logger.info("Found %i GPUs" % NUM_GPUS)

    # --------------------- BUILD GENERATORS ------------------------
    data_crawler = utils.dynamic_import(cfg=config,
                                        module_name="crawlers",
                                        import_name="EXECUTION.CRAWLER")
    data_generator = utils.dynamic_import(cfg=config,
                                          module_name="generators",
                                          import_name="EXECUTION.GENERATOR")

    crawler = data_crawler(data_folder=config.get("DATASET.ROOT_DATA_FOLDER"),
                           train_folder=config.get("DATASET.TRAIN_FOLDER"),
                           test_folder=config.get("DATASET.TEST_FOLDER"),
                           query_folder=config.get("DATASET.QUERY_FOLDER"),
                           **{"logger": logger})

    train_mode = config.get("EXECUTION.TRAIN_MODE", "train")  # or "train-gzsl"
    train_generator = data_generator(
        gpus=NUM_GPUS,
        i_shape=config.get("DATASET.SHAPE"),
        normalization_mean=NORMALIZATION_MEAN,
        normalization_std=NORMALIZATION_STD,
        normalization_scale=1. /
        config.get("TRANSFORMATION.NORMALIZATION_SCALE"),
        h_flip=config.get("TRANSFORMATION.H_FLIP"),
        t_crop=config.get("TRANSFORMATION.T_CROP"),
        rea=config.get("TRANSFORMATION.RANDOM_ERASE"),
        **TRAINDATA_KWARGS)
    train_generator.setup(crawler,
                          mode=train_mode,
                          batch_size=config.get("TRANSFORMATION.BATCH_SIZE"),
                          instance=config.get("TRANSFORMATION.INSTANCES"),
                          workers=config.get("TRANSFORMATION.WORKERS"))

    logger.info("Generated training data generator")
    TRAIN_CLASSES = train_generator.num_entities
    test_mode = config.get("EXECUTION.TEST_MODE", "zsl")  # or "gzsl"
    test_generator = data_generator(
        gpus=NUM_GPUS,
        i_shape=config.get("DATASET.SHAPE"),
        normalization_mean=NORMALIZATION_MEAN,
        normalization_std=NORMALIZATION_STD,
        normalization_scale=1. /
        config.get("TRANSFORMATION.NORMALIZATION_SCALE"),
        h_flip=0,
        t_crop=False,
        rea=False)
    test_generator.setup(crawler,
                         mode=test_mode,
                         batch_size=config.get("TRANSFORMATION.BATCH_SIZE"),
                         instance=config.get("TRANSFORMATION.INSTANCES"),
                         workers=config.get("TRANSFORMATION.WORKERS"))
    TEST_CLASSES = test_generator.num_entities
    logger.info("Generated validation data/query generator")

    # --------------------- INSTANTIATE MODEL ------------------------
    model_builder = __import__("models", fromlist=["*"])
    model_builder = getattr(model_builder,
                            config.get("EXECUTION.MODEL_BUILDER"))
    logger.info("Loaded {} from {} to build CarZam model".format(
        config.get("EXECUTION.MODEL_BUILDER"), "models"))

    carzam_model = model_builder(
        arch=config.get("MODEL.MODEL_ARCH"),
        base=config.get("MODEL.MODEL_BASE"),
        weights=MODEL_WEIGHTS,
        embedding_dimensions=config.get("MODEL.EMBEDDING_DIMENSIONS"),
        normalization=config.get("MODEL.MODEL_NORMALIZATION"),
        **json.loads(config.get("MODEL.MODEL_KWARGS")))
    logger.info("Finished instantiating model with {} architecture".format(
        config.get("MODEL.MODEL_ARCH")))

    if mode == "test":
        carzam_model.load_state_dict(torch.load(weights))
        carzam_model.cuda()
        carzam_model.eval()
    else:
        if weights != "":  # Load weights if train and starting from a another model base...
            logger.info(
                "Commencing partial model load from {}".format(weights))
            carzam_model.partial_load(weights)
            logger.info("Completed partial model load from {}".format(weights))
        carzam_model.cuda()
        logger.info(
            torchsummary.summary(carzam_model,
                                 input_size=(3, *config.get("DATASET.SHAPE"))))

    # --------------------- INSTANTIATE LOSS ------------------------
    from loss import CarZamLossBuilder as LossBuilder
    loss_function = LossBuilder(loss_functions=config.get("LOSS.LOSSES"),
                                loss_lambda=config.get("LOSS.LOSS_LAMBDAS"),
                                loss_kwargs=config.get("LOSS.LOSS_KWARGS"),
                                **{"logger": logger})
    logger.info("Built loss function")

    # --------------------- INSTANTIATE LOSS OPTIMIZER --------------
    from optimizer.StandardLossOptimizer import StandardLossOptimizer as loss_optimizer

    LOSS_OPT = loss_optimizer(
        base_lr=config.get("LOSS_OPTIMIZER.BASE_LR",
                           config.get("OPTIMIZER.BASE_LR")),
        lr_bias=config.get("LOSS_OPTIMIZER.LR_BIAS_FACTOR",
                           config.get("OPTIMIZER.LR_BIAS_FACTOR")),
        weight_decay=config.get("LOSS_OPTIMIZER.WEIGHT_DECAY",
                                config.get("OPTIMIZER.WEIGHT_DECAY")),
        weight_bias=config.get("LOSS_OPTIMIZER.WEIGHT_BIAS_FACTOR",
                               config.get("OPTIMIZER.WEIGHT_BIAS_FACTOR")),
        gpus=NUM_GPUS)
    loss_optimizer = LOSS_OPT.build(
        loss_builder=loss_function,
        name=config.get("LOSS_OPTIMIZER.OPTIMIZER_NAME",
                        config.get("OPTIMIZER.OPTIMIZER_NAME")),
        **json.loads(
            config.get("LOSS_OPTIMIZER.OPTIMIZER_KWARGS",
                       config.get("OPTIMIZER.OPTIMIZER_KWARGS"))))
    logger.info("Built loss optimizer")

    # --------------------- INSTANTIATE OPTIMIZER ------------------------
    optimizer_builder = __import__("optimizer", fromlist=["*"])
    optimizer_builder = getattr(optimizer_builder,
                                config.get("EXECUTION.OPTIMIZER_BUILDER"))
    logger.info("Loaded {} from {} to build Optimizer model".format(
        config.get("EXECUTION.OPTIMIZER_BUILDER"), "optimizer"))

    OPT = optimizer_builder(
        base_lr=config.get("OPTIMIZER.BASE_LR"),
        lr_bias=config.get("OPTIMIZER.LR_BIAS_FACTOR"),
        weight_decay=config.get("OPTIMIZER.WEIGHT_DECAY"),
        weight_bias=config.get("OPTIMIZER.WEIGHT_BIAS_FACTOR"),
        gpus=NUM_GPUS)
    optimizer = OPT.build(
        carzam_model, config.get("OPTIMIZER.OPTIMIZER_NAME"),
        **json.loads(config.get("OPTIMIZER.OPTIMIZER_KWARGS")))
    logger.info("Built optimizer")

    # --------------------- INSTANTIATE SCHEDULER ------------------------
    try:  # We first check if scheduler is part of torch's provided schedulers.
        scheduler = __import__('torch.optim.lr_scheduler',
                               fromlist=['lr_scheduler'])
        scheduler = getattr(scheduler, config.get("SCHEDULER.LR_SCHEDULER"))
    except (
            ModuleNotFoundError, AttributeError
    ):  # If it fails, then we try to import from schedulers implemented in scheduler/ folder
        scheduler_ = config.get("SCHEDULER.LR_SCHEDULER")
        scheduler = __import__("scheduler." + scheduler_,
                               fromlist=[scheduler_])
        scheduler = getattr(scheduler, scheduler_)
    scheduler = scheduler(optimizer,
                          last_epoch=-1,
                          **json.loads(config.get("SCHEDULER.LR_KWARGS")))
    logger.info("Built scheduler")

    # ------------------- INSTANTIATE LOSS SCHEDULER ---------------------
    loss_scheduler = None
    if loss_optimizer is not None:  # In case loss has no differentiable parameters
        try:
            loss_scheduler = __import__('torch.optim.lr_scheduler',
                                        fromlist=['lr_scheduler'])
            loss_scheduler = getattr(
                loss_scheduler,
                config.get("LOSS_SCHEDULER.LR_SCHEDULER",
                           config.get("SCHEDULER.LR_SCHEDULER")))
        except (ModuleNotFoundError, AttributeError):
            loss_scheduler_ = config.get("LOSS_SCHEDULER.LR_SCHEDULER",
                                         config.get("SCHEDULER.LR_SCHEDULER"))
            loss_scheduler = __import__("scheduler." + loss_scheduler_,
                                        fromlist=[loss_scheduler_])
            loss_scheduler = getattr(loss_scheduler, loss_scheduler_)
        loss_scheduler = loss_scheduler(
            loss_optimizer,
            last_epoch=-1,
            **json.loads(
                config.get("LOSS_SCHEDULER.LR_KWARGS",
                           config.get("SCHEDULER.LR_KWARGS"))))
        logger.info("Built loss scheduler")
    else:
        loss_scheduler = None

    # --------------------- DRIVE BACKUP ------------------------
    if DRIVE_BACKUP:
        fl_list = glob.glob(os.path.join(CHECKPOINT_DIRECTORY, "*.pth"))
    else:
        fl_list = glob.glob(os.path.join(MODEL_SAVE_FOLDER, "*.pth"))
    _re = re.compile(r'.*epoch([0-9]+)\.pth')
    previous_stop = [
        int(item[1]) for item in [_re.search(item) for item in fl_list]
        if item is not None
    ]
    if len(previous_stop) == 0:
        previous_stop = 0
        logger.info("No previous stop detected. Will start from epoch 0")
    else:
        previous_stop = max(previous_stop) + 1
        logger.info(
            "Previous stop detected. Will attempt to resume from epoch %i" %
            previous_stop)

    # --------------------- PERFORM TRAINING ------------------------
    trainer = __import__("trainer", fromlist=["*"])
    trainer = getattr(trainer, config.get("EXECUTION.TRAINER"))
    logger.info("Loaded {} from {} to build Trainer".format(
        config.get("EXECUTION.TRAINER"), "trainer"))

    loss_stepper = trainer(model=carzam_model,
                           loss_fn=loss_function,
                           optimizer=optimizer,
                           loss_optimizer=loss_optimizer,
                           scheduler=scheduler,
                           loss_scheduler=loss_scheduler,
                           train_loader=train_generator.dataloader,
                           test_loader=test_generator.dataloader,
                           queries=TEST_CLASSES,
                           epochs=config.get("EXECUTION.EPOCHS"),
                           logger=logger,
                           test_mode=config.get("EXECUTION.TEST_MODE",
                                                "zsl"))  # or "gzsl"
    loss_stepper.setup(step_verbose=config.get("LOGGING.STEP_VERBOSE"),
                       save_frequency=config.get("SAVE.SAVE_FREQUENCY"),
                       test_frequency=config.get("EXECUTION.TEST_FREQUENCY"),
                       save_directory=MODEL_SAVE_FOLDER,
                       save_backup=DRIVE_BACKUP,
                       backup_directory=CHECKPOINT_DIRECTORY,
                       gpus=NUM_GPUS,
                       fp16=config.get("OPTIMIZER.FP16"),
                       model_save_name=MODEL_SAVE_NAME,
                       logger_file=LOGGER_SAVE_NAME)
    if mode == 'train':
        loss_stepper.train(continue_epoch=previous_stop)
    elif mode == 'test':
        loss_stepper.evaluate()
    else:
        raise NotImplementedError()
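
main() expects a configuration file path, a mode of 'train' or 'test', and an optional weights path. Below is a minimal sketch of wiring it to the command line with argparse; the original project may use a different CLI layer, and the option names here are illustrative.

import argparse  # assumption: the original entry point may use another CLI library

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train or evaluate the CarZam model")
    parser.add_argument("--config", required=True, help="path to the YAML configuration file")
    parser.add_argument("--mode", choices=["train", "test"], default="train")
    parser.add_argument("--weights", default="", help="optional checkpoint to load")
    args = parser.parse_args()
    main(args.config, args.mode, args.weights)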