def __init__(
        self,
        n_epochs: int = 10,
        batch_size: int = 10,
        lr: float = 0.01,
        log_interval: int = 50,
    ):
        """
        Inherits from Trainer class.
        This class handles the training and test processes of the LogisticRegression
        model.

        :param n_epochs: number of epochs
        :param batch_size: the size of the batches
        :param lr: the learning rate
        :param log_interval: the interval at which logging and loss collection is
        performed during training
        """
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.lr = lr
        self.log_interval = log_interval

        # Snapshot the hyperparameters BEFORE the logger exists: the original
        # iterated self.__dict__ after assigning self.logger, which also
        # attached a self-referential ``logger`` attribute onto the logger
        # itself (a reference cycle hazard if the logger is serialized).
        hyperparameters = dict(self.__dict__)

        self.logger = TrainLogger()
        for name, value in hyperparameters.items():
            setattr(self.logger, name, value)

        # It is important that the super initialization happens after
        # setting the trainlogger attributes.
        super(LogisticRegressionTrainer, self).__init__()
# --- Ejemplo n.º 2 (Example no. 2; scraped separator, vote count 0) ---
    def __init__(self, c=1.0, kernel="rbf", degree=3, max_iter=-1):
        """
        Inherits from Trainer class.
        This class handles the training and test processes of the SVM
        model.

        :param c: the penalty for all misclassified data samples in the model;
        the penalty for a datapoint is proportional to the distance of the
        point to the decision boundary
        :param kernel: function to transform the data in the desired way
        :param degree: degree of the polynomial kernel function ('poly');
        ignored by all other kernel functions
        :param max_iter: limit of iterations; no limit if it is -1
        """
        self.c = c
        # TODO: Include gamma as parameter. High gamma: more likely to overfit
        # gamma only applicable to non-linear kernels
        self.kernel = kernel
        self.degree = degree
        self.max_iter = max_iter

        # Snapshot the hyperparameters BEFORE the logger exists: the original
        # iterated self.__dict__ after assigning self.logger, which also
        # attached a self-referential ``logger`` attribute onto the logger
        # itself (a reference cycle hazard if the logger is serialized).
        hyperparameters = dict(self.__dict__)

        self.logger = TrainLogger()
        for name, value in hyperparameters.items():
            setattr(self.logger, name, value)

        # It is important that the super initialization happens after
        # setting the trainlogger attributes.
        super(SVMTrainer, self).__init__()
# --- Ejemplo n.º 3 (Example no. 3; scraped separator, vote count 0) ---
    def run(self):
        """Run the full training loop for the img2pose model.

        For each epoch: iterate over ``self.train_loader``, do a
        forward/backward pass, clip FPN gradients, and step the optimizer.
        Running losses are flushed to tensorboard every
        ``self.tensorboard_loss_every`` steps.  After each epoch the model is
        either validated (when ``self.config.val_source`` is set) or saved
        unconditionally; validation loss optionally drives LR-plateau
        scheduling and early stopping.
        """
        self.img2pose_model.train()

        # accumulate running loss to log into tensorboard
        running_losses = {}
        running_losses["loss"] = 0

        # global step counter across all epochs (drives tensorboard logging)
        step = 0

        # prints the best step and loss every time it does a validation
        self.best_step = 0
        self.best_val_loss = float("Inf")

        for epoch in range(self.config.epochs):
            train_logger = TrainLogger(self.config.batch_size,
                                       self.config.frequency_log)
            # NOTE(review): idx is re-bound by the loop below; this default
            # only matters if the loader is empty.
            idx = 0
            for idx, data in enumerate(self.train_loader):
                imgs, targets = data
                # move images and per-image target dicts to the configured device
                imgs = [image.to(self.config.device) for image in imgs]
                targets = [{k: v.to(self.config.device)
                            for k, v in t.items()} for t in targets]
                self.optimizer.zero_grad()

                # forward pass

                losses = self.img2pose_model.forward(imgs, targets)

                # total loss is the sum of all individual loss terms
                loss = sum(loss for loss in losses.values())
                # if loss.item() > 100000:
                #     import ipdb; ipdb.set_trace()
                # does a backward propagation through the network
                loss.backward()

                # clip gradients of the FPN backbone to stabilize training
                torch.nn.utils.clip_grad_norm_(
                    self.img2pose_model.fpn_model.parameters(), 10)

                self.optimizer.step()

                # in distributed mode, average the loss dict across workers
                # so the logged values are comparable to single-GPU runs
                if self.config.distributed:
                    losses = reduce_dict(losses)
                    loss = sum(loss for loss in losses.values())

                # accumulate each named loss term into the running sums
                for loss_name in losses.keys():
                    if loss_name in running_losses:
                        running_losses[loss_name] += losses[loss_name].item()
                    else:
                        running_losses[loss_name] = losses[loss_name].item()

                running_losses["loss"] += loss.item()

                # saves loss into tensorboard
                if step % self.tensorboard_loss_every == 0 and step != 0:
                    for loss_name in running_losses.keys():
                        self.writer.add_scalar(
                            f"train_{loss_name}",
                            running_losses[loss_name] /
                            self.tensorboard_loss_every,
                            step,
                        )

                        # reset the running sum once it has been logged
                        running_losses[loss_name] = 0

                train_logger(epoch, self.config.epochs, idx,
                             len(self.train_loader), loss.item())
                step += 1

            # evaluate model using validation set (if set)
            if self.config.val_source is not None:
                val_loss = self.evaluate(step)

            else:
                # otherwise just save the model
                save_model(
                    self.img2pose_model.fpn_model_without_ddp,
                    self.optimizer,
                    self.config,
                    step=step,
                )

            # if validation loss stops decreasing, decrease lr
            if self.config.lr_plateau and self.config.val_source is not None:
                self.scheduler.step(val_loss)

            # early stop model to prevent overfitting
            if self.config.early_stop and self.config.val_source is not None:
                self.early_stop(val_loss)
                if self.early_stop.stop:
                    print("Early stopping model...")
                    break

        # final validation pass after training completes (or early-stops)
        if self.config.val_source is not None:
            val_loss = self.evaluate(step)
# --- Ejemplo n.º 4 (Example no. 4; scraped separator, vote count 0) ---
    def __init__(
        self,
        args: Dict,
        train_envs,
        val_envs,
        vis_env,
        actor_critic,
        options_policy,
        options_decoder,
        trajectory_encoder,
        trajectory_optim,
        z_encoder,
        b_args,
        agent,
        args_state,
        rollouts: RolloutStorage,
        device: torch.device,
        num_processes_eff: int,
    ):
        """Wire up the hierarchical-RL trainer.

        Stores the supplied environments, policies, encoders and optimizers,
        builds the ``TrainLogger``, initializes the omega-option curriculum
        state and (when ``args.reset_adaptive``) the adaptive reset
        probabilities, then moves all modules to ``device`` via ``self.to``.
        """
        # Plain attribute wiring.
        self.args = args
        self.train_envs = train_envs
        self.val_envs = val_envs
        self.actor_critic = actor_critic
        self.options_policy = options_policy
        self.options_decoder = options_decoder
        self.trajectory_encoder = trajectory_encoder
        self.trajectory_optim = trajectory_optim
        self.z_encoder = z_encoder
        self.b_args = b_args
        self.agent = agent
        self.args_state = args_state
        self.rollouts = rollouts
        self.device = device
        self.num_processes_eff = num_processes_eff

        # Fixed number of minibatches consumed per epoch.
        self.num_batches_per_epoch = 100

        # Classic-control tasks use a continuous observation space.
        self.continuous_state_space = args.env_name in ('mountain-car',
                                                        'acrobat')

        self.logger = TrainLogger(
            args=args,
            vis_env=vis_env,
            val_envs=val_envs,
            device=device,
            num_batches_per_epoch=self.num_batches_per_epoch,
            num_processes_eff=self.num_processes_eff,
            continuous_state_space=self.continuous_state_space,
        )

        # Omega-option curriculum bookkeeping.
        self.omega_dim_growth_ratio = 1.5  # from VALOR
        self.omega_dim_ll_threshold = np.log(
            self.args.omega_traj_ll_theta)  # from VALOR
        self.min_omega_dim = min(2, args.omega_option_dims)

        # A fixed option dimensionality is used for conditional models,
        # infobot-supervised mode, and continuous option spaces.
        fixed_omega_dim = (args.model == 'cond'
                           or args.hier_mode == 'infobot-supervised'
                           or args.option_space == 'continuous')
        if fixed_omega_dim:
            self.omega_dim_current = args.omega_option_dims
        elif hasattr(args, 'omega_dim_current'):
            # Resume from an explicitly provided current dimensionality.
            self.omega_dim_current = args.omega_dim_current
        elif args.use_omega_dim_curriculum and args.hier_mode != 'transfer':
            # Curriculum: start small and grow over training.
            self.omega_dim_current = self.min_omega_dim
        else:
            self.omega_dim_current = args.omega_option_dims

        if args.reset_adaptive:
            print("Using adaptive reset, setting initial reset_prob to 1.0")
            self.train_envs.modify_attr('reset_prob',
                                        [1.0] * args.num_processes)

        self.total_time_steps = 0

        self.to(device)
# --- Ejemplo n.º 5 (Example no. 5; scraped separator, vote count 0) ---
def train(args):
    """Adversarially train a texture GAN (generator + patch discriminator).

    Builds the generator/discriminator channel layouts from ``args``, trains
    both networks with binary-cross-entropy patch losses on the DTD training
    loader, periodically writes sample images and checkpoints to
    ``args.save_dir``, and logs per-epoch losses through ``TrainLogger``.

    :param args: parsed CLI namespace carrying all hyper-parameters (noise
        dimensions, layer counts, learning rates, logging/saving intervals)
    """
    def to_var(x, volatile=False, requires_grad=False):
        # Move to the configured GPU (unless --nogpu) and wrap in a Variable.
        # NOTE(review): ``volatile`` is a legacy pre-0.4 PyTorch API, kept
        # because the whole file uses the old Variable interface.
        if torch.cuda.is_available() and not args.nogpu:
            x = x.cuda(args.gpu_device_num)
        return Variable(x, volatile=volatile, requires_grad=requires_grad)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print("\nsaving at {}\n".format(args.save_dir))
    print("initializing...")

    # if args.layer_num is 5 and args.base_conv_channel is 64 then
    # gen_layer: [Z_dim, 512, 256, 128,  64, 3]
    # dis_layer: [    3,  64, 128, 256, 512, 1]
    gen_layers = [args.zl_dim + args.zg_dim + args.zp_dim] + [
        args.base_conv_channel * (2**(args.layer_num - n))
        for n in range(2, args.layer_num + 1)
    ] + [3]
    dis_layers = [3] + [
        args.base_conv_channel * (2**n) for n in range(args.layer_num - 1)
    ] + [1]
    print("generator channels: ", gen_layers)
    print("discriminator channels: ", dis_layers)

    # Build the networks once, then move them to the GPU when one is used
    # (the original duplicated both constructor calls in each branch).
    generator = Generator(conv_channels=gen_layers,
                          kernel_size=args.kernel_size,
                          local_noise_dim=args.zl_dim,
                          global_noise_dim=args.zg_dim,
                          periodic_noise_dim=args.zp_dim,
                          spatial_size=args.spatial_size,
                          hidden_noise_dim=args.mlp_hidden_dim)
    discriminator = Discriminator(conv_channels=dis_layers,
                                  kernel_size=args.kernel_size)
    if torch.cuda.is_available() and not args.nogpu:
        generator = generator.cuda(args.gpu_device_num)
        discriminator = discriminator.cuda(args.gpu_device_num)

    if args.show_parameters:
        # BUG FIX: the original iterated over an undefined name ``model``
        # (NameError when --show_parameters); dump both networks instead.
        for model in (generator, discriminator):
            for idx, m in enumerate(model.modules()):
                print(idx, '->', m)

        print(args)

    # training setting
    if args.sgd:
        generator_optimizer = torch.optim.SGD(generator.parameters(),
                                              lr=args.learning_rate_g,
                                              momentum=0.9,
                                              weight_decay=1e-8)
        # BUG FIX: the discriminator previously reused the generator's
        # learning rate (copy-paste); use the discriminator rate instead.
        discriminator_optimizer = torch.optim.SGD(discriminator.parameters(),
                                                  lr=args.learning_rate_d,
                                                  momentum=0.9,
                                                  weight_decay=1e-8)
    else:
        # BUG FIX: the generator previously used args.learning_rate_d here
        # (copy-paste); use the generator's own learning rate.
        generator_optimizer = torch.optim.Adam(generator.parameters(),
                                               lr=args.learning_rate_g,
                                               weight_decay=1e-8,
                                               betas=(args.adam_beta, 0.999))
        discriminator_optimizer = torch.optim.Adam(discriminator.parameters(),
                                                   lr=args.learning_rate_d,
                                                   weight_decay=1e-8,
                                                   betas=(args.adam_beta,
                                                          0.999))

    # for cropping size
    img_size = args.spatial_size * (2**args.layer_num)

    train_loader = get_loader(data_set=dataset_setting.get_dtd_train_loader(
        args, img_size),
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)

    # CSV logger for the per-epoch training losses
    tlog = TrainLogger("train_log",
                       log_dir=args.save_dir,
                       csv=True,
                       header=True,
                       suppress_err=False)
    tlog.disable_pickle_object()
    tlog.set_default_Keys(
        ["epoch", "total_loss", "discriminator_loss", "generator_loss"])

    # output from discriminator is [0,1] of each patch, exsisting spatial_size*spatial_size number.
    true_label = torch.ones(args.batch_size,
                            args.spatial_size * args.spatial_size)
    fake_label = torch.zeros(args.batch_size,
                             args.spatial_size * args.spatial_size)

    # fixed noise so the periodically-saved samples are comparable over time
    fixed_noise = to_var(generator.generate_noise(
        batch_size=8,
        local_dim=args.zl_dim,
        global_dim=args.zg_dim,
        periodic_dim=args.zp_dim,
        spatial_size=args.spatial_size,
        tile=args.tile),
                         volatile=False)

    epochs = tqdm(range(args.epochs), ncols=100, desc="train")

    for epoch in epochs:
        # per-epoch running sums (progress bar + CSV log)
        epoch_total_loss = 0.0
        epoch_total_dloss = 0.0
        epoch_total_gloss = 0.0

        # step-decay of the learning rate (SGD only)
        if (epoch + 1) % args.decay_every == 0 and args.sgd:
            for param_group in generator_optimizer.param_groups:
                param_group['lr'] *= args.decay_value

            for param_group in discriminator_optimizer.param_groups:
                param_group['lr'] *= args.decay_value

            tqdm.write("decayed learning rate, factor {}".format(
                args.decay_value))

        _train_loader = tqdm(train_loader, ncols=100)

        for images in _train_loader:
            # the last batch may be smaller; slice the label tensors to match
            batch_size = images.shape[0]

            imgs = to_var(images, volatile=False)
            true_labels = to_var(true_label[:batch_size], volatile=False)
            fake_labels = to_var(fake_label[:batch_size], volatile=False)
            noise = to_var(
                generator.generate_noise(batch_size=batch_size,
                                         local_dim=args.zl_dim,
                                         global_dim=args.zg_dim,
                                         periodic_dim=args.zp_dim,
                                         spatial_size=args.spatial_size,
                                         tile=args.tile))

            # generate fake image
            fake_img = generator(noise)

            # train discriminator ################################################################
            discriminator_optimizer.zero_grad()
            ######## train discriminator with real image
            discriminator_pred = discriminator(imgs)
            discriminator_true_loss = F.binary_cross_entropy(
                discriminator_pred, true_labels)

            epoch_total_loss += discriminator_true_loss.item()
            epoch_total_dloss += discriminator_true_loss.item()

            discriminator_true_loss.backward()

            ######## train discriminator with fake image
            # detach() keeps the generator out of the discriminator's graph
            discriminator_pred = discriminator(fake_img.detach())
            discriminator_fake_loss = F.binary_cross_entropy(
                discriminator_pred, fake_labels)

            epoch_total_loss += discriminator_fake_loss.item()
            epoch_total_dloss += discriminator_fake_loss.item()

            discriminator_fake_loss.backward()
            discriminator_optimizer.step()

            # train generator ####################################################################
            generator_optimizer.zero_grad()

            # non-saturating GAN loss: push D's output on fakes toward "real"
            fake_discriminate = discriminator(fake_img)
            generator_loss = F.binary_cross_entropy(fake_discriminate,
                                                    true_labels)

            epoch_total_loss += generator_loss.item()
            epoch_total_gloss += generator_loss.item()

            generator_loss.backward()
            generator_optimizer.step()

            _train_loader.set_description(
                "train[{}] dloss: {:.5f}, gloss: {:.5f}".format(
                    args.save_dir, epoch_total_dloss, epoch_total_gloss))

        if (epoch + 1) % args.save_sample_every == 0:
            generator.eval()
            # generate fake image from the fixed noise
            fake_img = generator(fixed_noise)

            # un-normalize from [-1, 1] back to [0, 1] before saving
            save_image(fake_img.mul(0.5).add(0.5).cpu(),
                       output_dir=args.save_dir,
                       img_name="sample_e{}".format(epoch + 1))
            generator.train()

        tqdm.write("[#{}]train epoch dloss: {:.5f}, gloss: {:.5f}".format(
            epoch + 1, epoch_total_dloss, epoch_total_gloss))

        tlog.log([
            epoch + 1,
            float(epoch_total_loss),
            float(epoch_total_dloss),
            float(epoch_total_gloss)
        ])

        # save model
        if (epoch + 1) % args.save_model_every == 0:
            generator_state = {
                'epoch': epoch + 1,
                'optimizer_state_dict': generator_optimizer.state_dict()
            }
            discriminator_state = {
                'epoch': epoch + 1,
                'optimizer_state_dict': discriminator_optimizer.state_dict()
            }
            generator.save(add_state=generator_state,
                           file_name=os.path.join(
                               args.save_dir,
                               'generator_param_epoch{}.pth'.format(epoch +
                                                                    1)))
            discriminator.save(
                add_state=discriminator_state,
                file_name=os.path.join(
                    args.save_dir,
                    'discriminator_param_epoch{}.pth'.format(epoch + 1)))

            tqdm.write("model saved.")

    # saving training result
    # NOTE: the original also passed datetime.now() here, but str.format
    # silently ignores extra positional arguments, so the filenames below
    # are unchanged; the dead argument has been removed.
    generator.save(
        add_state={'optimizer_state_dict': generator_optimizer.state_dict()},
        file_name=os.path.join(
            args.save_dir,
            'generator_param_fin_{}.pth'.format(epoch + 1)))
    discriminator.save(
        add_state={
            'optimizer_state_dict': discriminator_optimizer.state_dict()
        },
        file_name=os.path.join(
            args.save_dir,
            'discriminator_param_fin_{}.pth'.format(epoch + 1)))

    print("data is saved at {}".format(args.save_dir))
# --- Ejemplo n.º 6 (Example no. 6; scraped separator, vote count 0) ---
def train_model(model,
                train_loader,
                val_loader,
                train_dataset,
                optimizer,
                criterion,
                scheduler,
                device,
                cfg,
                visualize=True):
    """Run the full train/validate loop and return the best validation accuracy.

    Each epoch trains via ``train``, evaluates via ``test``, logs the
    validation loss/accuracy and a row-normalized confusion matrix to
    tensorboard (and to the optional visualizer), and writes a checkpoint
    when ``cfg.PATHS.CHECKPOINTS_PATH`` is configured.  A loss/accuracy
    history plot is saved at the end.

    :return: the highest validation accuracy seen over all epochs
    """
    # TensorboardX writer.
    tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR, flush_secs=1)

    # A simple logger is used for the losses.
    logger = TrainLogger()

    # Init a visualizer on a fixed test batch (only when requested).
    visualizer = utils.init_vis(train_dataset,
                                cfg.TRAIN.LOG_DIR) if visualize else None

    # Train.
    val_max_acc = -1.
    for epoch in range(cfg.TRAIN.EPOCHS):
        train_steps = train(model,
                            device,
                            train_loader,
                            optimizer,
                            criterion,
                            epoch,
                            scheduler,
                            visualizer=visualizer,
                            tb_writer=tb_writer,
                            logger=logger)

        # Global step used to index all validation metrics.
        global_step = (epoch + 1) * train_steps

        val_loss, val_accuracy, targets, preds = test(model, device,
                                                      val_loader, criterion,
                                                      cfg.TEST.BATCH_SIZE)

        # Row-normalized confusion matrix (per-class recall on the diagonal).
        conf_mat = confusion_matrix(targets, preds)
        conf_mat = conf_mat.astype('float') / conf_mat.sum(axis=1)[:,
                                                                   np.newaxis]
        if visualizer is not None:
            visualizer.add_conf_mat(conf_mat, global_step)

        tb_writer.add_scalar('val_loss', val_loss, global_step)
        tb_writer.add_scalar('val_accuracy', val_accuracy, global_step)
        logger.add_val(val_loss, val_accuracy / 100., global_step)

        # Save checkpoint.
        if cfg.PATHS.CHECKPOINTS_PATH != '':
            save_checkpoint(model, optimizer, epoch,
                            cfg.PATHS.CHECKPOINTS_PATH)

        # Track the best validation accuracy seen so far.
        val_max_acc = max(val_max_acc, val_accuracy)

    plot_history(logger, save_path=cfg.TRAIN.LOG_DIR + '/history.png')
    tb_writer.close()

    return val_max_acc