Example #1
    def fit(self, X, Y):
        if self.loss == "mse":
            loss = MSELoss()
        elif self.loss == "crossentropy":
            loss = CrossEntropyLoss()

        # convert Y to one-hot encoding
        if self.classifier:
            Y = to_one_hot(Y.flatten())
        else:
            # if Y has shape (N,), reshape it to (N, 1)
            Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

        N, M = X.shape
        # out_dims: number of output dimensions (the number of classes for a classifier, 1 for continuous Y)
        self.out_dims = Y.shape[1]
        # Record all the learners (all the trees)
        self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
        # weights for each learner's prediction (since we skip the line search step, we just use the learning_rate)
        # The very first iteration has weights equal to 1 (so we don't multiply by self.learning_rate)
        self.weights = np.ones((self.n_iter, self.out_dims))
        self.weights[1:, :] *= self.learning_rate

        # Predicted values: N samples, each with self.out_dims dimensions
        Y_pred = np.zeros((N, self.out_dims))
        # Very first iteration: use the loss's base estimator (the mean) to predict
        for k in range(self.out_dims):
            t = loss.base_estimator()
            t.fit(X, Y[:, k])
            Y_pred[:, k] = t.predict(X)
            self.learners[0, k] = t

        # Incrementally fit each learner on the negative gradient of the loss
        # wrt the previous fit (pseudo-residuals)
        for i in range(1, self.n_iter):
            for k in range(self.out_dims):
                y, y_pred = Y[:, k], Y_pred[:, k]
                neg_grad = -1 * loss.grad(y, y_pred)
                # use MSE as the surrogate loss when fitting to negative gradients
                t = DecisionTree(classifier=False,
                                 max_depth=self.max_depth,
                                 criterion="mse")
                # fit the learner to the negative gradients of the current loss function
                t.fit(X, neg_grad)
                self.learners[i, k] = t

                # compute step size and weight for the current learner
                step = 1.0
                h_pred = t.predict(X)

                # We skip the line search step here

                # update weights and our overall prediction for Y
                self.weights[i, k] *= step
                Y_pred[:, k] += self.weights[i, k] * h_pred
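
The fit() above only needs a small interface from the loss object: base_estimator() supplies the first-round learner and grad(y, y_pred) the gradient of the loss with respect to the current predictions. As a point of reference, a minimal MSELoss satisfying that contract could look like the sketch below (the method names mirror the calls above; the bodies are assumptions, not the original implementation):

import numpy as np

class MSELoss:
    """Sketch of the loss interface assumed by fit() above."""

    class _MeanPredictor:
        # hypothetical first-round learner: always predicts the training mean
        def fit(self, X, y):
            self.avg = np.mean(y)
            return self

        def predict(self, X):
            return np.full(X.shape[0], self.avg)

    def base_estimator(self):
        return self._MeanPredictor()

    def grad(self, y, y_pred):
        # gradient of 0.5 * (y - y_pred)^2 w.r.t. y_pred; the negative
        # gradient used in fit() is then just the residual y - y_pred
        return y_pred - y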
Example #2
def training(m, inputs, targets, batch_size, nb_epochs, lr):
    """
    Training function
    :param m: model
    :param inputs: input data
    :param targets: training targets
    :param batch_size: mini-batch size
    :param nb_epochs: number of training epochs
    :param lr: learning rate
    :return: None
    """
    criterion = MSELoss()
    optimizer = SGD(m, lr, momentum=0.9)

    for epoch in range(nb_epochs):
        for batch in range(0, inputs.size(0), batch_size):
            output = m.forward(inputs.narrow(0, batch, batch_size))
            loss = criterion.forward(output,
                                     targets.narrow(0, batch, batch_size))
            dl = criterion.backward()
            m.backward(dl)
            optimizer.step()
        if (epoch % 50 == 0) or (epoch == nb_epochs - 1):
            print('Epoch: {}    Loss: {:.04f}'.format(epoch, loss.item()))
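
The loop assumes a loss object whose forward(output, target) returns the loss value and whose backward() returns the gradient of that loss with respect to the model output, which is then passed to m.backward(dl). A minimal MSELoss matching that contract might look like this (the signatures mirror the calls above; the bodies are an assumption):

import torch

class MSELoss:
    """Sketch of a forward/backward loss compatible with training() above."""

    def forward(self, output, target):
        # cache the residuals for the backward pass
        self.diff = output - target
        return (self.diff ** 2).mean()

    def backward(self):
        # gradient of the mean squared error w.r.t. the model output
        return 2.0 * self.diff / self.diff.numel()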
Example #3
    def fit(self, X, Y):
        """
        Fit the gradient boosted decision trees on a dataset
        :param X: feature matrix of shape (N, M)
        :param Y: targets of shape (N,) or (N, K)
        :return: None
        """
        if self.loss == "mse":
            loss = MSELoss()
        elif self.loss == "crossentropy":
            loss = CrossEntropyLoss()

        if self.classifier:
            Y = to_one_hot(Y.flatten())
        else:
            Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

        N, M = X.shape
        self.out_dims = Y.shape[1]
        self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
        self.weights = np.ones((self.n_iter, self.out_dims))
        self.weights[1:, :] = self.learning_rate

        # fit the base estimator
        Y_pred = np.zeros((N, self.out_dims))
        for k in range(self.out_dims):
            t = loss.base_estimator()
            t.fit(X, Y[:, k])
            Y_pred[:, k] += t.predict(X)
            self.learners[0, k] = t

        # incrementally fit each learner on the negative gradient of the loss
        for i in range(1, self.n_iter):
            for k in range(self.out_dims):
                y, y_pred = Y[:, k], Y_pred[:, k]
                neg_grad = -1 * loss.grad(y, y_pred)

                t = DecisionTree(classifier=False,
                                 max_depth=self.max_depth,
                                 criterion="mse")

                t.fit(X, neg_grad)
                self.learners[i, k] = t

                step = 1.0
                h_pred = t.predict(X)

                if self.step_size == "adaptive":
                    step = loss.line_search(y, y_pred, h_pred)

                self.weights[i, k] *= step
                Y_pred[:, k] += self.weights[i, k] * h_pred
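
Unlike Example #1, this version can replace the fixed step of 1.0 with a line search, delegating to loss.line_search(y, y_pred, h_pred). For a squared-error loss the optimal step has a closed form, since choosing the step is a one-dimensional least-squares problem; a sketch of such a method (the name matches the call above, the body is an assumption):

import numpy as np

def line_search(y, y_pred, h_pred):
    # argmin over s of sum_i (y_i - (y_pred_i + s * h_pred_i))^2
    # has the closed form  <h, y - y_pred> / <h, h>
    residual = y - y_pred
    return np.dot(h_pred, residual) / (np.dot(h_pred, h_pred) + 1e-12)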
Example #4
    def fit(self, X, Y):
        if self.loss == "mse":
            loss = MSELoss()
        elif self.loss == "crossentropy":
            loss = CrossEntropyLoss()

        # convert Y to one_hot if not already
        if self.classifier:
            Y = to_one_hot(Y.flatten())
        else:
            Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

        N, M = X.shape
        self.out_dims = Y.shape[1]
        self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
        self.weights = np.ones((self.n_iter, self.out_dims))
        self.weights[1:, :] *= self.learning_rate

        # fit the base estimator
        Y_pred = np.zeros((N, self.out_dims))
        for k in range(self.out_dims):
            t = loss.base_estimator()
            t.fit(X, Y[:, k])
            Y_pred[:, k] += t.predict(X)
            self.learners[0, k] = t

        # incrementally fit each learner on the negative gradient of the loss
        # wrt the previous fit (pseudo-residuals)
        for i in range(1, self.n_iter):
            for k in range(self.out_dims):
                y, y_pred = Y[:, k], Y_pred[:, k]
                neg_grad = -1 * loss.grad(y, y_pred)

                # use MSE as the surrogate loss when fitting to negative gradients
                t = DecisionTree(classifier=False,
                                 max_depth=self.max_depth,
                                 criterion="mse")

                # fit current learner to negative gradients
                t.fit(X, neg_grad)
                self.learners[i, k] = t

                # compute step size and weight for the current learner
                step = 1.0
                h_pred = t.predict(X)
                if self.step_size == "adaptive":
                    step = loss.line_search(y, y_pred, h_pred)

                # update weights and our overall prediction for Y
                self.weights[i, k] *= step
                Y_pred[:, k] += self.weights[i, k] * h_pred
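
Example #4 differs from Example #3 only in its comments. What fit() produces is the learners array and the per-learner weights; prediction then adds up the weighted outputs of every learner per output dimension, exactly as Y_pred is accumulated during fitting. A sketch of such a combination step (the helper name is hypothetical):

import numpy as np

def gbdt_predict(X, learners, weights):
    """Sum the weighted predictions of every fitted learner per output dim."""
    n_iter, out_dims = learners.shape
    Y_pred = np.zeros((X.shape[0], out_dims))
    for i in range(n_iter):
        for k in range(out_dims):
            Y_pred[:, k] += weights[i, k] * learners[i, k].predict(X)
    return Y_pred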
Example #5
    def __init__(self, config,
                 net_d: nn.Module = None, net_g: nn.Module = None,
                 net_p: nn.Module = None):
        super().__init__()
        self.config = config
        assert config.mode in MODES
        self.mode = config.mode
        self.log_dir = config.log_dir
        self.loss_config = getattr(config.model.losses, self.mode)
        self.device = torch.device('cuda:0') if torch.cuda.is_available() \
            else torch.device('cpu')
        if getattr(config.data, 'norm', None) is not None:
            self.input_mean = torch.tensor(
                config.data.norm.mean, device=self.device
            ).view(1, 3, 1, 1)
            self.input_std = torch.tensor(
                config.data.norm.std, device=self.device
            ).view(1, 3, 1, 1)
        else:
            self.input_mean = None
            self.input_std = None
        self.net_d = net_d
        self.net_g = net_g
        self.net_p = net_p

        # GAN loss
        self.gan_loss = LOSSES[config.model.gan.type](
            with_logits=config.model.gan.with_logits
        )
        if self.net_d is not None and self.gan_loss.need_lipschitz_d:
            for m in [m for m in self.net_d.modules() if m._parameters]:
                nn.utils.spectral_norm(m)

        # MLE loss
        self.mle = config.model.mle.type
        if self.mle == 'gaussian':
            self.mle_loss = GaussianMLELoss(
                **config.model.mle.options._asdict())
        elif self.mle == 'laplace':
            self.mle_loss = LaplaceMLELoss(
                **config.model.mle.options._asdict())
        else:
            raise ValueError('Invalid MLE loss type: %s' % self.mle)

        # MMD loss
        if getattr(self.loss_config, 'mmd_weight', 0) > 0:
            self.mmd_loss = MMDLoss(**self.config.model.mmd.options._asdict())
        else:
            self.mmd_loss = None

        # Other losses
        self._l1_loss = L1Loss()
        self._mse_loss = MSELoss()

        # Build optimizers
        self.optim_d, self.optim_g, self.optim_p = None, None, None
        if net_d is not None and self.mode != MODE_PRED \
                and self.loss_config.gan_weight:
            self.optim_d = self._build_optimizer(
                config.d_optimizer, net_d.parameters())
        if net_g is not None and self.mode != MODE_PRED:
            self.optim_g = self._build_optimizer(
                config.g_optimizer, net_g.parameters())
        if net_p is not None and self.mode == MODE_PRED:
            self.optim_p = self._build_optimizer(
                config.p_optimizer, net_p.parameters())

        # Build learning rate schedulers
        self.lr_sched_d, self.lr_sched_g, self.lr_sched_p = None, None, None
        if self.optim_d is not None and config.d_lr_scheduler is not None:
            self.lr_sched_d = self._build_lr_scheduler(
                config.d_lr_scheduler, self.optim_d)
        if self.optim_g is not None and config.g_lr_scheduler is not None:
            self.lr_sched_g = self._build_lr_scheduler(
                config.g_lr_scheduler, self.optim_g)
        if self.optim_p is not None and config.p_lr_scheduler is not None:
            self.lr_sched_p = self._build_lr_scheduler(
                config.p_lr_scheduler, self.optim_p)

        # Set train/eval
        if self.mode == MODE_BASE:
            net_g.train()
            net_d.train()
        elif self.mode == MODE_PRED:
            net_p.train()
        elif self.mode == MODE_MR:
            net_g.train()
            net_d.train()
            if net_p is not None:
                net_p.eval()
        else:
            raise NotImplementedError
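
One detail worth noting in this constructor: when the selected GAN loss reports need_lipschitz_d, spectral normalization is applied to every parameterized module of the discriminator. For reference, torch.nn.utils.spectral_norm re-parameterizes a module's weight so that its spectral norm is kept at roughly 1; standalone usage on a single layer looks like this:

import torch.nn as nn

# Wrap a layer so its `weight` is re-parameterized with spectral normalization,
# a common way to keep a GAN discriminator approximately 1-Lipschitz.
conv = nn.utils.spectral_norm(nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1))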
Example #6
    def __init__(self, opt):
        """Initialize the CycleGAN class.

        Parameters:
            opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
        """
        BaseModel.__init__(self, opt)
        # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
        self.loss_names = [
            'D_AB', 'G_AB', 'cycle_ABA', 'D_BA', 'G_BA', 'cycle_BAB'
        ]
        # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
        visual_names_A = ['real_A', 'fake_B', 'rec_A']
        visual_names_B = ['real_B', 'fake_A', 'rec_B']
        #if self.isTrain and self.opt.lambda_identity > 0.0:  # if identity loss is used, we also visualize idt_B=G_A(B) and idt_A=G_A(B)
        #    visual_names_A.append('idt_B')
        #    visual_names_B.append('idt_A')

        # self.visual_names = visual_names_A + visual_names_B  # combine visualizations for A and B
        # specify the models you want to save to the disk. The training/test scripts will call <BaseModel.save_networks> and <BaseModel.load_networks>.
        if self.isTrain:
            self.model_names = ['G_AB', 'G_BA', 'D_AB', 'D_BA']
        else:  # during test time, only load Gs
            self.model_names = ['G_AB', 'G_BA']

        # define networks (both Generators and discriminators)
        # The naming is different from those used in the paper.
        # Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)

        self.netG_AB, self.netG_BA = networks.define_Gs(
            opt.task, opt.network_type, opt.language, 'en', self.gpu_ids,
            opt.freeze_GB_encoder)

        if self.isTrain:  # define discriminators

            netDAB_name = networks.define_name(opt.netD, 'en')
            netDBA_name = networks.define_name(opt.netD, opt.language)

            self.netD_AB = networks.define_D(opt.netD, netDAB_name,
                                             self.gpu_ids)
            self.netD_BA = networks.define_D(opt.netD, netDBA_name,
                                             self.gpu_ids)

        if self.isTrain:
            # define loss functions
            self.criterionGAN = networks.GANLoss(opt.gan_mode).to(
                self.device)  # define GAN loss.

            if opt.loss_type == 'cosine':
                self.criterionCycle = CosineSimilarityLoss().to(self.device)
            elif opt.loss_type == 'mse':
                self.criterionCycle = MSELoss().to(self.device)
            else:
                raise NotImplementedError(opt.loss_type + " not implemented")

            self.criterionIdt = torch.nn.CosineEmbeddingLoss()  # alternative: CosineSimilarityLoss()
            # initialize optimizers; schedulers will be automatically created by function <BaseModel.setup>.
            if opt.freeze_GB_encoder is False:
                self.optimizer_G = torch.optim.Adam(itertools.chain(
                    self.netG_AB.parameters(), self.netG_BA.parameters()),
                                                    lr=opt.lr,
                                                    betas=(opt.beta1, 0.999))
            else:
                self.optimizer_G = torch.optim.Adam(itertools.chain(
                    self.netG_AB.parameters(),
                    self.netG_BA.module.model.base_model.decoder.parameters()),
                                                    lr=opt.lr,
                                                    betas=(opt.beta1, 0.999))

            self.optimizer_D = torch.optim.Adam(itertools.chain(
                self.netD_AB.parameters(), self.netD_BA.parameters()),
                                                lr=opt.lr,
                                                betas=(opt.beta1, 0.999))
            self.optimizers.append(self.optimizer_G)
            self.optimizers.append(self.optimizer_D)

        self.loss_G_AB = 0
        self.loss_G_BA = 0
        self.loss_D_AB = 0
        self.loss_D_BA = 0
        self.loss_cycle_ABA = 0
        self.loss_cycle_BAB = 0
        self.loss_G = 0
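
The criteria created here (criterionGAN for the adversarial terms, criterionCycle for the two cycle-consistency terms) are typically combined into a single generator objective matching the loss names declared above. The following is only a hedged sketch of one such generator update; the function, its arguments, and the lambda_cycle weight are assumptions rather than this model's actual optimization code:

def generator_step(real_A, real_B, netG_AB, netG_BA, netD_AB, netD_BA,
                   criterionGAN, criterionCycle, optimizer_G, lambda_cycle=10.0):
    """One hypothetical generator update using the criteria defined above."""
    optimizer_G.zero_grad()
    fake_B = netG_AB(real_A)        # translate A -> B
    rec_A = netG_BA(fake_B)         # reconstruct A -> B -> A
    fake_A = netG_BA(real_B)        # translate B -> A
    rec_B = netG_AB(fake_A)         # reconstruct B -> A -> B

    loss_G_AB = criterionGAN(netD_AB(fake_B), True)    # fool D_AB
    loss_G_BA = criterionGAN(netD_BA(fake_A), True)    # fool D_BA
    loss_cycle_ABA = criterionCycle(rec_A, real_A) * lambda_cycle
    loss_cycle_BAB = criterionCycle(rec_B, real_B) * lambda_cycle

    loss_G = loss_G_AB + loss_G_BA + loss_cycle_ABA + loss_cycle_BAB
    loss_G.backward()
    optimizer_G.step()
    return loss_G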
Example #7
import numpy as np
from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

np.random.seed(0)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    loss.backward()
    optim.step()
    print(loss)