def fit(self, X, Y):
    if self.loss == "mse":
        loss = MSELoss()
    elif self.loss == "crossentropy":
        loss = CrossEntropyLoss()

    # convert Y to a one-hot encoding
    if self.classifier:
        Y = to_one_hot(Y.flatten())
    else:
        # if Y has shape (N,), reshape it to (N, 1)
        Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

    N, M = X.shape
    # out_dims is the number of classes in the outcome; 1 for continuous Y
    self.out_dims = Y.shape[1]
    # record all the learners (all the trees)
    self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
    # weights for each prediction (since we skip the line search step, we just
    # use the learning rate here); the very first iteration has weight 1, so
    # it is not multiplied by self.learning_rate
    self.weights = np.ones((self.n_iter, self.out_dims))
    self.weights[1:, :] *= self.learning_rate

    # predicted values: N samples, each with self.out_dims dimensions
    Y_pred = np.zeros((N, self.out_dims))
    # very first iteration: use the mean to predict
    for k in range(self.out_dims):
        t = loss.base_estimator()
        t.fit(X, Y[:, k])
        Y_pred[:, k] = t.predict(X)
        self.learners[0, k] = t

    # incrementally fit each learner on the negative gradient of the loss
    # wrt the previous fit (pseudo-residuals)
    for i in range(1, self.n_iter):
        for k in range(self.out_dims):
            y, y_pred = Y[:, k], Y_pred[:, k]
            neg_grad = -1 * loss.grad(y, y_pred)

            # use MSE as the surrogate loss when fitting to negative gradients
            t = DecisionTree(
                classifier=False, max_depth=self.max_depth, criterion="mse"
            )

            # fit X to the negative gradients of the current loss function
            t.fit(X, neg_grad)
            self.learners[i, k] = t

            # compute the step size and weight for the current learner
            step = 1.0
            h_pred = t.predict(X)
            # we skip the line search step

            # update the weights and our overall prediction for Y
            self.weights[i, k] *= step
            Y_pred[:, k] += self.weights[i, k] * h_pred
def fit(self, X, Y): """ Fit the gradient boosted decision trees on a dataset :param X: :param Y: :return: """ if self.loss == "mse": loss = MSELoss() elif self.loss == "crossentropy": loss = CrossEntropyLoss() if self.classifier: Y = to_one_hot(Y.flatten()) else: Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y N, M = X.shape self.out_dims = Y.shape[1] self.learners = np.empty((self.n_iter, self.out_dims), dtype=object) self.weights = np.ones((self.n_iter, self.out_dims)) self.weights[1:, :] = self.learning_rate # fit the base estimator Y_pred = np.zeros((N, self.out_dims)) for k in range(self.out_dims): t = loss.base_estimator() t.fit(X, Y[:, k]) Y_pred[:, k] += t.predict(X) self.learners[0, k] = t # incrementally fit each learner on the negative gradient of the loss for i in range(1, self.n_iter): for k in range(self.out_dims): y, y_pred = Y[:, k], Y_pred[:, k] neg_grad = -1 * loss.grad(y, y_pred) t = DecisionTree(classifier=False, max_depth=self.max_depth, criterion="mse") t.fit(X, neg_grad) self.learners[i, k] = t step = 1.0 h_pred = t.predict(X) if self.step_size == "adaptive": step = loss.line_search(y, y_pred, h_pred) self.weights[i, k] *= step Y_pred[:, k] += self.weights[i, k] * h_pred
def fit(self, X, Y):
    if self.loss == "mse":
        loss = MSELoss()
    elif self.loss == "crossentropy":
        loss = CrossEntropyLoss()

    # convert Y to one-hot if not already
    if self.classifier:
        Y = to_one_hot(Y.flatten())
    else:
        Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

    N, M = X.shape
    self.out_dims = Y.shape[1]
    self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
    self.weights = np.ones((self.n_iter, self.out_dims))
    self.weights[1:, :] *= self.learning_rate

    # fit the base estimator
    Y_pred = np.zeros((N, self.out_dims))
    for k in range(self.out_dims):
        t = loss.base_estimator()
        t.fit(X, Y[:, k])
        Y_pred[:, k] += t.predict(X)
        self.learners[0, k] = t

    # incrementally fit each learner on the negative gradient of the loss
    # wrt the previous fit (pseudo-residuals)
    for i in range(1, self.n_iter):
        for k in range(self.out_dims):
            y, y_pred = Y[:, k], Y_pred[:, k]
            neg_grad = -1 * loss.grad(y, y_pred)

            # use MSE as the surrogate loss when fitting to negative gradients
            t = DecisionTree(
                classifier=False, max_depth=self.max_depth, criterion="mse"
            )

            # fit current learner to negative gradients
            t.fit(X, neg_grad)
            self.learners[i, k] = t

            # compute step size and weight for the current learner
            step = 1.0
            h_pred = t.predict(X)
            if self.step_size == "adaptive":
                step = loss.line_search(y, y_pred, h_pred)

            # update weights and our overall prediction for Y
            self.weights[i, k] *= step
            Y_pred[:, k] += self.weights[i, k] * h_pred
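The two adaptive variants above delegate the step size to loss.line_search, whose body is not shown. For MSELoss the line search has a closed form: minimizing sum((y - (y_pred + step * h_pred)) ** 2) over step gives step = <r, h> / <h, h> with residual r = y - y_pred. Below is a minimal sketch under that assumption; the helper name and the epsilon guard are illustrative, not from the source.

import numpy as np

def mse_line_search(y, y_pred, h_pred, eps=1e-12):
    """Hypothetical closed-form line search for squared error.

    Solves argmin_s sum((y - (y_pred + s * h_pred)) ** 2), which gives
    s = <r, h> / <h, h> for the residual r = y - y_pred.
    """
    r = y - y_pred
    return np.sum(r * h_pred) / (np.sum(h_pred * h_pred) + eps)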
def training(m, inputs, targets, batch_size, nb_epochs, lr):
    """
    Train the model with mini-batch SGD.

    :param m: model to train
    :param inputs: input data, of shape (N, ...)
    :param targets: target values for each input
    :param batch_size: number of samples per mini-batch
    :param nb_epochs: number of passes over the dataset
    :param lr: learning rate
    :return: None
    """
    criterion = MSELoss()
    optimizer = SGD(m, lr, momentum=0.9)
    for epoch in range(nb_epochs):
        for batch in range(0, inputs.size(0), batch_size):
            # forward pass on the current mini-batch
            output = m.forward(inputs.narrow(0, batch, batch_size))
            loss = criterion.forward(output, targets.narrow(0, batch, batch_size))
            # backward pass: propagate the loss gradient through the model
            dl = criterion.backward()
            m.backward(dl)
            optimizer.step()
        if (epoch % 50 == 0) or (epoch == nb_epochs - 1):
            print('Epoch: {} Loss: {:.04f}'.format(epoch, loss.item()))
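A call to training might look like the following sketch. The Sequential, Linear, and Tanh classes stand in for whatever mini-framework model is being trained (they are assumptions, not shown here); the only hard requirements visible above are that m exposes forward/backward and that inputs and targets support size(0) and narrow, as torch tensors do.

import torch

# hypothetical model built from the same mini-framework (classes assumed)
model = Sequential(Linear(10, 25), Tanh(), Linear(25, 1))

inputs = torch.randn(1000, 10)             # 1000 samples, 10 features
targets = inputs.sum(dim=1, keepdim=True)  # synthetic regression targets

# batch_size should divide the sample count evenly, since narrow() is used
training(model, inputs, targets, batch_size=50, nb_epochs=200, lr=1e-2)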
def __init__(self, config, net_d: nn.Module = None, net_g: nn.Module = None,
             net_p: nn.Module = None):
    super().__init__()
    self.config = config
    assert config.mode in MODES
    self.mode = config.mode
    self.log_dir = config.log_dir
    self.loss_config = getattr(config.model.losses, self.mode)

    if getattr(config.data, 'norm', None) is not None:
        self.input_mean = torch.tensor(
            config.data.norm.mean, device='cuda').view(1, 3, 1, 1)
        self.input_std = torch.tensor(
            config.data.norm.std, device='cuda').view(1, 3, 1, 1)
    else:
        self.input_mean = None
        self.input_std = None

    self.device = torch.device('cuda:0') if torch.cuda.is_available() \
        else torch.device('cpu')

    self.net_d = net_d
    self.net_g = net_g
    self.net_p = net_p

    # GAN loss
    self.gan_loss = LOSSES[config.model.gan.type](
        with_logits=config.model.gan.with_logits)
    if self.net_d is not None and self.gan_loss.need_lipschitz_d:
        for m in [m for m in self.net_d.modules() if m._parameters]:
            nn.utils.spectral_norm(m)

    # MLE loss
    self.mle = config.model.mle.type
    if self.mle == 'gaussian':
        self.mle_loss = GaussianMLELoss(**config.model.mle.options._asdict())
    elif self.mle == 'laplace':
        self.mle_loss = LaplaceMLELoss(**config.model.mle.options._asdict())
    else:
        raise ValueError('Invalid MLE loss type: %s' % self.mle)

    # MMD loss
    if getattr(self.loss_config, 'mmd_weight', 0) > 0:
        self.mmd_loss = MMDLoss(**self.config.model.mmd.options._asdict())
    else:
        self.mmd_loss = None

    # Other losses
    self._l1_loss = L1Loss()
    self._mse_loss = MSELoss()

    # Build optimizers
    self.optim_d, self.optim_g, self.optim_p = None, None, None
    if net_d is not None and self.mode != MODE_PRED \
            and self.loss_config.gan_weight:
        self.optim_d = self._build_optimizer(
            config.d_optimizer, net_d.parameters())
    if net_g is not None and self.mode != MODE_PRED:
        self.optim_g = self._build_optimizer(
            config.g_optimizer, net_g.parameters())
    if net_p is not None and self.mode == MODE_PRED:
        self.optim_p = self._build_optimizer(
            config.p_optimizer, net_p.parameters())

    # Build learning rate schedulers
    self.lr_sched_d, self.lr_sched_g, self.lr_sched_p = None, None, None
    if self.optim_d is not None and config.d_lr_scheduler is not None:
        self.lr_sched_d = self._build_lr_scheduler(
            config.d_lr_scheduler, self.optim_d)
    if self.optim_g is not None and config.g_lr_scheduler is not None:
        self.lr_sched_g = self._build_lr_scheduler(
            config.g_lr_scheduler, self.optim_g)
    if self.optim_p is not None and config.p_lr_scheduler is not None:
        self.lr_sched_p = self._build_lr_scheduler(
            config.p_lr_scheduler, self.optim_p)

    # Set train/eval
    if self.mode == MODE_BASE:
        net_g.train()
        net_d.train()
    elif self.mode == MODE_PRED:
        net_p.train()
    elif self.mode == MODE_MR:
        net_g.train()
        net_d.train()
        if net_p is not None:
            net_p.eval()
    else:
        raise NotImplementedError
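The _build_optimizer helper above is called but not defined in this snippet. A plausible sketch, assuming the optimizer config carries a type string and an options namedtuple (consistent with the _asdict() calls elsewhere in this class, but still an assumption):

def _build_optimizer(self, optim_config, parameters):
    # Hypothetical dispatch table; the real helper may support more types.
    optim_cls = {
        'adam': torch.optim.Adam,
        'sgd': torch.optim.SGD,
    }[optim_config.type]
    return optim_cls(parameters, **optim_config.options._asdict())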
def __init__(self, opt):
    """Initialize the CycleGAN class.

    Parameters:
        opt (Option class) -- stores all the experiment flags; needs to be a
            subclass of BaseOptions
    """
    BaseModel.__init__(self, opt)
    # specify the training losses you want to print out. The training/test
    # scripts will call <BaseModel.get_current_losses>
    self.loss_names = [
        'D_AB', 'G_AB', 'cycle_ABA', 'D_BA', 'G_BA', 'cycle_BAB'
    ]
    # specify the images you want to save/display. The training/test scripts
    # will call <BaseModel.get_current_visuals>
    visual_names_A = ['real_A', 'fake_B', 'rec_A']
    visual_names_B = ['real_B', 'fake_A', 'rec_B']
    # if self.isTrain and self.opt.lambda_identity > 0.0:
    #     # if identity loss is used, we also visualize idt_B=G_A(B) and idt_A=G_B(A)
    #     visual_names_A.append('idt_B')
    #     visual_names_B.append('idt_A')
    # self.visual_names = visual_names_A + visual_names_B  # combine visualizations for A and B

    # specify the models you want to save to the disk. The training/test
    # scripts will call <BaseModel.save_networks> and <BaseModel.load_networks>.
    if self.isTrain:
        self.model_names = ['G_AB', 'G_BA', 'D_AB', 'D_BA']
    else:  # during test time, only load the Gs
        self.model_names = ['G_AB', 'G_BA']

    # define networks (both generators and discriminators)
    # The naming is different from that used in the paper.
    # Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
    self.netG_AB, self.netG_BA = networks.define_Gs(
        opt.task, opt.network_type, opt.language, 'en', self.gpu_ids,
        opt.freeze_GB_encoder)

    if self.isTrain:  # define discriminators
        netDAB_name = networks.define_name(opt.netD, 'en')
        netDBA_name = networks.define_name(opt.netD, opt.language)
        self.netD_AB = networks.define_D(opt.netD, netDAB_name, self.gpu_ids)
        self.netD_BA = networks.define_D(opt.netD, netDBA_name, self.gpu_ids)

    if self.isTrain:
        # define loss functions
        self.criterionGAN = networks.GANLoss(opt.gan_mode).to(self.device)  # GAN loss
        if opt.loss_type == 'cosine':
            self.criterionCycle = CosineSimilarityLoss().to(self.device)
        elif opt.loss_type == 'mse':
            self.criterionCycle = MSELoss().to(self.device)
        else:
            raise NotImplementedError(opt.loss_type + " not implemented")
        self.criterionIdt = torch.nn.CosineEmbeddingLoss()  # CosineSimilarityLoss()

        # initialize optimizers; schedulers will be automatically created by
        # function <BaseModel.setup>.
        if opt.freeze_GB_encoder is False:
            self.optimizer_G = torch.optim.Adam(
                itertools.chain(self.netG_AB.parameters(),
                                self.netG_BA.parameters()),
                lr=opt.lr, betas=(opt.beta1, 0.999))
        else:
            self.optimizer_G = torch.optim.Adam(
                itertools.chain(
                    self.netG_AB.parameters(),
                    self.netG_BA.module.model.base_model.decoder.parameters()),
                lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizer_D = torch.optim.Adam(
            itertools.chain(self.netD_AB.parameters(),
                            self.netD_BA.parameters()),
            lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizers.append(self.optimizer_G)
        self.optimizers.append(self.optimizer_D)

    self.loss_G_AB = 0
    self.loss_G_BA = 0
    self.loss_D_AB = 0
    self.loss_D_BA = 0
    self.loss_cycle_ABA = 0
    self.loss_cycle_BAB = 0
    self.loss_G = 0
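CosineSimilarityLoss is selected for the cycle loss when opt.loss_type == 'cosine', but its definition is not shown. A common formulation, and only an assumption here, penalizes one minus the cosine similarity between corresponding embeddings:

import torch.nn as nn

class CosineSimilarityLoss(nn.Module):
    """Hypothetical sketch: cycle loss as 1 - cos(a, b), batch-averaged."""

    def __init__(self, dim=-1, eps=1e-8):
        super().__init__()
        self.cos = nn.CosineSimilarity(dim=dim, eps=eps)

    def forward(self, a, b):
        return (1.0 - self.cos(a, b)).mean()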
import numpy as np

from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

np.random.seed(0)

# toy dataset: four 2-d inputs; the target is the second input feature
data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()
optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    # forward pass, backpropagate the loss, and update the parameters
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward()
    optim.step()
    print(loss)
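In frameworks of this style, MSELoss itself is usually composed from the Tensor operations, so backpropagation falls out of the autograd graph with no explicit backward method. A minimal sketch, assuming Tensor overloads -, *, and sum (the actual losses module may differ):

class MSELoss(object):
    # Hypothetical sketch: squared error summed over the batch dimension,
    # built entirely from autograd-aware Tensor operations.
    def forward(self, pred, target):
        diff = pred - target
        return (diff * diff).sum(0)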