Example #1
    def get_coefficients(self):
        ar, cr, a, b, c, d = self.term.get_coefficients()

        # Real components
        crd = cr * self.delta
        coeffs = [2 * ar * (torch.cosh(crd) - 1) / crd**2, cr]

        # Imaginary coefficients
        cd = c * self.delta
        dd = d * self.delta
        c2 = c**2
        d2 = d**2
        factor = 2.0 / (self.delta * (c2 + d2))**2
        cos_term = torch.cosh(cd) * torch.cos(dd) - 1
        sin_term = torch.sinh(cd) * torch.sin(dd)

        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d

        coeffs += [
            factor * (C1 * cos_term - C2 * sin_term),
            factor * (C2 * cos_term + C1 * sin_term),
            c,
            d,
        ]

        return coeffs
Example #2
def double_soliton(x: torch.Tensor, t: torch.Tensor, c: np.ndarray,
                   x0: np.ndarray) -> tuple:
    """Double soliton solution of the KdV equation (u_t + u_{xxx} - 6 u u_x = 0)
    source: http://lie.math.brocku.ca/~sanco/solitons/kdv_solitons.php

    Args:
        x ([Tensor]): Input vector of spatial coordinates.
        t ([Tensor]): Input vector of temporal coordinates.
        c ([Array]): Array containing the velocities of the two solitons; note that c[0] > c[1].
        x0 ([Array]): Array containing the offsets of the two solitons.

    Returns:
        ([Tensor], [Tensor]): Coordinates (t, x) and the solution u.
    """
    assert c[0] > c[1], "c[0] has to be bigger than c[1]"

    xi0 = np.sqrt(c[0]) / 2 * (x - c[0] * t - x0[0])  # switch to moving coordinate frame
    xi1 = np.sqrt(c[1]) / 2 * (x - c[1] * t - x0[1])

    part_1 = 2 * (c[0] - c[1])
    numerator = c[0] * torch.cosh(xi1)**2 + c[1] * torch.sinh(xi0)**2
    denominator_1 = (np.sqrt(c[0]) - np.sqrt(c[1])) * torch.cosh(xi0 + xi1)
    denominator_2 = (np.sqrt(c[0]) + np.sqrt(c[1])) * torch.cosh(xi0 - xi1)
    u = part_1 * numerator / (denominator_1 + denominator_2)**2
    coords = torch.cat((t.reshape(-1, 1), x.reshape(-1, 1)), dim=1)
    return coords, u.view(-1, 1)
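A minimal usage sketch for double_soliton above; the grid size, soliton velocities, and offsets are illustrative assumptions, not values from the source.

import numpy as np
import torch

# Illustrative space-time grid: a snapshot at t = 0.
x = torch.linspace(-10.0, 10.0, 256)
t = torch.zeros_like(x)

# Velocities must satisfy c[0] > c[1]; the offsets are arbitrary here.
coords, u = double_soliton(x, t, c=np.array([5.0, 2.0]), x0=np.array([-3.0, 3.0]))
print(coords.shape, u.shape)  # torch.Size([256, 2]) torch.Size([256, 1])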
Example #3
    def forward(self):
        M = torch.eye(6)

        if self.K1 < 0:
            K1 = -self.K1
            flip = True
        else:
            K1 = self.K1
            flip = False
        
        k = torch.sqrt(K1)
        
        kl = self.L * k
        M[0,0] = torch.cos(kl)
        M[0,1] = torch.sin(kl) / k
        M[1,0] = -k * torch.sin(kl)
        M[1,1] = torch.cos(kl)

        M[2,2] = torch.cosh(kl)
        M[2,3] = torch.sinh(kl) / k
        M[3,2] = k * torch.sinh(kl)
        M[3,3] = torch.cosh(kl)

        if flip:
            M = rot(- np.pi / 2) @ M @ rot(np.pi / 2)

        return M
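A quick standalone sanity check (illustrative values, not from the source) that the 2x2 focusing block built above has unit determinant, as a thick quadrupole transfer map should; the defocusing cosh/sinh block satisfies the same identity since cosh^2 - sinh^2 = 1.

import torch

K1 = torch.tensor(1.2)  # illustrative quadrupole strength
L = torch.tensor(0.3)   # illustrative length
k = torch.sqrt(K1)
kl = k * L
block = torch.stack([torch.stack([torch.cos(kl), torch.sin(kl) / k]),
                     torch.stack([-k * torch.sin(kl), torch.cos(kl)])])
print(torch.det(block))  # ~1.0, i.e. the block is symplectic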
Example #4
def generate_data_labels(ka_period_list, x0, y0, z0, b, dt, kd, kdmin):
    a = torch.from_numpy(ka_period_list[:, 0]).double().cpu()
    k = torch.from_numpy(ka_period_list[:, 1]).double().cpu()
    xp = torch.from_numpy(np.ones(360000) * x0).double().cpu()
    yp = torch.from_numpy(np.ones(360000) * y0).double().cpu()
    zp = torch.from_numpy(np.ones(360000) * z0).double().cpu()
    yokr = torch.from_numpy(np.zeros((360000, 1000))).double().cpu()
    l_bif = torch.from_numpy(np.zeros(360000)).long().cpu()
    l_okr = torch.from_numpy(np.zeros(360000)).long().cpu()
    # xml[0] = x0
    for i in range(1, kd + 1):
        xx = xp
        yy = yp
        zz = zp
        kx1 = a * torch.log(b * torch.cosh(yy)) - a * torch.log(xx + b)
        ky1 = k * yy - zz - (xx + b) * torch.tanh(yy)
        kz1 = (2 * k + 1) * yy - 2 * zz
        x1 = xx + (dt * kx1) / 2
        y1 = yy + (dt * ky1) / 2
        z1 = zz + (dt * kz1) / 2
        kx2 = a * torch.log(b * torch.cosh(y1)) - a * torch.log(x1 + b)
        ky2 = k * y1 - z1 - (x1 + b) * torch.tanh(y1)
        kz2 = (2 * k + 1) * y1 - 2 * z1
        x2 = xx + (dt * kx2) / 2
        y2 = yy + (dt * ky2) / 2
        z2 = zz + (dt * kz2) / 2
        kx3 = a * torch.log(b * torch.cosh(y2)) - a * torch.log(x2 + b)
        ky3 = k * y2 - z2 - (x2 + b) * torch.tanh(y2)
        kz3 = (2 * k + 1) * y2 - 2 * z2
        x3 = xx + (dt * kx3)
        y3 = yy + (dt * ky3)
        z3 = zz + (dt * kz3)
        kx4 = a * torch.log(b * torch.cosh(y3)) - a * torch.log(x3 + b)
        ky4 = k * y3 - z3 - (x3 + b) * torch.tanh(y3)
        kz4 = (2 * k + 1) * y3 - 2 * z3
        xp = xx + ((kx1 + 2 * kx2 + 2 * kx3 + kx4) / 6) * dt
        yp = yy + ((ky1 + 2 * ky2 + 2 * ky3 + ky4) / 6) * dt
        zp = zz + ((kz1 + 2 * kz2 + 2 * kz3 + kz4) / 6) * dt
        # xml[k] = xp
        if i >= kdmin:
            r00 = (yy - 1.0) * (yp - 1.0)
            y00 = (xx - 1.0) - (yy - 1.0) * (xp - xx) / (yp - yy)
            condition = (r00 <= 0) & (y00 > 0)
            yokr[condition, l_bif[condition]] = y00[condition]
            l_bif[condition] += 1

    max_l_bif = max(l_bif).item()
    for i in range(0, max_l_bif):
        cond_i = l_bif == i + 1
        if yokr[cond_i, 0:i + 1].nelement() != 0:
            cond_i_idx = torch.where(cond_i)[0]
            periods, indices = yokr[cond_i, 0:i + 1].sort(descending=True)
            l_okr[cond_i] = 1
            tmp = periods[:, 0]
            for j in range(1, i + 1):
                cond_abs = torch.abs(periods[:, j] - tmp) >= 0.01
                cond_abs_idx = cond_i_idx[cond_abs]
                tmp[cond_abs] = periods[cond_abs, j]
                l_okr[cond_abs_idx] += 1
    return a, k, l_okr
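The kx/ky/kz stages above are a classical fourth-order Runge-Kutta step unrolled over the three state variables. For reference, a compact generic sketch of the same scheme, with a hypothetical right-hand side f (not part of the source):

import torch

def rk4_step(f, state, dt):
    """One classical RK4 step for dy/dt = f(y), with state a tensor."""
    k1 = f(state)
    k2 = f(state + dt * k1 / 2)
    k3 = f(state + dt * k2 / 2)
    k4 = f(state + dt * k3)
    return state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6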
Example #5
File: hat.py Project: ZixuanKe/CAT
    def train_epoch(self, t, x, y, thres_cosh=50, thres_emb=6):
        self.model.train()

        r = np.arange(x.size(0))
        np.random.shuffle(r)
        r = torch.LongTensor(r).cuda()

        # Loop batches
        for i in range(0, len(r), self.sbatch):
            if i + self.sbatch <= len(r): b = r[i:i + self.sbatch]
            else: b = r[i:]
            #             images=torch.autograd.Variable(x[b],volatile=False)
            #             targets=torch.autograd.Variable(y[b],volatile=False)
            #             task=torch.autograd.Variable(torch.LongTensor([t]).cuda(),volatile=False)
            images = torch.autograd.Variable(x[b])
            targets = torch.autograd.Variable(y[b])
            task = torch.autograd.Variable(torch.LongTensor([t]).cuda())
            s = (self.smax - 1 / self.smax) * i / len(r) + 1 / self.smax

            # Forward
            output, masks = self.model.forward(task, images, s=s)
            if self.split:
                output = output[t]
            loss, _ = self.criterion(output, targets, masks)

            # Backward
            self.optimizer.zero_grad()
            loss.backward()

            # Restrict layer gradients in backprop
            if t > 0:
                for n, p in self.model.named_parameters():
                    if n in self.mask_back:
                        p.grad.data *= self.mask_back[n]

            # Compensate embedding gradients
            for n, p in self.model.named_parameters():
                if n.startswith('e'):
                    num = torch.cosh(
                        torch.clamp(s * p.data, -thres_cosh, thres_cosh)) + 1
                    den = torch.cosh(p.data) + 1
                    p.grad.data *= self.smax / s * num / den

            # Apply step
            if args.optimizer == 'SGD' or args.optimizer == 'SGD_momentum_decay':
                torch.nn.utils.clip_grad_norm(self.model.parameters(),
                                              self.clipgrad)
            self.optimizer.step()

            # Constrain embeddings
            for n, p in self.model.named_parameters():
                if n.startswith('e'):
                    p.data = torch.clamp(p.data, -thres_emb, thres_emb)

            #print(masks[-1].data.view(1,-1))
            #if i>=5*self.sbatch: sys.exit()
            #if i==0: print(masks[-2].data.view(1,-1),masks[-2].data.max(),masks[-2].data.min())
        #print(masks[-2].data.view(1,-1))

        return
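For reference, a standalone sketch of the cosh-based embedding-gradient compensation used in the loop above; the names s, smax, and thres_cosh mirror the snippet, and the values here are illustrative assumptions.

import torch

s, smax, thres_cosh = 0.5, 400.0, 50.0
e = torch.randn(10)  # stand-in for an embedding parameter tensor

# Factor that rescales the embedding gradient; the cosh argument is clamped to avoid overflow.
num = torch.cosh(torch.clamp(s * e, -thres_cosh, thres_cosh)) + 1
den = torch.cosh(e) + 1
compensation = smax / s * num / den
print(compensation.shape)  # torch.Size([10])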
Example #6
    def effective_energy(self, sample, frozen_nodes, tree_order, tree_hierarchy):
        h = sample.matmul(self.J[frozen_nodes, :]) + self.h
        tree_energy = torch.zeros(sample.shape[0], device=self.device, dtype=sample.dtype)
        tree = torch.from_numpy(np.array(tree_order)).to(self.device)
        for layer in tree_hierarchy:
            index_matrix = torch.zeros(len(layer), 2, dtype=torch.int64,
                                       device=self.device)
            index_matrix[:, 0] = torch.arange(len(layer))
            if len(self.J[layer][:, tree].nonzero()) != 0:
                index_matrix.index_copy_(0,
                                         self.J[layer][:, tree].nonzero()[:, 0],
                                         self.J[layer][:, tree].nonzero())
            index = index_matrix[:, 1]
            root = tree[index]

            hpj = self.J[layer, root] + h[:, layer]
            hmj = -self.J[layer, root] + h[:, layer]

            tree_energy += -torch.log(2 * (torch.cosh(self.beta * hpj) *
                                           torch.cosh(self.beta * hmj)).sqrt()).sum(dim=1) / self.beta
            for k in range(len(root)):
                h[:, root[k]] += torch.log(torch.cosh(self.beta * hpj) /
                                           torch.cosh(self.beta * hmj))[:, k] / (2 * self.beta)
            tree = tree[len(layer):]

        batch = sample.shape[0]
        assert sample.shape[1] == len(frozen_nodes)
        J = self.J[frozen_nodes][:, frozen_nodes].to_sparse()
        fvs_energy = -torch.bmm(sample.view(batch, 1, len(frozen_nodes)),
                                torch.sparse.mm(J, sample.t()).t().view(batch, len(frozen_nodes), 1)).reshape(batch) / 2
        fvs_energy -= sample @ self.h[frozen_nodes]

        energy = fvs_energy + tree_energy

        return self.beta * energy
Example #7
def calc_loss(y_hat,
              y_cuda,
              mag_hat,
              batch_size=20,
              scale_by_freq=None,
              l1_lambda=2e-5,
              reg_logcosh=False):
    # Reconstruction term plus regularization -> Slightly less wiggly waveform

    #loss = logcosh(y_hat, y_cuda) + 1e-5*torch.abs(mag_hat).mean()
    # loss = logcosh(y_hat, y_cuda) + 2e-5*torch.abs(mag_hat).mean()
    #print("y_hat.dtype, y_cuda.dtype, mag_hat.dtype, scale_by_freq.dtype =",y_hat.dtype, y_cuda.dtype, mag_hat.dtype, scale_by_freq.dtype)
    if not reg_logcosh:
        if scale_by_freq is None:
            loss = logcosh(y_hat, y_cuda) + l1_lambda * torch.abs(mag_hat).mean(
            )  # second term is an L1 regularization to help 'damp' high-freq noise
        else:
            loss = logcosh(
                y_hat, y_cuda
            ) + l1_lambda / 10 * torch.abs(mag_hat * scale_by_freq).mean(
            )  # second term is an L1 regularization to help 'damp' high-freq noise
    else:
        if scale_by_freq is None:
            loss = logcosh(y_hat, y_cuda) + l1_lambda * torch.mean(
                torch.log(torch.cosh(mag_hat))
            )  # second term is an L1 regularization to help 'damp' high-freq noise
        else:
            loss = logcosh(y_hat, y_cuda) + l1_lambda / 10 * torch.mean(
                scale_by_freq * torch.log(torch.cosh(mag_hat))
            )  # second term is an L1 regularization to help 'damp' high-freq noise

    return loss
Example #8
 def expm(self, p, d_p, lr=None, out=None, normalize=False):
     """Exponential map for hyperboloid"""
     if out is None:
         out = p
     if d_p.is_sparse:
         ix, d_val = d_p._indices().squeeze(), d_p._values()
         p_val = self.normalize(p.index_select(0, ix))
         ldv = self.ldot(d_val, d_val, keepdim=True)
         if self.debug:
             assert all(ldv > 0), "Tangent norm must be greater 0"
             assert all(ldv == ldv), "Tangent norm includes NaNs"
         nd_p = ldv.clamp_(min=0).sqrt_()
         t = th.clamp(nd_p, max=self.norm_clip)
         nd_p.clamp_(min=self.eps)
         newp = (th.cosh(t) * p_val).addcdiv_(th.sinh(t) * d_val, nd_p)
         if normalize:
             newp = self.normalize(newp)
         p.index_copy_(0, ix, newp)
     else:
         if lr is not None:
             d_p.narrow(-1, 0, 1).mul_(-1)
             d_p.addcmul_((self.ldot(p, d_p, keepdim=True)).expand_as(p), p)
             d_p.mul_(-lr)
         ldv = self.ldot(d_p, d_p, keepdim=True)
         if self.debug:
             assert all(ldv > 0), "Tangent norm must be greater 0"
             assert all(ldv == ldv), "Tangent norm includes NaNs"
         nd_p = ldv.clamp_(min=0).sqrt_()
         t = th.clamp(nd_p, max=self.norm_clip)
         nd_p.clamp_(min=self.eps)
         newp = (th.cosh(t) * p).addcdiv_(th.sinh(t) * d_p, nd_p)
         if normalize:
             newp = self.normalize(newp)
         p.copy_(newp)
Example #9
def log_cosh(pred, truth, sample_weight=None):
    ey_t = truth - pred
    if sample_weight is not None:

        return torch.mean(torch.log(torch.cosh(ey_t + 1e-12)) * sample_weight)
    else:
        return torch.mean(torch.log(torch.cosh(ey_t + 1e-12)))
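One caveat worth noting: torch.cosh overflows for |x| beyond roughly 89 in float32, so log(cosh(x)) is often computed through the identity log(cosh(x)) = |x| + log1p(exp(-2|x|)) - log(2). A minimal sketch of that variant (an alternative offered here as an assumption, not part of the source):

import math
import torch

def stable_log_cosh(pred, truth, sample_weight=None):
    ey_t = truth - pred
    # Numerically stable log(cosh(x)) = |x| + log(1 + exp(-2|x|)) - log(2)
    value = torch.abs(ey_t) + torch.log1p(torch.exp(-2.0 * torch.abs(ey_t))) - math.log(2.0)
    if sample_weight is not None:
        return torch.mean(value * sample_weight)
    return torch.mean(value)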
Example #10
def compute_hmds(data=None, distance_matrix=None, model='poincare', dimensions=2):
    """
    Compute h-MDS following the paper "Representation Tradeoffs for Hyperbolic Embeddings" (Algorithm 2)
    It is the closest to PCA in the hyperbolic space

    model: hyperbolic model the data sits in, used to compute distances.
    If a distance matrix is provided, data is not used.
    The distances are the same regardless of the hyperbolic model used.
    """
    assert data is not None or distance_matrix is not None, 'We have to obtain the data somehow'

    # Compute distance matrix, and then Y=cosh(d)
    if distance_matrix is None:
        x = data.unsqueeze(1).expand(data.shape[0], data.shape[0], data.shape[1]).contiguous().view(-1, data.shape[1])
        y = data.unsqueeze(0).expand(data.shape[0], data.shape[0], data.shape[1]).contiguous().view(-1, data.shape[1])
        if model == 'poincare':
            distance_matrix = gmath.dist(x=x, y=y, k=torch.tensor(-1.0))
            Y = torch.cosh(distance_matrix)
        else:  # model == 'hyperboloid'
            Y = hyperboloid_distance(x, y)
    else:
        Y = torch.cosh(distance_matrix)
    n = data.shape[0] if data is not None else distance_matrix.shape[0]
    Y = Y.view(n, n).detach()

    pca = PCA(n_components=dimensions, svd_solver='full')
    data_hyperboloid_reduced = pca.fit_transform(-Y.cpu().numpy())
    x0 = np.sqrt((data_hyperboloid_reduced**2).sum(axis=-1, keepdims=True)+1)
    data_hyperboloid_reduced = np.concatenate([data_hyperboloid_reduced, x0], axis=-1)

    return data_hyperboloid_reduced
Example #11
 def backward(self, grad_output, grad_output_mean):  #STE Part
     input, = self.saved_tensors
     grad_input = grad_output.clone()
     grad_input = (2 / torch.cosh(input)) * (2 / torch.cosh(input)) * (
         grad_input)
     #grad_input[input.ge(1)] = 0  # greater or equal
     #grad_input[input.le(-1)] = 0  # less or equal
     return grad_input
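The 2 / torch.cosh(input) factors above square to a scaled sech^2(input). As a quick standalone check (illustrative, not from the source), sech^2(x) is exactly the derivative of tanh(x):

import torch

x = torch.randn(5, requires_grad=True)
torch.tanh(x).sum().backward()
# d/dx tanh(x) = sech^2(x) = 1 / cosh(x)^2
print(torch.allclose(x.grad, 1.0 / torch.cosh(x) ** 2, atol=1e-6))  # True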
Example #12
    def get_value(self, tau0):
        dt = self.delta
        ar, cr, a, b, c, d = self.term.get_coefficients()

        # Format the lags correctly
        tau0 = torch.abs(as_tensor(tau0))
        tau = tau0[..., None]

        # Precompute some factors
        dpt = dt + tau
        dmt = dt - tau

        # Real parts:
        # tau > Delta
        crd = cr * dt
        cosh = torch.cosh(crd)
        norm = 2 * ar / crd**2
        K_large = torch.sum(norm * (cosh - 1) * torch.exp(-cr * tau), axis=-1)

        # tau < Delta
        crdmt = cr * dmt
        K_small = K_large + torch.sum(norm * (crdmt - torch.sinh(crdmt)),
                                      axis=-1)

        # Complex part
        cd = c * dt
        dd = d * dt
        c2 = c**2
        d2 = d**2
        c2pd2 = c2 + d2
        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d
        norm = 1.0 / (dt * c2pd2)**2
        k0 = torch.exp(-c * tau)
        cdt = torch.cos(d * tau)
        sdt = torch.sin(d * tau)

        # For tau > Delta
        cos_term = 2 * (torch.cosh(cd) * torch.cos(dd) - 1)
        sin_term = 2 * (torch.sinh(cd) * torch.sin(dd))
        factor = k0 * norm
        K_large += torch.sum((C1 * cos_term - C2 * sin_term) * factor * cdt,
                             axis=-1)
        K_large += torch.sum((C2 * cos_term + C1 * sin_term) * factor * sdt,
                             axis=-1)

        # tau < Delta
        edmt = torch.exp(-c * dmt)
        edpt = torch.exp(-c * dpt)
        cos_term = (edmt * torch.cos(d * dmt) + edpt * torch.cos(d * dpt) -
                    2 * k0 * cdt)
        sin_term = (edmt * torch.sin(d * dmt) + edpt * torch.sin(d * dpt) -
                    2 * k0 * sdt)
        K_small += torch.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
        K_small += torch.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)

        mask = tau0 >= dt
        return K_large * mask + K_small * (~mask)
Example #13
 def forward(self, gaze_gt, hp_gt, gaze_pred, hp_pred):
     # weight_hp = torch.sum(1 - torch.cos(hp_gt - hp_pred), dim=1)
     # cos_distant = torch.sum(1 - torch.cos(gaze_gt - gaze_pred), dim=1)
     hp_loss = torch.sum(torch.log(torch.cosh(hp_gt - hp_pred)), dim=1)
     # L1_gaze = torch.sum(torch.abs(gaze_gt - gaze_pred) , dim=1)
     gaze_loss = torch.sum(torch.log(torch.cosh(gaze_gt - gaze_pred)), dim=1)
     # return torch.mean(0.25* L2_hp_weight + (L2_gaze))
     return torch.mean(0.2 * hp_loss + gaze_loss)
Example #14
    def step(self, model, mask_back, t, s=None, thres_cosh=None, smax=None, clipgrad=None, finetune=False,
             closure=None):
        """Performs a single optimization step.

        Constrains the joint-objective gradient and the weight-decay gradient.
        Momentum is disregarded, as cancelled-out neurons don't build up momentum anyway.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p, (modp_name, modp) in zip(group['params'], model.named_parameters()):
                assert modp is p
                if p.grad is None:
                    continue

                d_p = p.grad.data
                if weight_decay != 0:
                    if 'embs' not in modp_name:  # don't apply weight decay to embedding params
                        d_p.add_(weight_decay, p.data)

                # Constrain grad
                if t > 0:  # Restrict layer gradients in backprop: a^{<t}
                    if modp_name in mask_back:
                        p.grad.data *= mask_back[modp_name]  # See before: stored as (1 - x) with prev task masks

                # Compensate embedding gradients
                if not finetune:
                    if 'embs' in modp_name:
                        num = torch.cosh(torch.clamp(s * p.data, -thres_cosh, thres_cosh)) + 1
                        den = torch.cosh(p.data) + 1
                        p.grad.data *= smax / s * num / den

                    # Clip
                    torch.nn.utils.clip_grad_norm(p, clipgrad)

                # Leave momentum as is
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(1 - dampening, d_p)
                    if nesterov:
                        d_p = d_p.add(momentum, buf)
                    else:
                        d_p = buf
                p.data.add_(-group['lr'], d_p)

        return loss
Example #15
    def train_epoch(self, t, data, iter_bar, which_type):
        self.model.train()
        # Loop batches
        for step, batch in enumerate(iter_bar):
            batch = [
                bat.to(self.device) if bat is not None else None
                for bat in batch
            ]
            input_ids, segment_ids, input_mask, targets, _ = batch
            s = (self.smax - 1 / self.smax) * step / len(data) + 1 / self.smax
            task = torch.autograd.Variable(torch.LongTensor([t]).cuda(),
                                           volatile=True)

            # Forward
            outputs = self.model.forward(task, input_ids, segment_ids,
                                         input_mask, which_type, s)
            output = outputs[t]
            loss = self.criterion(output, targets)
            iter_bar.set_description('Train Iter (loss=%5.3f)' % loss.item())

            # Backward
            self.optimizer.zero_grad()
            loss.backward()

            if t > 0 and which_type == 'mcl':
                task = torch.autograd.Variable(torch.LongTensor([t]).cuda(),
                                               volatile=False)
                mask = self.model.ac.mask(task, s=self.smax)
                mask = torch.autograd.Variable(mask.data.clone(),
                                               requires_grad=False)
                for n, p in self.model.named_parameters():
                    if n in rnn_weights:
                        # print('n: ',n)
                        # print('p: ',p.grad.size())
                        p.grad.data *= self.model.get_view_for(n, mask)

            # Compensate embedding gradients
            for n, p in self.model.ac.named_parameters():
                if 'ac.e' in n:
                    num = torch.cosh(
                        torch.clamp(s * p.data, -self.thres_cosh,
                                    self.thres_cosh)) + 1
                    den = torch.cosh(p.data) + 1
                    p.grad.data *= self.smax / s * num / den

            torch.nn.utils.clip_grad_norm(self.model.parameters(),
                                          self.clipgrad)
            self.optimizer.step()

            # Constrain embeddings
            for n, p in self.model.ac.named_parameters():
                if 'ac.e' in n:
                    p.data = torch.clamp(p.data, -self.thres_emb,
                                         self.thres_emb)

        return
Example #16
    def backward(ctx, grad_output):
        grad_input = None
        shape = ctx.shape
        lambd = ctx.fitter.lambd
        max_slope = ctx.fitter.max_slope
        monotonic = ctx.fitter.monotonic
        eps = ctx.fitter.eps
        power = ctx.fitter.power
        order = ctx.fitter.order
        dm = ctx.fitter.dm
        m = ctx.fitter.m
        a0 = ctx.fitter.a0.view(shape)
        if ctx.needs_input_grad[0]:
            dF = ctx.residual[torch.eye(
                shape[0], dtype=bool).repeat_interleave(shape[1],
                                                        axis=0)].view(shape)
            dF0 = ctx.residual.sum(axis=-1).view(shape) * -.5 * dm.view(-1, 1)
            summation = dF + dF0
            if order > 0:
                a1 = ctx.fitter.a1.view(shape)
                if max_slope is None:
                    dF1 = (ctx.residual * m).sum(
                        axis=-1).view(shape) * -1.5 * (dm * m).view(-1, 1)
                    summation += dF1
                else:
                    a0 = max_slope * a0
                    dF1   =  (ctx.residual*m).sum(axis=-1).view(shape) *\
                                (-1.5* (dm*m).view(-1,1) *(1/torch.cosh(a1/(a0+eps)))**2+\
                                -a1/(a0+eps)*(1/torch.cosh(a1/(a0+eps)))**2*-.5* dm.view(-1,1)*max_slope+\
                                torch.tanh(a1/(a0+eps))*-.5* dm.view(-1,1)*max_slope)

                    summation += dF1
            if order > 1:
                a2 = ctx.fitter.a2.view(shape)
                if not monotonic:
                    dF2   = (ctx.residual*.5*(3*m**2-1)).sum(axis=-1).view(shape) *\
                            -2.5*(dm*0.5*(3*m**2-1)).view(-1,1) #dc_2/ds is this term
                    summation += dF2
                else:
                    a1 = a1 / 3.
                    dF2   = (ctx.residual*.5*(3*m**2-1)).sum(axis=-1).view(shape) *\
                            (1/torch.cosh(a2/(a1+eps))**2*(-2.5*dm*0.5*(3*m**2-1)).view(-1,1)+\
                            (1/torch.cosh(a2/(a1+eps))**2* -a2/(a1+eps)*-1.5*(dm*m).view(-1,1)/3. +\
                            -1.5*(dm*m).view(-1,1)/3.*(torch.tanh(a2/(a1+eps)))))
                    summation += dF2

            summation *= (-power) / np.prod(shape)
            if lambd is not None:
                summation += -lambd*2/np.prod(shape) *\
                3/2* ctx.fitter.a1.view(shape)*(m*dm).view(-1,1)

            grad_input = grad_output * summation * ctx.weights

        return grad_input, None, None, None, None
Example #17
 def expm(self, p, d_p, lr=None, out=None, normalize=False):
     """Exponential map for hyperboloid"""
     if out is None:
         out = p
     # print("LORENTZIAN EXPONENTIAL MAP")
     if d_p.is_sparse:
         # t0 = time.time()
         ix, d_val = d_p._indices().squeeze(), d_p._values()
         # This pulls `ix` out of the original embedding table, which could
         # be in a corrupted state.  normalize it to fix it back to the
         # surface of the hyperboloid...
         # TODO: we should only do the normalize if we know that we are
         # training with multiple threads, otherwise this is a bit wasteful
         p_val = self.normalize(p.index_select(0, ix))
         ldv = self.ldot(d_val, d_val, keepdim=True)
         if self.debug:
             assert all(ldv > 0), "Tangent norm must be greater 0"
             assert all(ldv == ldv), "Tangent norm includes NaNs"
         nd_p = ldv.clamp_(min=0).sqrt_()
         t = th.clamp(nd_p, max=self.norm_clip)
         nd_p.clamp_(min=self.eps)
         newp = (th.cosh(t) * p_val).addcdiv_(th.sinh(t) * d_val, nd_p)
         # print("is p_val sparse: {}".format(p_val.is_sparse))
         # print("is nd_p sparse: {}".format(nd_p.is_sparse))
         # print("is p sparse: {}".format(p.is_sparse))
         # print("is newp sparse: {}".format(newp.is_sparse))
         print(f"p = {p}")
         print(f"d_p = {d_p}")
         # print(newp)
         if normalize:
             newp = self.normalize(newp)
             print(newp.shape)
         p.index_copy_(0, ix, newp)
         # t1 = time.time()
         # print("iteration time = {}".format(t1-t0))
     else:
         if lr is not None:
             d_p.narrow(-1, 0, 1).mul_(-1)
             d_p.addcmul_((self.ldot(p, d_p, keepdim=True)).expand_as(p), p)
             d_p.mul_(-lr)
         ldv = self.ldot(d_p, d_p, keepdim=True)
         if self.debug:
             assert all(ldv > 0), "Tangent norm must be greater 0"
             assert all(ldv == ldv), "Tangent norm includes NaNs"
         nd_p = ldv.clamp_(min=0).sqrt_()
         t = th.clamp(nd_p, max=self.norm_clip)
         nd_p.clamp_(min=self.eps)
         newp = (th.cosh(t) * p).addcdiv_(th.sinh(t) * d_p, nd_p)
         if normalize:
             newp = self.normalize(newp)
         p.copy_(newp)
Example #18
    def train_epoch(self, t, x, y, thres_cosh=50, thres_emb=6):
        self.model.train()

        r = np.arange(x.size(0))
        np.random.shuffle(r)
        r = torch.LongTensor(r).cuda()

        # Loop batches
        for i in range(0, len(r), self.sbatch):
            if i + self.sbatch <= len(r): b = r[i:i + self.sbatch]
            else: b = r[i:]
            images = torch.autograd.Variable(x[b])
            targets = torch.autograd.Variable(y[b])
            task = torch.autograd.Variable(torch.LongTensor([t]).cuda())
            s = (self.smax - 1 / self.smax) * i / len(r) + 1 / self.smax

            # Forward
            output, masks = self.model.forward(task, images, s=s)
            output = output[t]
            loss, _ = self.criterion(output, targets, masks)

            # Backward
            self.optimizer.zero_grad()
            loss.backward()

            # Restrict layer gradients in backprop
            if t > 0:
                for n, p in self.model.named_parameters():
                    if n in self.mask_back:
                        p.grad.data *= self.mask_back[n]

            # Compensate embedding gradients
            for n, p in self.model.named_parameters():
                if n.startswith('e'):
                    num = torch.cosh(
                        torch.clamp(s * p.data, -thres_cosh, thres_cosh)) + 1
                    den = torch.cosh(p.data) + 1
                    p.grad.data *= self.smax / s * num / den

            # Apply step

            self.optimizer.step()

            # Constrain embeddings
            for n, p in self.model.named_parameters():
                if n.startswith('e'):
                    p.data = torch.clamp(p.data, -thres_emb, thres_emb)

        return
Example #19
 def exp(self, X, G):
     # check for multiple dimensions
     G_lnorm = self.norm(X, G)
     if self._k == 1:
         ex = torch.cosh(G_lnorm) * X + torch.sinh(G_lnorm) * (G / G_lnorm)
         if G_lnorm == 0:
             ex = X
         return ex
     else:
         G_lnorm = G_lnorm.view(-1, 1)
         ex = torch.cosh(G_lnorm) * X + torch.sinh(G_lnorm) * (G / G_lnorm)
         exclude = G_lnorm == 0
         exclude = exclude.view(-1)
         ex[exclude, :] = X[exclude, :]
         return ex
Example #20
def log_cosh_loss(y_pred, y_true):
    loss = torch.cosh(y_pred - y_true)
    loss = torch.log(loss)   # elementwise log-cosh
    loss = loss.mean(dim=0)  # average over the batch dimension
    loss = torch.sum(loss)   # sum over the remaining (target) dimensions
    return loss
Example #21
 def grad_log_prob(self, r):
     c = __to_tensor__(self.c)
     dim = self.dim
     res = - r / self.sigma.pow(2) + (dim - 1) * c.sqrt() * \
         torch.cosh(c.sqrt() * r) / torch.sinh(c.sqrt() * r)
     res[r < 0] = 0.0
     return res
Example #22
0
    def backward(ctx, grad_output):

        input, weight, bias, output = ctx.saved_variables
        grad_input = grad_weight = grad_bias = None
        grad_stride = grad_padding = grad_dilation = grad_groups = grad_nonlinearity_g = None

        # an easy way to get the gradient wrt the non-gaussianity is to calculate the
        # nongaussianity, then just call backwards on this.
        # From https://github.com/pytorch/pytorch/issues/1776
        # (but perhaps isn't maximally efficient; one could calculate the derivative manually)

        nongaussianity = torch.mean(torch.log(torch.cosh(output)))

        if ctx.needs_input_grad[0]:
            grad_input = torch.autograd.grad(nongaussianity, input,
                                             grad_output)
        if ctx.needs_input_grad[1]:
            if ctx.super_or_sub == "super":
                grad_weight = ctx.ica_strength * torch.autograd.grad(
                    nongaussianity, weight, grad_output)
            elif ctx.super_or_sub == "sub":
                grad_weight = -ctx.ica_strength * torch.autograd.grad(
                    nongaussianity, weight, grad_output)

        # no change in bias gradient

        return grad_input, grad_weight, grad_bias, grad_stride, grad_padding, grad_dilation, grad_groups, grad_nonlinearity_g
Example #23
def pseudo_hyperbolic_gaussian(z, mu_h, cov, version, vt=None, u=None):

    batch_size, n_h = mu_h.shape
    n = n_h - 1
    mu0 = to_cuda_var(torch.zeros(batch_size, n))
    v0 = torch.cat((to_cuda_var(torch.ones(batch_size, 1)), mu0),
                   1)  # origin of the hyperbolic space

    # try not using inverse exp. mapping if vt is already known
    if vt is None and u is None:
        u = inv_exp_map(z, mu_h)
        v = parallel_transport(u, mu_h, v0)
        vt = v[:, 1:]
        logp_vt = (MultivariateNormal(mu0, cov).log_prob(vt)).view(-1, 1)
    else:
        logp_vt = (MultivariateNormal(mu0, cov).log_prob(vt)).view(-1, 1)

    r = lorentz_tangent_norm(u)

    if version == 1:
        alpha = -lorentz_product(v0, mu_h)
        log_det_proj_mu = n * (torch.log(torch.sinh(r)) -
                               torch.log(r)) + torch.log(
                                   torch.cosh(r)) + torch.log(alpha)

    elif version == 2:
        log_det_proj_mu = (n - 1) * (torch.log(torch.sinh(r)) - torch.log(r))

    logp_z = logp_vt - log_det_proj_mu

    return logp_vt, logp_z
Example #24
    def exp(self, lr):
        """ Exponential map """
        x = self.data.detach()
        # print("norm", HyperboloidParameter.norm_h(x))
        v = -lr * self.grad

        retract = False
        if retract:
            # retraction
            # print("retract")
            self.data = x + v

        else:
            # print("tangent", HyperboloidParameter.dot_h(x, v))
            assert torch.all(~torch.isnan(v))
            n = self.__class__.norm_h(v).unsqueeze(-1)
            assert torch.all(~torch.isnan(n))
            n.clamp_(max=1.0)
            # e = torch.cosh(n)*x + torch.sinh(n)*v/n
            mask = torch.abs(n)<1e-7
            cosh = torch.cosh(n)
            cosh[mask] = 1.0
            sinh = torch.sinh(n)
            sinh[mask] = 0.0
            n[mask] = 1.0
            e = cosh*x + sinh/n*v
            # assert torch.all(-HyperboloidParameter.dot_h(e,e) >= 0), torch.min(-HyperboloidParameter.dot_h(e,e))
            self.data = e
        self.proj()
Example #25
 def forward(self, y_t, y_prime_t):
     ey_t = y_t - y_prime_t
     value = torch.log(torch.cosh(ey_t + 1e-12))
     if self.reduction == 'mean':
         return torch.mean(value)
     elif self.reduction == 'sum':
         return torch.sum(value)
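A hedged usage sketch of the reduction-style forward above, wrapped in a hypothetical LogCoshLoss module purely for illustration:

import torch
import torch.nn as nn

class LogCoshLoss(nn.Module):
    """Illustrative wrapper around the forward() shown above."""

    def __init__(self, reduction='mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, y_t, y_prime_t):
        value = torch.log(torch.cosh(y_t - y_prime_t + 1e-12))
        return torch.mean(value) if self.reduction == 'mean' else torch.sum(value)

criterion = LogCoshLoss()
loss = criterion(torch.randn(8, 3), torch.randn(8, 3))
print(loss)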
Example #26
def train(args, epoch, net, trainLoader, optimizer):
    net.train()

    for batch_idx, (data, _) in enumerate(trainLoader):

        if args.cuda:
            data = data.cuda()
        data = Variable(data)
        data = torch.mean(data, 1).view(-1,32**2)

        optimizer.zero_grad()
        output = torch.squeeze(net(data))
        # now reconstruct the input
        data_r = output.mm(net.linear.weight)

        # the loss
        mse_loss = F.mse_loss(data, data_r)

        nongaussianity = args.ica * torch.mean(torch.log(torch.cosh(output)))

        loss = mse_loss + nongaussianity
        loss.backward()

        optimizer.step()

        print('Train Epoch {}: Loss: {:.6f},\t Nongaussianity: {:.6f}\t'.format(epoch,
            mse_loss.item(), nongaussianity.item()))
Example #27
    def forward(self, x, y, predict=False):
        """Computes logcosh-loss with weights
        
        Arguments:
            x {torch.Tensor} -- Predictions of shape (B, F), where F is number of targets
            y {tuple} -- (targets, weights), where targets is of shape (B, F) and weights of shape (F)
        
        Returns:
            [torch.Tensor] -- Averaged, weighted loss over batch.
        """

        # Unpack into targets and weights (note: the stored self._weights, not the
        # unpacked weights, is what actually scales the loss below)
        targets, weights = y

        # Calculate the elementwise log-cosh loss
        logcosh = torch.log(torch.cosh(x - targets))

        # Weigh each target's contribution and sum over targets
        loss_weighted = torch.sum(self._weights * logcosh, dim=-1)

        # Mean over the batch
        if not predict:
            loss = torch.mean(loss_weighted)
        else:
            loss = loss_weighted

        return loss
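A small shape sketch for the weighted forward above, using illustrative tensors; the surrounding class is assumed to hold self._weights of shape (F,), matching the docstring.

import torch

B, F = 16, 3
x = torch.randn(B, F)                    # predictions
targets = torch.randn(B, F)
weights = torch.tensor([1.0, 0.5, 2.0])  # one weight per target

logcosh = torch.log(torch.cosh(x - targets))             # (B, F)
loss = torch.mean(torch.sum(weights * logcosh, dim=-1))  # scalar
print(loss)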
Example #28
    def get_celerite_matrices(self, x, diag):
        dt = self.delta
        ar, cr, a, b, c, d = self.term.get_coefficients()

        # Real part
        cd = cr * dt
        delta_diag = 2 * torch.sum(ar * (cd - torch.sinh(cd)) / cd**2)

        # Complex part
        cd = c * dt
        dd = d * dt
        c2 = c**2
        d2 = d**2
        c2pd2 = c2 + d2
        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d
        norm = (dt * c2pd2)**2
        sinh = torch.sinh(cd)
        cosh = torch.cosh(cd)
        delta_diag += 2 * torch.sum(
            (C2 * cosh * torch.sin(dd) - C1 * sinh * torch.cos(dd) +
             (a * c + b * d) * dt * c2pd2) / norm)

        new_diag = as_tensor(diag) + delta_diag
        return super().get_celerite_matrices(x, new_diag)
Example #29
    def chart(self, inp):
        """
        Map inp onto the hyperboloid using the global chart
        """
        k = inp.shape[-1]

        # inp_norm = torch.norm(inp, p=2, dim=-1, keepdim=True)
        # d = F.normalize(inp, p=2, dim=-1)

        cv = torch.cosh(inp.narrow(-1,1,1)).squeeze()
        cu = torch.cosh(inp.narrow(-1,0,1)).squeeze()
        sv = torch.sinh(inp.narrow(-1,1,1)).squeeze()
        su = torch.sinh(inp.narrow(-1,0,1)).squeeze()
        # h_ = inp
        
        return torch.stack((cu*cv, cv*su, sv), dim=-1)
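A quick standalone check (illustrative) that points produced by the chart above land on the hyperboloid -x0^2 + x1^2 + x2^2 = -1:

import torch

uv = 0.5 * torch.randn(4, 2)  # (u, v) chart coordinates
cu, cv = torch.cosh(uv[:, 0]), torch.cosh(uv[:, 1])
su, sv = torch.sinh(uv[:, 0]), torch.sinh(uv[:, 1])
pts = torch.stack((cu * cv, cv * su, sv), dim=-1)
minkowski = -pts[:, 0] ** 2 + pts[:, 1] ** 2 + pts[:, 2] ** 2
print(torch.allclose(minkowski, torch.full_like(minkowski, -1.0), atol=1e-4))  # True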
Example #30
 def __init__(self):
     super().__init__()
     self.activation_func_derivs = [
         lambda ds, x: torch.tanh(x), lambda ds, x: 1 / torch.cosh(x)**2,
         lambda ds, x: -2 * ds[0] * ds[1],
         lambda ds, x: ds[2]**2 / ds[1] - 2 * ds[1]**2
     ]  # ordered list of activation function and its derivatives