Example #1
def pairwise_distance(x1, x2, p=2, eps=1e-6):
    r"""
    Computes the batchwise pairwise distance between vectors x1 and x2:

    .. math::
        \Vert x \Vert _p := \left( \sum_{i=1}^n  \vert x_i \vert ^ p \right) ^ {1/p}

    Args:
        x1: first input tensor
        x2: second input tensor
        p: the norm degree. Default: 2
        eps (float, optional): Small value to avoid division by zero. Default: 1e-6

    Shape:
        - Input: :math:`(N, D)` where `D = vector dimension`
        - Output: :math:`(N, 1)`

    Example::

        >>> input1 = autograd.Variable(torch.randn(100, 128))
        >>> input2 = autograd.Variable(torch.randn(100, 128))
        >>> output = F.pairwise_distance(input1, input2, p=2)
        >>> output.backward()
    """
    assert x1.size() == x2.size(), "Input sizes must be equal."
    assert x1.dim() == 2, "Input must be a 2D matrix."
    diff = torch.abs(x1 - x2)
    out = torch.pow(diff + eps, p).sum(dim=1, keepdim=True)
    return torch.pow(out, 1. / p)
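A minimal sketch to sanity-check the helper above on modern PyTorch (plain tensors have replaced autograd.Variable, and torch.nn.functional.pairwise_distance computes the same quantity up to the eps term):

import torch
import torch.nn.functional as F

x1 = torch.randn(100, 128, requires_grad=True)
x2 = torch.randn(100, 128)

out = pairwise_distance(x1, x2, p=2)    # shape (100, 1)
ref = F.pairwise_distance(x1, x2, p=2)  # shape (100,)

# eps enters the two implementations slightly differently, so allow a tolerance
print(torch.allclose(out.squeeze(1), ref, atol=1e-4))
out.sum().backward()  # gradients flow back to x1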
    def forward(self, model_output, target, mask, attr):

        pred_seq, pred_attr = model_output

        # input (from model.forward())      (batch_size, max_seq_len, vocab_size)
        # target (from dataloader->labels)  (batch_size, max_seq_len)
        # mask (from dataloader->masks)     (batch_size, max_seq_len)

        if not self.seen:
            print('> in LanguageModelCriterion.forward(input, target, mask):')
            print('    pred_seq', pred_seq.shape)  # (200, 17, 3562)
            print('    pred_attr', pred_attr.shape)  # (200, 1000)
            print('    target', target.shape)  # (200, 17)
            print('    mask', mask.shape)  # (200, 17)
            print('    attr', attr.shape)  # (200, 1000)
            self.seen = True

        # truncate to the same size
        target = target[:, :pred_seq.size(1)]
        mask = mask[:, :pred_seq.size(1)]
        pred_seq = to_contiguous(pred_seq).view(-1, pred_seq.size(2))
        target = to_contiguous(target).view(-1, 1)
        mask = to_contiguous(mask).view(-1, 1)
        output = - pred_seq.gather(1, target) * mask
        output = torch.sum(output) / torch.sum(mask)

        bsize = pred_attr.size(0)
        pred_attr = to_contiguous(pred_attr)
        attr = to_contiguous(attr.float())
        attr_loss = torch.pow(torch.sum(torch.pow((pred_attr - attr), 2)), 0.5) / bsize

        output = output + self.attr_weight * attr_loss

        return output
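The gather-based masked NLL used above, reduced to a standalone sketch with made-up shapes and values:

import torch

logp = torch.log_softmax(torch.randn(6, 10), dim=1)        # (tokens, vocab) log-probs
target = torch.randint(10, (6, 1))                         # gold token ids
mask = torch.tensor([[1.], [1.], [1.], [1.], [0.], [0.]])  # 1 = real token, 0 = padding

nll = -(logp.gather(1, target) * mask).sum() / mask.sum()
print(nll)  # average negative log-likelihood over unmasked positions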
 def model():
     mu_latent = pyro.sample("mu_latent", dist.normal,
                             self.mu0, torch.pow(self.tau0, -0.5))
     sigma = torch.pow(self.tau, -0.5)
     pyro.observe("obs0", dist.lognormal, self.data[0], mu_latent, sigma)
     pyro.observe("obs1", dist.lognormal, self.data[1], mu_latent, sigma)
     return mu_latent
Example #4
    def forward(self, x, labels):
        """
        Args:
        - x: feature matrix with shape (batch_size, feat_dim).
        - labels: ground truth labels with shape (batch_size).
        """
        batch_size = x.size(0)
        distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
                  torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
        distmat.addmm_(1, -2, x, self.centers.t())

        classes = torch.arange(self.num_classes).long()
        if self.use_gpu: classes = classes.cuda()
        labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
        mask = labels.eq(classes.expand(batch_size, self.num_classes))

        dist = []
        for i in range(batch_size):
            value = distmat[i][mask[i]]
            value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
            dist.append(value)
        dist = torch.cat(dist)
        loss = dist.mean()

        return loss
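The distmat construction above uses the expansion ||x_i - c_j||^2 = ||x_i||^2 + ||c_j||^2 - 2 x_i·c_j. A standalone sketch of the same trick, checked against torch.cdist:

import torch

x = torch.randn(5, 3)        # (batch_size, feat_dim)
centers = torch.randn(4, 3)  # (num_classes, feat_dim)

distmat = torch.pow(x, 2).sum(1, keepdim=True) \
        + torch.pow(centers, 2).sum(1, keepdim=True).t() \
        - 2 * x @ centers.t()
print(torch.allclose(distmat, torch.cdist(x, centers).pow(2), atol=1e-5))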
 def model():
     mu_latent = pyro.sample("mu_latent", dist.normal,
                             self.mu0, torch.pow(self.tau0, -0.5))
     bijector = AffineExp(torch.pow(self.tau, -0.5), mu_latent)
     x_dist = TransformedDistribution(dist.normal, bijector)
     pyro.observe("obs0", x_dist, self.data[0], ng_zeros(1), ng_ones(1))
     pyro.observe("obs1", x_dist, self.data[1], ng_zeros(1), ng_ones(1))
     return mu_latent
 def model():
     mu_latent = pyro.sample(
             "mu_latent",
             dist.Normal(self.mu0, torch.pow(self.lam0, -0.5), reparameterized=reparameterized))
     for i, x in enumerate(self.data):
         pyro.observe("obs_%d" % i, dist.normal, x, mu_latent,
                      torch.pow(self.lam, -0.5))
     return mu_latent
    def updateOutput(self, input):
        assert input.dim() == 4

        if self.scale is None:
            self.scale = input.new()
        if input.type() == 'torch.cuda.FloatTensor':
            self._backend.SpatialCrossMapLRN_updateOutput(
                self._backend.library_state,
                input,
                self.output,
                self.scale,
                self.size,
                self.alpha,
                self.beta,
                self.k
            )
        else:
            batchSize = input.size(0)
            channels = input.size(1)
            inputHeight = input.size(2)
            inputWidth = input.size(3)

            self.output.resize_as_(input)
            self.scale.resize_as_(input)

            # use output storage as temporary buffer
            inputSquare = self.output
            torch.pow(input, 2, out=inputSquare)

            prePad = int((self.size - 1) / 2 + 1)
            prePadCrop = channels if prePad > channels else prePad

            scaleFirst = self.scale.select(1, 0)
            scaleFirst.zero_()
            # compute first feature map normalization
            for c in range(prePadCrop):
                scaleFirst.add_(inputSquare.select(1, c))

            # reuse computations for next feature maps normalization
            # by adding the next feature map and removing the previous
            for c in range(1, channels):
                scalePrevious = self.scale.select(1, c - 1)
                scaleCurrent = self.scale.select(1, c)
                scaleCurrent.copy_(scalePrevious)
                if c < channels - prePad + 1:
                    squareNext = inputSquare.select(1, c + prePad - 1)
                    scaleCurrent.add_(1, squareNext)

                if c > prePad:
                    squarePrevious = inputSquare.select(1, c - prePad)
                    scaleCurrent.add_(-1, squarePrevious)

            self.scale.mul_(self.alpha / self.size).add_(self.k)

            torch.pow(self.scale, -self.beta, out=self.output)
            self.output.mul_(input)

        return self.output
Example #8
 def model():
     mu_latent = pyro.sample("mu_latent", dist.normal,
                             self.mu0, torch.pow(self.lam0, -0.5))
     pyro.map_data("aaa", self.data, lambda i,
                   x: pyro.observe(
                       "obs_%d" % i, dist.normal,
                       x, mu_latent, torch.pow(self.lam, -0.5)),
                   batch_size=self.batch_size)
     return mu_latent
    def model(self, reparameterized, difficulty=0.0):
        next_mean = self.loc0
        for k in range(1, self.N + 1):
            latent_dist = dist.Normal(next_mean, torch.pow(self.lambdas[k - 1], -0.5))
            loc_latent = pyro.sample("loc_latent_%d" % k, latent_dist)
            next_mean = loc_latent

        loc_N = next_mean
        with pyro.iarange("data", self.data.size(0)):
            pyro.sample("obs", dist.Normal(loc_N.expand_as(self.data),
                                           torch.pow(self.lambdas[self.N], -0.5).expand_as(self.data)), obs=self.data)
        return loc_N
        def model(*args, **kwargs):
            next_mean = self.mu0
            for k in range(1, self.N + 1):
                latent_dist = dist.Normal(next_mean, torch.pow(self.lambdas[k - 1], -0.5))
                mu_latent = pyro.sample("mu_latent_%d" % k, latent_dist)
                next_mean = mu_latent

            mu_N = next_mean
            for i, x in enumerate(self.data):
                pyro.observe("obs_%d" % i, dist.normal, x, mu_N,
                             torch.pow(self.lambdas[self.N], -0.5))
            return mu_N
def log_norm(x, mu, std):
    """Compute the log pdf of x,
    under a normal distribution with mean mu and standard deviation std."""
    
#    print ("X device: ", x.device)
#    print ("mu device: ", mu.device)
#    print ("std device: ", std.device)
    x = x.view(-1)
    mu = mu.view(-1)
    std = std.view(-1)
    return -0.5 * torch.log(2*np.pi*torch.pow(std,2))  \
            - 0.5 * (1/torch.pow(std,2))* torch.pow( (x-mu),2) 
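A quick sanity check of log_norm against torch.distributions:

import torch

x, mu, std = torch.randn(5), torch.zeros(5), torch.full((5,), 2.0)
manual = log_norm(x, mu, std)
reference = torch.distributions.Normal(mu, std).log_prob(x)
print(torch.allclose(manual, reference, atol=1e-6))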
Example #12
def mean_dist(source_points,warped_points,L_pck):
    # compute mean keypoint distance, normalized by L_pck
    batch_size=source_points.size(0)
    dist=torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i,:]
        p_wrp = warped_points[i,:]
        N_pts = torch.sum(torch.ne(p_src[0,:],-1)*torch.ne(p_src[1,:],-1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:,:N_pts]-p_wrp[:,:N_pts],2),0),0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        dist[i]=torch.mean(torch.div(point_distance,L_pck_mat))
    return dist
Example #13
def pck(source_points,warped_points,L_pck,alpha=0.1):
    # compute percentage of correct keypoints (PCK)
    batch_size=source_points.size(0)
    pck=torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i,:]
        p_wrp = warped_points[i,:]
        N_pts = torch.sum(torch.ne(p_src[0,:],-1)*torch.ne(p_src[1,:],-1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:,:N_pts]-p_wrp[:,:N_pts],2),0),0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        correct_points = torch.le(point_distance,L_pck_mat*alpha)
        pck[i]=torch.mean(correct_points.float())
    return pck
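A minimal usage sketch for pck(), assuming points are stored as (x, y) rows with unused keypoint slots padded by -1 (which is what the N_pts computation above counts on):

import torch

source = torch.tensor([[[0., 10., -1.],
                        [0., 10., -1.]]])  # (batch=1, 2, max_pts), 2 valid points
warped = torch.tensor([[[1., 10., -1.],
                        [0., 11., -1.]]])
L_pck = torch.tensor([100.])               # reference length per image

# both keypoint errors are 1.0 <= alpha * L_pck = 10.0, so PCK = 1.0
print(pck(source, warped, L_pck, alpha=0.1))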
def euclidean_dist(x, y):
  """
  Args:
    x: pytorch Variable, with shape [m, d]
    y: pytorch Variable, with shape [n, d]
  Returns:
    dist: pytorch Variable, with shape [m, n]
  """
  m, n = x.size(0), y.size(0)
  xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
  yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
  dist = xx + yy
  dist.addmm_(1, -2, x, y.t())
  dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
  return dist
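A small cross-check of euclidean_dist() against torch.cdist; note the addmm_ call uses a legacy positional (beta, alpha) signature that recent PyTorch releases deprecate:

import torch

x = torch.randn(4, 16)
y = torch.randn(3, 16)
print(torch.allclose(euclidean_dist(x, y), torch.cdist(x, y), atol=1e-5))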
Example #15
    def test_save_and_load(self):
        lin = pyro.module("mymodule", self.linear_module)
        pyro.module("mymodule2", self.linear_module2)
        x = torch.randn(1, 3)
        myparam = pyro.param("myparam", torch.tensor(1.234 * torch.ones(1), requires_grad=True))

        cost = torch.sum(torch.pow(lin(x), 2.0)) * torch.pow(myparam, 4.0)
        cost.backward()
        params = list(self.linear_module.parameters()) + [myparam]
        optim = torch.optim.Adam(params, lr=.01)
        myparam_copy_stale = copy(pyro.param("myparam").detach().cpu().numpy())

        optim.step()

        myparam_copy = copy(pyro.param("myparam").detach().cpu().numpy())
        param_store_params = copy(pyro.get_param_store()._params)
        param_store_param_to_name = copy(pyro.get_param_store()._param_to_name)
        assert len(list(param_store_params.keys())) == 5
        assert len(list(param_store_param_to_name.values())) == 5

        pyro.get_param_store().save('paramstore.unittest.out')
        pyro.clear_param_store()
        assert len(list(pyro.get_param_store()._params)) == 0
        assert len(list(pyro.get_param_store()._param_to_name)) == 0
        pyro.get_param_store().load('paramstore.unittest.out')

        def modules_are_equal():
            weights_equal = np.sum(np.fabs(self.linear_module3.weight.detach().cpu().numpy() -
                                   self.linear_module.weight.detach().cpu().numpy())) == 0.0
            bias_equal = np.sum(np.fabs(self.linear_module3.bias.detach().cpu().numpy() -
                                self.linear_module.bias.detach().cpu().numpy())) == 0.0
            return (weights_equal and bias_equal)

        assert not modules_are_equal()
        pyro.module("mymodule", self.linear_module3, update_module_params=False)
        assert id(self.linear_module3.weight) != id(pyro.param('mymodule$$$weight'))
        assert not modules_are_equal()
        pyro.module("mymodule", self.linear_module3, update_module_params=True)
        assert id(self.linear_module3.weight) == id(pyro.param('mymodule$$$weight'))
        assert modules_are_equal()

        myparam = pyro.param("myparam")
        store = pyro.get_param_store()
        assert myparam_copy_stale != myparam.detach().cpu().numpy()
        assert myparam_copy == myparam.detach().cpu().numpy()
        assert sorted(param_store_params.keys()) == sorted(store._params.keys())
        assert sorted(param_store_param_to_name.values()) == sorted(store._param_to_name.values())
        assert sorted(store._params.keys()) == sorted(store._param_to_name.values())
Example #16
    def backward(self, grad_output):
        input, output = self.saved_tensors
        grad_input = grad_output.new()

        if self._backend is not None:
            self._backend.SpatialCrossMapLRN_updateGradInput(
                self._backend.library_state,
                input,
                grad_output,
                grad_input,
                self.scale,
                output,
                self.size,
                self.alpha,
                self.beta,
                self.k
            )
        else:
            batch_size = input.size(0)
            channels = input.size(1)
            input_height = input.size(2)
            input_width = input.size(3)

            padded_ratio = input.new(channels + self.size - 1, input_height,
                                     input_width)
            accum_ratio = input.new(input_height, input_width)

            cache_ratio_value = 2 * self.alpha * self.beta / self.size
            inversePrePad = int(self.size - (self.size - 1) / 2)

            grad_input.resize_as_(input)
            torch.pow(self.scale, -self.beta, out=grad_input).mul_(grad_output)

            padded_ratio.zero_()
            padded_ratio_center = padded_ratio.narrow(0, inversePrePad,
                                                      channels)
            for n in range(batch_size):
                torch.mul(grad_output[n], output[n], out=padded_ratio_center)
                padded_ratio_center.div_(self.scale[n])
                torch.sum(
                    padded_ratio.narrow(0, 0, self.size - 1), 0, keepdim=False, out=accum_ratio)
                for c in range(channels):
                    accum_ratio.add_(padded_ratio[c + self.size - 1])
                    grad_input[n][c].addcmul_(-cache_ratio_value, input[n][c],
                                              accum_ratio)
                    accum_ratio.add_(-1, padded_ratio[c])

        return grad_input
    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        if self.device_id is None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id is not None:
            one_hot = one_hot.cuda(self.device_id[0])
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # ------------- torch.where: out_i = x_i if condition_i else y_i -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ >= 0.4
        output *= self.s

        return output
Example #18
 def log_prob_accept(self, value):
     v = value / self._d
     y = torch.pow(v, 1.0 / 3.0)
     x = (y - 1.0) / self._c
     log_prob_accept = 0.5 * x * x + self._d * (1.0 - v + torch.log(v))
     log_prob_accept[y <= 0] = -float('inf')
     return log_prob_accept
Example #19
 def print_gradients(self, X, Y):
     """
     Print the gradients between the output and X
     """
     print ("--------- GRADIENTS ------------")
     predictions = self.forward(X)
     
     ## Define the loss: 
     loss = torch.sum(torch.pow(predictions - Y, 2))
     
     ## Clean previous gradients 
     self.zero_grad()
     loss.backward()
     
     print (self.linear1.weight.grad)
     print (self.linear1.bias.grad)
     
     print (self.W2.grad)
     print (self.b2.grad)
     
     print ("----------- STRUCTURE ------------")
     ## Clean previous gradients 
     print(loss.grad_fn)                       # MSELoss
     print(loss.grad_fn.next_functions[0][0])  # Linear 1
     print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # Sigmoid
 
 
     self.zero_grad()
Example #20
def singleTagLoss(pred_tag, keypoints):
    """
    associative embedding loss for one image
    """
    eps = 1e-6
    tags = []
    pull = 0
    for i in keypoints:
        tmp = []
        for j in i:
            if j[1]>0:
                tmp.append(pred_tag[j[0]])
        if len(tmp) == 0:
            continue
        tmp = torch.stack(tmp)
        tags.append(torch.mean(tmp, dim=0))
        pull = pull +  torch.mean((tmp - tags[-1].expand_as(tmp))**2)

    if len(tags) == 0:
        return make_input(torch.zeros([1]).float()), make_input(torch.zeros([1]).float())

    tags = torch.stack(tags)[:,0]

    num = tags.size()[0]
    size = (num, num, tags.size()[1])
    A = tags.unsqueeze(dim=1).expand(*size)
    B = A.permute(1, 0, 2)

    diff = A - B
    diff = torch.pow(diff, 2).sum(dim=2)[:,:,0]  # relies on pre-0.2 PyTorch, where sum(dim) kept the reduced dim
    push = torch.exp(-diff)
    push = (torch.sum(push) - num)
    return push/((num - 1) * num + eps) * 0.5, pull/(num + eps)
Example #21
    def test_regularization(self):
        penalty = self.model.get_regularization_penalty().data
        assert (penalty > 0).all()

        penalty2 = 0

        # Config specifies penalty as
        #   "regularizer": [
        #     ["weight$", {"type": "l2", "alpha": 10}],
        #     ["bias$", {"type": "l1", "alpha": 5}]
        #   ]
        for name, parameter in self.model.named_parameters():
            if name.endswith("weight"):
                weight_penalty = 10 * torch.sum(torch.pow(parameter, 2))
                penalty2 += weight_penalty
            elif name.endswith("bias"):
                bias_penalty = 5 * torch.sum(torch.abs(parameter))
                penalty2 += bias_penalty

        assert (penalty == penalty2.data).all()

        # You get a RuntimeError if you call `model.forward` twice on the same inputs.
        # The data and config are such that the whole dataset is one batch.
        training_batch = next(self.iterator(self.instances, num_epochs=1))
        validation_batch = next(self.iterator(self.instances, num_epochs=1))

        training_loss = self.trainer._batch_loss(training_batch, for_training=True).data
        validation_loss = self.trainer._batch_loss(validation_batch, for_training=False).data

        # Training loss should have the regularization penalty, but validation loss should not.
        assert (training_loss != validation_loss).all()

        # Training loss should equal the validation loss plus the penalty.
        penalized = validation_loss + penalty
        assert (training_loss == penalized).all()
Example #22
 def forward(self, inputs, targets):
     """
     Args:
     - inputs: feature matrix with shape (batch_size, feat_dim)
      - targets: ground truth labels with shape (batch_size)
     """
     n = inputs.size(0)
     
     # Compute pairwise distance, replace by the official when merged
     dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
     dist = dist + dist.t()
     dist.addmm_(1, -2, inputs, inputs.t())
     dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
     
     # For each anchor, find the hardest positive and negative
     mask = targets.expand(n, n).eq(targets.expand(n, n).t())
     dist_ap, dist_an = [], []
     for i in range(n):
         dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
         dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
     dist_ap = torch.cat(dist_ap)
     dist_an = torch.cat(dist_an)
     
     # Compute ranking hinge loss
     y = torch.ones_like(dist_an)
     loss = self.ranking_loss(dist_an, dist_ap, y)
     return loss
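The hard mining above hinges on the positive-pair mask; a tiny standalone sketch:

import torch

targets = torch.tensor([0, 1, 0])
n = targets.size(0)
mask = targets.expand(n, n).eq(targets.expand(n, n).t())
print(mask)
# entry (i, j) is True when samples i and j share a label:
# tensor([[ True, False,  True],
#         [False,  True, False],
#         [ True, False,  True]])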
Example #23
def kurtosis_score(x, dim=0):
    '''Test whether a dataset has normal kurtosis.

    This function tests the null hypothesis that the kurtosis
    of the population from which the sample was drawn is that
    of the normal distribution: ``kurtosis = 3(n-1)/(n+1)``.
    Adapted from `scipy.stats.kurtosistest`.

    Args:
        x: Array of the sample data
        dim: Dimension along which to compute the test. Default is 0. If None,
           compute over the whole tensor `x`.
    Returns:
        statistic: The computed z-score for this test.
        p-value: A 2-sided chi squared probability for the hypothesis test.
    '''
    x, n, dim = _x_n_dim(x, dim)
    if n < 20:
        raise ValueError(
            "Number of elements has to be >= 20 to compute kurtosis")
    b2 = (x**4).mean(dim) / (x**2).mean(dim)**2
    E = 3.0 * (n - 1) / (n + 1)
    varb2 = 24.0 * n * (n - 2) * (n - 3) / ((n + 1)**2 * (n + 3) * (n + 5))
    x = (b2 - E) / math.sqrt(varb2)
    sqrtbeta1 = 6.0 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) *\
        math.sqrt((6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3)))
    A = 6.0 + 8.0 / sqrtbeta1 * \
        (2.0 / sqrtbeta1 + math.sqrt(1 + 4.0 / (sqrtbeta1**2)))
    term1 = 1 - 2 / (9.0 * A)
    denom = 1 + x * math.sqrt(2 / (A - 4.0))
    term2 = torch.sign(denom) * torch.pow((1 - 2.0 / A) /
                                          torch.abs(denom), 1 / 3.0)
    Z = (term1 - term2) / math.sqrt(2 / (9.0 * A))
    return Z, 1 + torch.erf(-math.sqrt(0.5) * torch.abs(Z))
 def guide():
     pyro.module("mymodule", pt_guide)
     mu_q, tau_q = torch.exp(pt_guide.mu_q_log), torch.exp(pt_guide.tau_q_log)
     sigma = torch.pow(tau_q, -0.5)
     pyro.sample("mu_latent",
                 dist.Normal(mu_q, sigma, reparameterized=reparameterized),
                 baseline=dict(use_decaying_avg_baseline=True))
Example #25
    def updateGradInput(self, input, gradOutput):
        assert input.dim() == 4

        if input.type() == 'torch.cuda.FloatTensor':
            self._backend.SpatialCrossMapLRN_updateGradInput(
                self._backend.library_state,
                input,
                gradOutput,
                self.gradInput,
                self.scale,
                self.output,
                self.size,
                self.alpha,
                self.beta,
                self.k
            )
        else:
            batchSize = input.size(0)
            channels = input.size(1)
            inputHeight = input.size(2)
            inputWidth = input.size(3)

            if self.paddedRatio is None:
                self.paddedRatio = input.new()
            if self.accumRatio is None:
                self.accumRatio = input.new()
            self.paddedRatio.resize_(channels + self.size - 1, inputHeight, inputWidth)
            self.accumRatio.resize_(inputHeight, inputWidth)

            cacheRatioValue = 2 * self.alpha * self.beta / self.size
            inversePrePad = int(self.size - (self.size - 1) / 2)

            self.gradInput.resize_as_(input)
            torch.pow(self.scale, -self.beta, out=self.gradInput).mul_(gradOutput)

            self.paddedRatio.zero_()
            paddedRatioCenter = self.paddedRatio.narrow(0, inversePrePad, channels)
            for n in range(batchSize):
                torch.mul(gradOutput[n], self.output[n], out=paddedRatioCenter)
                paddedRatioCenter.div_(self.scale[n])
                torch.sum(self.paddedRatio.narrow(0, 0, self.size - 1), 0, keepdim=False, out=self.accumRatio)
                for c in range(channels):
                    self.accumRatio.add_(self.paddedRatio[c + self.size - 1])
                    self.gradInput[n][c].addcmul_(-cacheRatioValue, input[n][c], self.accumRatio)
                    self.accumRatio.add_(-1, self.paddedRatio[c])

        return self.gradInput
 def forward(self, inputs, targets, step, weight_constraint_lambda, logger):
     n = inputs.size(0)
     # Compute pairwise distance, replace by the official when merged
     # features = F.normalize(inputs)
     features = inputs
     dist = torch.pow(features, 2).sum(dim=1, keepdim=True).expand(n, n)
     dist = dist + dist.t()
     dist.addmm_(1, -2, features, features.t())
     dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
     # get the positive label mask
     mask = targets.expand(n, n).eq(targets.expand(n, n).t())
     mask = mask.float()
     positive_dist = torch.mul(dist, mask)
     negative_dist = torch.mul(mask, dist.max()) + torch.mul(dist, 1 - mask)
     indexes_ap = []
     indexes_ng = []
     dist_ap = []
     dist_an = []
     for i in range(n):
         pos_dist, pos_index = positive_dist[i].max(0)
         neg_dist, neg_index = negative_dist[i].min(0)
         dist_ap.append(pos_dist)
         dist_an.append(neg_dist)
         indexes_ap.append(pos_index)
         indexes_ng.append(neg_index)
     dist_ap = torch.cat(dist_ap)
     dist_an = torch.cat(dist_an)
     indexes_ap = torch.cat(indexes_ap)
     indexes_ng = torch.cat(indexes_ng)
     pair_adp_inputs = []
     for i in range(n):
         pair_adp_inputs.append(torch.cat([inputs[i, :], inputs[indexes_ap.data[i], :]]))
     # for i in range(n):
         pair_adp_inputs.append(torch.cat([inputs[i, :], inputs[indexes_ng.data[i], :]]))
     pair_adp_inputs = torch.stack(pair_adp_inputs)
     # Compute adp_pairwise distance, replace by the official when merged
     dist_adp = self.AdpsubM(pair_adp_inputs, n)  # [2*batchsize] [ap,ng]*batchsize
     # dist_constraint = torch.norm(dist-dist.t())
     dist_ap_adp = dist_adp[::2]
     dist_an_adp = dist_adp[1::2]
     # Compute ranking hinge loss
     y = dist_an.data.new()
     y.resize_as_(dist_an.data)
     y.fill_(1)
     y = Variable(y)
     # dist_neg_constr = 1/torch.norm(dist[mask==0])
     trip_loss = self.softmargin_loss(dist_an - dist_ap, y)
     trip_loss_adp = self.softmargin_loss(dist_an_adp - dist_ap_adp, y)
     loss = trip_loss + trip_loss_adp
     # loss = trip_loss
     if logger:
         # logger.scalar_summary('Metric_constraint', Metric_constraint.data[0], step)
         # logger.scalar_summary('dist_constraint', dist_constraint.data[0], step)
         # logger.histo_summary('W',W.data.cpu().numpy(),step)
         logger.histo_summary('dist_apt', dist_adp.data.cpu().numpy(), step)
         logger.histo_summary('dist', dist.data.cpu().numpy(), step)
         logger.scalar_summary('trip_loss', trip_loss.data[0], step)
     prec = (dist_an.data > dist_ap.data).sum() * 1. / y.size(0)
     return trip_loss_adp, prec
Example #27
    def on_criterion(self, state):
        """Calculate the decay term and add to state['loss'].

        :param state: The Model state
        :type state: dict
        """
        for param in self.params:
            state['loss'] += self.rate * torch.pow(param, self.p).sum()
 def obs_inner(i, _i, _x):
     for k in range(n_superfluous_top):
         pyro.sample("z_%d_%d" % (i, k),
                     dist.Normal(ng_zeros(4 - i, 1), ng_ones(4 - i, 1), reparameterized=False))
     pyro.observe("obs_%d" % i, dist.normal, _x, mu_latent, torch.pow(self.lam, -0.5))
     for k in range(n_superfluous_top, n_superfluous_top + n_superfluous_bottom):
         pyro.sample("z_%d_%d" % (i, k),
                     dist.Normal(ng_zeros(4 - i, 1), ng_ones(4 - i, 1), reparameterized=False))
Example #29
 def forward(ctx, a, b):
     tensor, ctx.constant, ctx.tensor_first = sort_args(a, b)
     if ctx.tensor_first:
         ctx.save_for_backward(tensor)
         return tensor.pow(ctx.constant)
     else:
         result = torch.pow(ctx.constant, tensor)
         ctx.save_for_backward(result)
         return result
Example #30
 def __compute_kl(self, mu):
     # def _compute_kl(self, mu, sd):
     # mu_2 = torch.pow(mu, 2)
     # sd_2 = torch.pow(sd, 2)
     # encoding_loss = (mu_2 + sd_2 - torch.log(sd_2)).sum() / mu_2.size(0)
     # return encoding_loss
     mu_2 = torch.pow(mu, 2)
     encoding_loss = torch.mean(mu_2)
     return encoding_loss
def sharpen(mask, temperature):
    masktemp = torch.pow(mask, temperature)
    masktempsum = masktemp.sum(dim=1).unsqueeze(dim=1)
    sharpenmask = masktemp / masktempsum
    return sharpenmask
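A quick sketch of sharpen(): each row is renormalized after exponentiation, so an exponent above 1 concentrates mass on the largest entry.

import torch

mask = torch.tensor([[0.6, 0.4],
                     [0.5, 0.5]])
print(sharpen(mask, 2.0))
# rows still sum to 1: [0.6, 0.4] -> ~[0.6923, 0.3077]; [0.5, 0.5] is unchanged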
Example #32
 def mapping(self, x):
     x_vec = torch.ones(len(x)).view(1, -1)
     for i in range(1, self.M):
         tmp = torch.pow(x, i).view(1, -1)
         x_vec = torch.cat((x_vec, tmp), 0)
     return x_vec
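mapping() builds a Vandermonde-style design matrix with rows x^0, x^1, ..., x^(M-1). A standalone sketch with M passed explicitly (poly_features is a hypothetical name):

import torch

def poly_features(x, M):
    # same result as mapping() above, shape (M, len(x)); x^0 is the ones row
    return torch.stack([torch.pow(x, i) for i in range(M)])

print(poly_features(torch.tensor([1., 2., 3.]), M=3))
# tensor([[1., 1., 1.],
#         [1., 2., 3.],
#         [1., 4., 9.]])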
    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
                amsgrad = group['amsgrad']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
            
                    if len(p.size())!=1:
                        state['followed_weight'] = np.random.randint(p.size(0)),np.random.randint(p.size(1))
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad.add_(group['weight_decay'], p.data)

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                if amsgrad:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])


                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
                

                binary_weight_before_update = torch.sign(p.data)
                condition_consolidation = (torch.mul(binary_weight_before_update,exp_avg) > 0.0 )

                decayed_exp_avg = torch.mul(torch.ones_like(p.data)-torch.pow(torch.tanh(group['meta']*torch.abs(p.data)),2) ,exp_avg)

  
                if len(p.size())==1: # True if p is bias, false if p is weight
                    p.data.addcdiv_(-step_size, exp_avg, denom)
                else:
                    #p.data.addcdiv_(-step_size, exp_avg , denom)  #normal update
                    p.data.addcdiv_(-step_size, torch.where(condition_consolidation, decayed_exp_avg, exp_avg), denom)  # asymmetric lr for metaplasticity
                    
        return loss
Example #34
 def task_error(self, w, x, y):
     self._validate_inputs(w, x, y)
     # Compute mean squared error
     error = torch.mean(torch.pow(torch.mm(x, w) - y.view(-1, 1), 2))
     return error
Example #35
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                    classification_losses.append(
                        torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                    classification_losses.append(torch.tensor(0).float())

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1
            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            alpha_factor = torch.ones(targets.shape) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros(cls_loss.shape)
            if torch.cuda.is_available():
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.float(), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:,
                                                 2] - assigned_annotations[:,
                                                                           0]
                gt_heights = assigned_annotations[:,
                                                  3] - assigned_annotations[:,
                                                                            1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                norm = torch.Tensor([[0.1, 0.1, 0.2, 0.2]])
                if torch.cuda.is_available():
                    norm = norm.cuda()
                targets = targets / norm

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())

        return torch.stack(classification_losses).mean(
            dim=0,
            keepdim=True), torch.stack(regression_losses).mean(dim=0,
                                                               keepdim=True)


    if cfg.METHOD == "tau_norm":
        model_state_dict = model.state_dict()
        # set bias as zero
        model_state_dict['module.classifier.bias'].copy_(torch.zeros(
            (num_classes)))
        weight_ori = model_state_dict['module.classifier.weight']
        norm_weight = torch.norm(weight_ori, 2, 1)
        best_accuracy = 0
        best_p = 0
        for p in np.arange(0.0, 1.0, 0.1):
            ws = weight_ori.clone()
            for i in range(weight_ori.size(0)):
                ws[i] = ws[i] / torch.pow(norm_weight[i], p)
            model_state_dict['module.classifier.weight'].copy_(ws)
            print("\n___________________________", p, "__________________________________")
            acc, _ = valid_model(testLoader, model, num_classes, para_dict_train,
                                    para_dict_test,criterion, LOSS_RATIO=0)
            if acc > best_accuracy:
                best_accuracy = acc
                best_p = p
            print("when p is", best_p, ", best result is", best_accuracy)
    elif cfg.METHOD == "BPM":
        best_accuracy = 0
        best_LOSS_RATIO = 0
        for LOSS_RATIO in np.arange(0.0, 2.0, 0.1):
            print("\n___________________________", LOSS_RATIO, "__________________________________")
            acc, acc_per_class = valid_model(testLoader, model, num_classes, para_dict_train,
                                 para_dict_test, criterion, LOSS_RATIO)
Example #37
def l2norm(X, dim, eps=1e-8):
    """L2-normalize columns of X
    """
    norm = torch.pow(X, 2).sum(dim=dim, keepdim=True).sqrt() + eps
    X = torch.div(X, norm)
    return X
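A minimal check: after l2norm, each slice along dim has (approximately) unit L2 norm, and eps keeps all-zero rows finite.

import torch

X = torch.randn(4, 8)
Xn = l2norm(X, dim=-1)
print(Xn.norm(dim=-1))  # all entries ~1.0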
Example #38
 def psi(a: torch.Tensor) -> torch.Tensor:
     """Quadratic penalty function."""
     return torch.pow(torch.max(torch.zeros_like(a), a), 2)
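psi() evaluates max(0, a)^2: zero for non-positive inputs, a quadratic ramp otherwise. A minimal check:

import torch

a = torch.tensor([-1.0, 0.0, 2.0])
print(psi(a))  # tensor([0., 0., 4.])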
Example #39
def load_data(dataset_name,
              splits_file_path=None,
              train_percentage=None,
              val_percentage=None,
              embedding_mode=None,
              embedding_method=None,
              embedding_method_graph=None,
              embedding_method_space=None):
    if dataset_name in {'cora', 'citeseer', 'pubmed'}:
        adj, features, labels, _, _, _ = utils.load_data(dataset_name)
        labels = np.argmax(labels, axis=-1)
        features = features.todense()
        G = nx.DiGraph(adj)
    else:
        graph_adjacency_list_file_path = os.path.join('new_data', dataset_name,
                                                      'out1_graph_edges.txt')
        graph_node_features_and_labels_file_path = os.path.join(
            'new_data', dataset_name, f'out1_node_feature_label.txt')

        G = nx.DiGraph()
        graph_node_features_dict = {}
        graph_labels_dict = {}

        if dataset_name == 'film':
            with open(graph_node_features_and_labels_file_path
                      ) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    feature_blank = np.zeros(932, dtype=np.uint8)
                    feature_blank[np.array(line[1].split(','),
                                           dtype=np.uint16)] = 1
                    graph_node_features_dict[int(line[0])] = feature_blank
                    graph_labels_dict[int(line[0])] = int(line[2])
        else:
            with open(graph_node_features_and_labels_file_path
                      ) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    graph_node_features_dict[int(line[0])] = np.array(
                        line[1].split(','), dtype=np.uint8)
                    graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                line = line.rstrip().split('\t')
                assert (len(line) == 2)
                if int(line[0]) not in G:
                    G.add_node(int(line[0]),
                               features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]),
                               features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        features = np.array([
            features for _, features in sorted(G.nodes(data='features'),
                                               key=lambda x: x[0])
        ])
        labels = np.array([
            label
            for _, label in sorted(G.nodes(data='label'), key=lambda x: x[0])
        ])

    features = utils.preprocess_features(features)

    if not embedding_mode:
        g = DGLGraph(adj + sp.eye(adj.shape[0]))
    else:
        if embedding_mode == 'ExperimentTwoAll':
            embedding_file_path = os.path.join(
                'embedding_method_combinations_all',
                f'outf_nodes_relation_{dataset_name}all_embedding_methods.txt')
        elif embedding_mode == 'ExperimentTwoPairs':
            embedding_file_path = os.path.join(
                'embedding_method_combinations_in_pairs',
                f'outf_nodes_relation_{dataset_name}_graph_{embedding_method_graph}_space_{embedding_method_space}.txt'
            )
        else:
            embedding_file_path = os.path.join(
                'structural_neighborhood',
                f'outf_nodes_space_relation_{dataset_name}_{embedding_method}.txt'
            )
        space_and_relation_type_to_idx_dict = {}

        with open(embedding_file_path) as embedding_file:
            for line in embedding_file:
                if line.rstrip() == 'node1,node2	space	relation_type':
                    continue
                line = re.split(r'[\t,]', line.rstrip())
                assert (len(line) == 4)
                assert (int(line[0]) in G and int(line[1]) in G)
                if (line[2], int(
                        line[3])) not in space_and_relation_type_to_idx_dict:
                    space_and_relation_type_to_idx_dict[(line[2], int(
                        line[3]))] = len(space_and_relation_type_to_idx_dict)
                if G.has_edge(int(line[0]), int(line[1])):
                    G.remove_edge(int(line[0]), int(line[1]))
                G.add_edge(int(line[0]),
                           int(line[1]),
                           subgraph_idx=space_and_relation_type_to_idx_dict[(
                               line[2], int(line[3]))])

        space_and_relation_type_to_idx_dict['self_loop'] = len(
            space_and_relation_type_to_idx_dict)
        for node in sorted(G.nodes()):
            if G.has_edge(node, node):
                G.remove_edge(node, node)
            G.add_edge(
                node,
                node,
                subgraph_idx=space_and_relation_type_to_idx_dict['self_loop'])
        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        g = DGLGraph(adj)

        for u, v, feature in G.edges(data='subgraph_idx'):
            g.edges[g.edge_id(u,
                              v)].data['subgraph_idx'] = th.tensor([feature])

    if splits_file_path:
        with np.load(splits_file_path) as splits_file:
            train_mask = splits_file['train_mask']
            val_mask = splits_file['val_mask']
            test_mask = splits_file['test_mask']
    else:
        assert (train_percentage is not None and val_percentage is not None)
        assert (train_percentage < 1.0 and val_percentage < 1.0
                and train_percentage + val_percentage < 1.0)

        if dataset_name in {'cora', 'citeseer'}:
            disconnected_node_file_path = os.path.join(
                'unconnected_nodes', f'{dataset_name}_unconnected_nodes.txt')
            with open(disconnected_node_file_path) as disconnected_node_file:
                disconnected_node_file.readline()
                disconnected_nodes = []
                for line in disconnected_node_file:
                    line = line.rstrip()
                    disconnected_nodes.append(int(line))

            disconnected_nodes = np.array(disconnected_nodes)
            connected_nodes = np.setdiff1d(np.arange(features.shape[0]),
                                           disconnected_nodes)

            connected_labels = labels[connected_nodes]

            train_and_val_index, test_index = next(
                ShuffleSplit(n_splits=1,
                             train_size=train_percentage +
                             val_percentage).split(
                                 np.empty_like(connected_labels),
                                 connected_labels))
            train_index, val_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage).split(
                    np.empty_like(connected_labels[train_and_val_index]),
                    connected_labels[train_and_val_index]))
            train_index = train_and_val_index[train_index]
            val_index = train_and_val_index[val_index]

            train_mask = np.zeros_like(labels)
            train_mask[connected_nodes[train_index]] = 1
            val_mask = np.zeros_like(labels)
            val_mask[connected_nodes[val_index]] = 1
            test_mask = np.zeros_like(labels)
            test_mask[connected_nodes[test_index]] = 1
        else:
            train_and_val_index, test_index = next(
                ShuffleSplit(n_splits=1,
                             train_size=train_percentage +
                             val_percentage).split(np.empty_like(labels),
                                                   labels))
            train_index, val_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage).split(
                    np.empty_like(labels[train_and_val_index]),
                    labels[train_and_val_index]))
            train_index = train_and_val_index[train_index]
            val_index = train_and_val_index[val_index]

            train_mask = np.zeros_like(labels)
            train_mask[train_index] = 1
            val_mask = np.zeros_like(labels)
            val_mask[val_index] = 1
            test_mask = np.zeros_like(labels)
            test_mask[test_index] = 1

    num_features = features.shape[1]
    num_labels = len(np.unique(labels))
    assert (np.array_equal(np.unique(labels),
                           np.arange(len(np.unique(labels)))))

    features = th.FloatTensor(features)
    labels = th.LongTensor(labels)
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)
    test_mask = th.BoolTensor(test_mask)

    # Adapted from https://docs.dgl.ai/tutorials/models/1_gnn/1_gcn.html
    degs = g.in_degrees().float()
    norm = th.pow(degs, -0.5).cuda()
    norm[th.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)

    return g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels
def angle_defn(pos, i, d_model_size):
    angle_rates = 1 / torch.pow(10000, (2 * (i // 2)) / d_model_size)
    return pos * angle_rates
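angle_defn() is the sinusoidal schedule used in Transformer-style position encodings (this snippet matches the CTRL implementation). A minimal sketch of building a small encoding table from it; the sin/cos concatenation below is illustrative:

import torch

d_model_size = 8
pos = torch.arange(4, dtype=torch.float).unsqueeze(1)           # (4, 1)
i = torch.arange(d_model_size, dtype=torch.float).unsqueeze(0)  # (1, 8)

angles = angle_defn(pos, i, d_model_size)                       # (4, 8)
pos_encoding = torch.cat([torch.sin(angles[:, 0::2]),
                          torch.cos(angles[:, 1::2])], dim=-1)
print(pos_encoding.shape)  # torch.Size([4, 8])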
import math
import numpy as np
import scipy as sp
import scipy.linalg
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from nf.utils import unconstrained_RQS

# supported non-linearities: note that the function must be invertible
functional_derivatives = {
    torch.tanh: lambda x: 1 - torch.pow(torch.tanh(x), 2),
    F.leaky_relu: lambda x: (x > 0).type(torch.FloatTensor) + \
                            (x < 0).type(torch.FloatTensor) * -0.01,
    F.elu: lambda x: (x > 0).type(torch.FloatTensor) + \
                     (x < 0).type(torch.FloatTensor) * torch.exp(x)
}
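# A sanity check (not part of the original file): the closed-form tanh
# derivative in the table above agrees with autograd.
_x = torch.randn(5, requires_grad=True)
torch.tanh(_x).sum().backward()
assert torch.allclose(_x.grad, functional_derivatives[torch.tanh](_x.detach()))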


class Planar(nn.Module):
    """
    Planar flow.

        z = f(x) = x + u h(wᵀx + b)

    [Rezende and Mohamed, 2015]
    """
    def __init__(self, dim, nonlinearity=torch.tanh):
        super().__init__()
        self.h = nonlinearity
Example #42
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example, max_y, max_x):
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _, padded_att_guides) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUs using a workaround for a Python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention, r = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention, r = model(x, m)

            reduced_guides = []

            att_guide_path = hp.attention_path
            for j, item_id in enumerate(ids):
                att = np.load(f'{att_guide_path}/{item_id}.npy')
                reduced = att[0::r]

                pred_attention = attention[j]
                n_frames = pred_attention.shape[0]
                n_phones = pred_attention.shape[-1]

                #  pred_attention = torch.tensor(pred_attention)
                # reduced = torch.tensor(reduced)

                padded_guides = pad2d_nonzero(reduced, n_frames, n_phones)
                #padded_guides = torch.tensor(padded_guides)
                reduced_guides.append(padded_guides)

            reduced_guides = torch.tensor(reduced_guides)
            mask = torch.ne(reduced_guides, -1).type(torch.FloatTensor)

            mask = torch.tensor(mask)
            padded_guides = [
                pad2d_zero(x, n_frames, n_phones) for x in reduced_guides
            ]
            padded_guides = torch.tensor(padded_guides)
            padded_guides = padded_guides.to(device)
            attention = attention.to(device)
            mask = mask.to(device)
            attention = attention * mask
            print("guide att shape", att.shape)
            print(att)

            print("reduced guide", padded_guides.shape)

            #   print("attention size",n_frames, n_phones)
            print("mask", mask.shape)
            print(mask)

            print(padded_guides.shape, attention.shape, mask.shape)

            print(attention)
            print(padded_guides)

            multiply = torch.pow((attention - padded_guides), 2)
            print(multiply)

            #multiply = torch.pow((pred_attention - padded_guides),2)* mask
            #print(multiply)

            attention_loss = torch.sum(multiply)
            print(attention_loss)
            mask_sum1 = torch.sum(mask)

            attention_loss /= mask_sum1
            print(attention_loss)

            #    batch_attention_losses.append(attention_loss)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            #average_att_loss = sum(batch_attention_losses)/len(batch_attention_losses)
            #print("attention loss", average_att_loss)
            #print("m losses", m1_loss, m2_loss)
            prev_loss = m1_loss + m2_loss
            print("prev loss", prev_loss)
            loss = m1_loss + m2_loss + attention_loss
            print("loss + att", loss)
            #exit()
            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / f'{step}')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
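A minimal standalone sketch of the masked guided-attention loss computed above (the helper name and the 0/1 mask convention are our assumptions, not part of the original code):

import torch

def masked_attention_loss(attention, guides, mask):
    # squared error on valid (frame, phone) positions only,
    # normalised by the number of unmasked entries
    sq_err = torch.pow((attention - guides) * mask, 2)
    return torch.sum(sq_err) / torch.sum(mask)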
    def train(self,
              epoch,
              max_epoch,
              writer,
              print_freq=10,
              fixbase_epoch=0,
              open_layers=None):
        losses_t = AverageMeter()
        losses_x = AverageMeter()
        accs = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        loss_meter = AverageMeter()

        self.model.train()
        if (epoch + 1) <= fixbase_epoch and open_layers is not None:
            print('* Only train {} (epoch: {}/{})'.format(
                open_layers, epoch + 1, fixbase_epoch))
            open_specified_layers(self.model, open_layers)
        else:
            open_all_layers(self.model)

        num_batches = len(self.train_loader)
        end = time.time()

        layer_nums = 3
        for batch_idx, data in enumerate(self.train_loader):
            data_time.update(time.time() - end)

            imgs, pids = self._parse_data_for_train(data)

            if self.use_gpu:
                imgs = imgs.cuda()
                pids = pids.cuda()
            self.optimizer.zero_grad()
            outputs, features, h, b, y_resnet, mgn_1, mgn_2, mgn_3 = self.model(
                imgs)
            #print(len(logits_list))
            #print(logits_list[0].shape)
            pids_g = self.parse_pids(pids)
            x = features

            target_b = F.cosine_similarity(b[:pids_g.size(0) // 2],
                                           b[pids_g.size(0) // 2:])
            target_x = F.cosine_similarity(x[:pids_g.size(0) // 2],
                                           x[pids_g.size(0) // 2:])

            loss1 = F.mse_loss(target_b, target_x)
            loss2 = torch.mean(
                torch.abs(
                    torch.pow(
                        torch.abs(h) - Variable(torch.ones(h.size()).cuda()),
                        3)))
            loss_greedy = loss1 + 0.1 * loss2
            loss_batchhard_hash = self.compute_hashbatchhard(b, pids)

            loss_t = self._compute_loss(self.criterion_t, features, pids)
            loss_x = self._compute_loss(
                self.criterion_x, outputs, pids) + self._compute_loss(
                    self.criterion_x, y_resnet, pids) + self._compute_loss(
                        self.criterion_x, mgn_1, pids) + self._compute_loss(
                            self.criterion_x,
                            mgn_2, pids) + self._compute_loss(
                                self.criterion_x, mgn_3, pids)

            loss = self.weight_t * loss_t + self.weight_x * loss_x + loss_greedy + loss_batchhard_hash * 2

            loss.backward()
            self.optimizer.step()

            batch_time.update(time.time() - end)

            losses_t.update(loss_t.item(), pids.size(0))
            losses_x.update(loss_x.item(), pids.size(0))

            accs.update(metrics.accuracy(outputs, pids)[0].item())

            if (batch_idx + 1) % print_freq == 0:
                # estimate remaining time
                eta_seconds = batch_time.avg * (num_batches - (batch_idx + 1) +
                                                (max_epoch -
                                                 (epoch + 1)) * num_batches)
                eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
                print('Epoch: [{0}/{1}][{2}/{3}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss_t {loss_t.val:.4f} ({loss_t.avg:.4f})\t'
                      'Loss_x {loss_x.val:.4f} ({loss_x.avg:.4f})\t'
                      'Loss_g {loss_g:.4f}\t'
                      'Loss_p {loss_p:.4f}\t'
                      'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                      'Lr {lr:.6f}\t'
                      'eta {eta}'.format(
                          epoch + 1,
                          max_epoch,
                          batch_idx + 1,
                          num_batches,
                          batch_time=batch_time,
                          data_time=data_time,
                          loss_t=losses_t,
                          loss_x=losses_x,
                          loss_g=loss_greedy.item(),
                          loss_p=loss_batchhard_hash.item(),
                          acc=accs,
                          lr=self.optimizer.param_groups[0]['lr'],
                          eta=eta_str))

            if writer is not None:
                n_iter = epoch * num_batches + batch_idx
                writer.add_scalar('Train/Time', batch_time.avg, n_iter)
                writer.add_scalar('Train/Data', data_time.avg, n_iter)
                writer.add_scalar('Train/Loss_t', losses_t.avg, n_iter)
                writer.add_scalar('Train/Loss_x', losses_x.avg, n_iter)
                writer.add_scalar('Train/Acc', accs.avg, n_iter)
                writer.add_scalar('Train/Lr',
                                  self.optimizer.param_groups[0]['lr'], n_iter)

            end = time.time()

        if self.scheduler is not None:
            self.scheduler.step()
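The loss2 term above is a binarisation regulariser: it pushes every hash activation in h toward +/-1. A minimal standalone sketch (the function name is ours, not from the original code):

import torch

def hash_binarisation_penalty(h):
    # mean of |(|h| - 1)|^3 -- zero exactly when all activations are +1 or -1
    return torch.mean(torch.abs(torch.pow(torch.abs(h) - 1.0, 3)))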
Beispiel #44
0
def expected_val(pred):
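    # Expected rating E[r] = sum_{r=1..5} r * softmax(pred)[..., r-1],
    # computed per (n, m) position; assumes d == 5 score classes.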
    n, m, d = pred.size()
    p = torch.arange(1, 6, dtype=torch.float32).view((5, 1))
    if CUDA:
        p = p.cuda()
    return torch.mm(softmax(pred).view((n * m, d)), p).view((n, m, 1))


epochs = 1000
for ep in range(epochs):
    optimizer.zero_grad()
    #print(train_id.size(), train_mask.size())
    embeddings = enc(train_id, train_mask)
    y_hat = dec(embeddings, train_mask)
    train_loss = ce(y_hat, train_id, train_mask)
    reg_loss = 0
    for p in pars:
        reg_loss += torch.sum(torch.pow(p, 2))
    loss = train_loss + 0.0001 * reg_loss
    loss.backward()
    mse_train = mse(expected_val(y_hat), train_x, train_mask)
    optimizer.step()
    if ep % 1 == 0:
        val_hat = dec(enc(train_id, train_mask), val_mask)
        mse_val = mse(expected_val(val_hat), val_x, val_mask)
        val_loss = np.sqrt(mse_val.item())
    print(
        'Train Epoch: {}, Loss: {:.6f}, MSE: {:.6f}, Val_loss: {:.6f}'.format(
            ep, loss.item(), np.sqrt(mse_train.item()), val_loss))
Beispiel #45
0
import torch

if __name__ == "__main__":
    x = torch.randn(3, 2).cuda().requires_grad_()
    y = torch.randn(3, 2).cuda().requires_grad_()
    k = 3

    torch.sqrt(torch.pow(x.unsqueeze(0) - y.unsqueeze(1),
                         2).sum(dim=2)).sum().backward()

    print(x.grad, y.grad)
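The distance matrix built here is exactly what torch.cdist computes; a quick equivalence check on the same shapes:

import torch

x = torch.randn(3, 2)
y = torch.randn(3, 2)
d = torch.sqrt(torch.pow(x.unsqueeze(0) - y.unsqueeze(1), 2).sum(dim=2))
assert torch.allclose(d, torch.cdist(y, x), atol=1e-6)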
Beispiel #46
0
 def forward(self, x1, x2):
     assert x1.size() == x2.size()
     eps = 1e-4 / x1.size(1)
     diff = torch.abs(x1 - x2)
     out = torch.pow(diff, self.norm).sum(dim=1)
     return torch.pow(out + eps, 1. / self.norm)
Beispiel #47
0
    def forward(self, x, U, V, N, eta):
        """
        x: B x C x H x W
        U: B x (C x K) x H x W
        V: B x (C x K) x H x W
        N: B x (C x K) x H x W
        """
        B, C, H, W = x.shape
        B, CK, H, W = U.shape
        K = int(CK / C)

        S2 = torch.clamp(V - torch.pow(U, 2), min=0.01)
        S = torch.sqrt(S2)

        # X_cat: B x CK x H x W
        X_cat = torch.cat([
            torch.cat([x[:, i:i + 1, :, :] for _ in range(K)], dim=1)
            # X_cat[:, i*K:(i+1)*K, :, :] corresponds to a feature map with K mixtures
            for i in range(C)
        ], dim=1)

        XdU = X_cat - U  # B x CK x H x W
        XdUoS = XdU / S  # B x CK x H x W
        XdUoS2 = torch.pow(XdUoS, 2)  # B x CK x H x W

        nTotal = 1 / eta - 1  # scalar

        N = torch.cat([
            nTotal * N[:, i * K:(i + 1) * K, :, :] / torch.sum(N[:, i * K:(i + 1) * K, :, :], dim=1, keepdim=True)
            for i in range(C)
        ], dim=1)
        assert N.shape == (B, CK, H, W)

        P = N / nTotal  # P: B x CK x H x W
        assert P.shape == (B, CK, H, W)

        # cdf: B x CK x H x W
        cdf = torch.cat([
            Normal(0, 1).cdf(torch.abs(XdUoS[:, i:i + 1, :, :]))
            for i in range(CK)
        ], dim=1)
        assert cdf.shape == (B, CK, H, W)

        # prob: B x CK x H x W
        prob = torch.cat([
            torch.sum(P[:, i * K:(i + 1) * K, :, :] * cdf[:, i * K:(i + 1) * K, :, :], dim=1, keepdim=True)
            for i in range(C)
        ], dim=1)
        assert prob.shape == (B, C, H, W)

        log_prob = torch.log(N) - 0.5 * XdUoS2 - torch.log(S)

        # Gamma = nn.Softmax(dim=1)(log_prob)

        Gamma = torch.cat([
            nn.Softmax(dim=1)(log_prob[:, i * K:(i + 1) * K, :, :])
            for i in range(C)
        ], dim=1)

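        # Online EM-style update: Gamma holds per-component responsibilities;
        # adding them to the running counts N yields a per-pixel step size
        # Eta = Gamma / N, so the means U and second moments V drift toward
        # the new observation (e.g. U <- U + Eta * (X_cat - U)).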
        N = N + Gamma
        Eta = Gamma / N
        U = U + Eta * (X_cat - U)
        V = V + Eta * (torch.pow(X_cat, 2) - V)

        return U, V, N, prob
Beispiel #48
0
def gelu(x):
    return 0.5 * x * (1 + torch.tanh(
        math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
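This is the tanh approximation of GELU; newer PyTorch (1.12+, an assumption to check against your install) exposes the same approximation via F.gelu:

import math
import torch
import torch.nn.functional as F

x = torch.randn(8)
approx = 0.5 * x * (1 + torch.tanh(
    math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
assert torch.allclose(approx, F.gelu(x, approximate='tanh'), atol=1e-6)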
Beispiel #49
0
 def forward(self, x):
     x = torch.mean(x, 1, keepdim=True)
     mean = self.pool(x)
     return torch.mean(
         torch.pow(mean - torch.FloatTensor([self.mean_val]).cuda(), 2))
Beispiel #50
0
    def kl_loss(y: torch.Tensor) -> torch.Tensor:
        x_2 = torch.pow(y, 2)
        loss = torch.mean(x_2)

        return loss
Beispiel #51
0
def mse(output, target):
    return torch.mean(torch.pow(output - target, 2))
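The same mean-squared error is available as a built-in; a quick equivalence check:

import torch
import torch.nn.functional as F

output, target = torch.randn(8), torch.randn(8)
assert torch.allclose(torch.mean(torch.pow(output - target, 2)),
                      F.mse_loss(output, target))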
Beispiel #52
0
 def variance(self):
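     # Var[X] = E[X^2] - (E[X])^2; _moments(..., 2) is assumed to return
     # the second raw moment E[X^2].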
     return _moments(self.concentration1, self.concentration0,
                     2) - torch.pow(self.mean, 2)
Beispiel #53
0
 def power(self, tensor_in_1, tensor_in_2):
     tensor_in_1 = self.astensor(tensor_in_1)
     tensor_in_2 = self.astensor(tensor_in_2)
     return torch.pow(tensor_in_1, tensor_in_2)
Beispiel #54
0
    def predict_given_factorizations(self, m, s, iK, beta):
        """
        Approximate GP regression at noisy inputs via moment matching.
        IN: mean (m) (row vector) and variance (s) of the state
        OUT: mean (M) (row vector) and variance (S) of the action,
             and inv(s) times the input-output covariance
        """

        if type(m) != torch.Tensor or type(s) != torch.Tensor:
            m = torch.tensor(m).float().cuda()
            s = torch.tensor(s).float().cuda()
            print(
                "Warning: gradient may break in mgpr.predict_given_factorizations"
            )

        s = s.repeat(self.num_outputs, self.num_outputs, 1, 1)
        inp = self.centralized_input(m)

        # Calculate M and V: mean and inv(s) times input-output covariance
        iL = torch.diag_embed(
            1 / (self.model.covar_module.base_kernel.lengthscale.squeeze(1)))
        iN = inp @ iL
        B = iL @ s[0, ...] @ iL + torch.eye(self.num_dims).float().cuda()

        # Redefine iN as iN^T and t --> t^T
        # B is symmetric so it's the same
        t, _ = torch.solve(torch.transpose(iN, dim0=1, dim1=2), B)
        t = torch.transpose(t, dim0=1, dim1=2)

        lb = torch.exp(-torch.sum(iN * t, -1) / 2) * beta
        tiL = t @ iL
        t_det = torch.det(B)

        c = self.model.covar_module.outputscale / torch.sqrt(t_det)

        M = (torch.sum(lb, -1) * c)[:, None]
        V = (torch.transpose(tiL, dim0=1, dim1=2)
             @ lb[:, :, None])[..., 0] * c[:, None]

        # Calculate S: Predictive Covariance
        R_0 = torch.diag_embed(1 / torch.pow(
            self.model.covar_module.base_kernel.lengthscale.squeeze(1)[
                None, :, :], 2) + 1 / torch.pow(
                    self.model.covar_module.base_kernel.lengthscale.squeeze(1)
                    [:, None, :], 2))
        R = s @ R_0 + torch.eye(self.num_dims).float().cuda()

        # TODO: change this block according to the PR of tensorflow. Maybe move it into a function?
        X = inp[None, :, :, :] / torch.pow(
            self.model.covar_module.base_kernel.lengthscale.squeeze(1)
            [:, None, None, :], 2)
        X2 = -inp[:, None, :, :] / torch.pow(
            self.model.covar_module.base_kernel.lengthscale.squeeze(1)[
                None, :, None, :], 2)
        q_x, _ = torch.solve(s, R)
        Q = q_x / 2
        Xs = torch.sum(X @ Q * X, -1)
        X2s = torch.sum(X2 @ Q * X2, -1)
        maha = -2 * ((X @ Q) @ torch.transpose(X2, dim0=2, dim1=3)) + \
            Xs[:, :, :, None] + X2s[:, :, None, :]

        k = torch.log(self.model.covar_module.outputscale)[:, None] - \
            torch.sum(torch.pow(iN, 2), -1) / 2
        L = torch.exp(k[:, None, :, None] + k[None, :, None, :] + maha)
        S = (beta[:, None, None, :].repeat(1, self.num_outputs, 1, 1) @ L
             @ beta[None, :, :, None].repeat(self.num_outputs, 1, 1, 1))[:, :, 0, 0]

        diagL = torch.diagonal(L.permute((3, 2, 1, 0)), dim1=-2,
                               dim2=-1).permute(2, 1, 0)
        S = S - torch.diag_embed(torch.sum((iK * diagL), [1, 2]))
        r_det = torch.det(R)

        S = S / torch.sqrt(r_det)
        S = S + torch.diag_embed(self.model.covar_module.outputscale)
        S = S - M @ M.t()

        return M.t(), S, V.t()
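Note: torch.solve(B, A), used twice above, solves A @ X = B and has been removed in recent PyTorch releases; the replacement is torch.linalg.solve with the argument order swapped, e.g.:

# old: t, _ = torch.solve(torch.transpose(iN, dim0=1, dim1=2), B)
# new: t = torch.linalg.solve(B, torch.transpose(iN, dim0=1, dim1=2))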
 def forward(self, dist):
     dist = dist.view(-1, 1) - self.offset.view(1, -1)
     return torch.exp(self.coeff * torch.pow(dist, 2))
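This layer is a Gaussian radial-basis expansion of scalar distances. A minimal sketch of how offset and coeff are commonly chosen (SchNet-style; the ranges and counts here are illustrative assumptions):

import torch

offset = torch.linspace(0.0, 5.0, 50)        # RBF centres
coeff = -0.5 / (offset[1] - offset[0]) ** 2  # width tied to centre spacing
dist = torch.rand(32)                        # example distances
expanded = torch.exp(coeff * torch.pow(dist.view(-1, 1) - offset.view(1, -1), 2))
assert expanded.shape == (32, 50)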
Beispiel #56
0
def compute_length_penalty(wl1, wl2, alpha=0.25):
    x = torch.stack((wl1.squeeze(), wl2.squeeze()), dim=1)
    x_min, _ = torch.min(x, dim=1)
    x_max, _ = torch.max(x, dim=1)
    ratio = x_max.float() / x_min.float()
    return torch.pow(torch.exp(1 - ratio.float()), alpha)
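A quick worked example: for word lengths 4 and 8 the ratio is 2, giving exp(1 - 2) ** 0.25 = exp(-0.25) ≈ 0.779, while equal lengths give a penalty of exactly 1:

import torch

wl1 = torch.tensor([[4.0], [5.0]])
wl2 = torch.tensor([[8.0], [5.0]])
print(compute_length_penalty(wl1, wl2))  # tensor([0.7788, 1.0000])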
Beispiel #57
0
def rect_to_polar(real, imag):
    mag = torch.pow(real**2 + imag**2, 0.5)
    ang = torch.atan2(imag, real)
    return mag, ang
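The inverse transform (function name is ours) recovers the rectangular form, which makes for an easy round-trip check:

import torch

def polar_to_rect(mag, ang):
    return mag * torch.cos(ang), mag * torch.sin(ang)

real, imag = torch.randn(4), torch.randn(4)
mag, ang = rect_to_polar(real, imag)
r2, i2 = polar_to_rect(mag, ang)
assert torch.allclose(r2, real, atol=1e-6) and torch.allclose(i2, imag, atol=1e-6)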
Beispiel #58
0
 def forward(self, output, clip_label, motion_mask):
     z = torch.pow((output - clip_label), 2)
     loss = torch.mean(motion_mask * z)
     return loss
 def _compute_kl(self, mu, sd):
   mu_2 = torch.pow(mu, 2)
   sd_2 = torch.pow(sd, 2)
   encoding_loss = (mu_2 + sd_2 - torch.log(sd_2)).sum() / mu_2.size(0)
   return encoding_loss
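For reference, the exact KL divergence from N(mu, sd^2) to a standard normal is 0.5 * (mu^2 + sd^2 - log(sd^2) - 1) per dimension; the version above drops the -1 and the 0.5, so it differs from the exact value only by an additive offset and a factor of two. A sketch of the exact form:

import torch

def kl_to_standard_normal(mu, sd):
    # KL( N(mu, sd^2) || N(0, 1) ), summed over latent dims, mean over batch
    return 0.5 * (mu.pow(2) + sd.pow(2)
                  - torch.log(sd.pow(2)) - 1).sum() / mu.size(0)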
Beispiel #60
0
    """
    frame_dir = "/Users/lekhang/Desktop/Khang/data/highway/input"
    frame_files = general_utils.get_all_files(f"{frame_dir}", keep_dir=True)
    frame_files = sorted(frame_files)

    frame_0 = cv2.imread(frame_files[0], 0)
    h, w = frame_0.shape

    U = np.array([np.array(cv2.imread(frame_files[i], 0).flatten()) / 255. for i in range(k)]).T
    U = np.random.rand(*U.shape)  # TODO: setting this makes the result look very good - why?
    assert U.shape == (h * w, k)
    V = U ** 2
    N = np.ones((h * w, k))

    U2 = torch.from_numpy(np.random.rand(1, c * k, h, w)).float()
    V2 = torch.pow(U2, 2)
    N2 = torch.ones((1, c * k, h, w)).float()

    gmm_tensor = GMMBlock()

    for frame_file in frame_files:
        frame = cv2.imread(frame_file, 0)
        frame_rgb = cv2.imread(frame_file)

        frame = frame / 255.
        frame_rgb = frame_rgb / 255.

        U, V, N, prob = gmm(U, V, N, np.expand_dims(frame.flatten(), axis=-1), eta)

        frame_tensor = torch.from_numpy(np.expand_dims(np.moveaxis(frame_rgb[:, :, :], -1, 0), axis=0)).float()
        U2, V2, N2, prob2 = gmm_tensor(frame_tensor, U2, V2, N2, eta)