def adpW(self,x):
     '''
        Calculate the pairwise attention for every pair of inputs.
        Output size: (x.size(0), x.size(1) // 2)
     '''
     x = x.detach()
     x = self.adp_metric_embedding1(x)
     x = self.adp_metric_embedding1_bn(x)
     x = F.relu(x)
     x = self.adp_metric_embedding2(x)
     x = self.adp_metric_embedding2_bn(x)
     x = F.relu(x)
     x = self.adp_metric_embedding3(x)
     x = self.adp_metric_embedding3_bn(x)
     x = F.relu(x)
     pairwise_att = torch.sigmoid(self.adp_metric_embedding4(x))  # torch.sigmoid: F.sigmoid is deprecated
     # x = self.adp_metric_embedding2_bn(x)
     diag_matrix1 = []
     diag_matrix2 = []
     for i in range(x.size(0)):
         diag_matrix1.append(torch.diag(pairwise_att[i, :x.size(1) // 2]))
     for i in range(x.size(0)):
         diag_matrix2.append(torch.diag(pairwise_att[i, x.size(1) // 2:]))
     pairwise_att1 = torch.stack(diag_matrix1)
     pairwise_att2 = torch.stack(diag_matrix2)  # was diag_matrix1, which returned the same tensor twice
     return pairwise_att1, pairwise_att2
Example #2
    def _test_jacobian(self, input_dim, hidden_dim):
        jacobian = torch.zeros(input_dim, input_dim)
        iaf = InverseAutoregressiveFlow(input_dim, hidden_dim, sigmoid_bias=0.5)

        def nonzero(x):
            return torch.sign(torch.abs(x))

        x = torch.randn(1, input_dim)
        iaf_x = iaf(x)
        analytic_ldt = iaf.log_abs_det_jacobian(x, iaf_x).data.sum()

        for j in range(input_dim):
            for k in range(input_dim):
                epsilon_vector = torch.zeros(1, input_dim)
                epsilon_vector[0, j] = self.epsilon
                iaf_x_eps = iaf(x + epsilon_vector)
                delta = (iaf_x_eps - iaf_x) / self.epsilon
                jacobian[j, k] = float(delta[0, k].data.sum())

        permutation = iaf.arn.get_permutation()
        permuted_jacobian = jacobian.clone()
        for j in range(input_dim):
            for k in range(input_dim):
                permuted_jacobian[j, k] = jacobian[permutation[j], permutation[k]]
        numeric_ldt = torch.sum(torch.log(torch.diag(permuted_jacobian)))
        ldt_discrepancy = np.fabs(analytic_ldt - numeric_ldt)

        diag_sum = torch.sum(torch.diag(nonzero(permuted_jacobian)))
        lower_sum = torch.sum(torch.tril(nonzero(permuted_jacobian), diagonal=-1))

        assert ldt_discrepancy < self.epsilon
        assert diag_sum == float(input_dim)
        assert lower_sum == float(0.0)
Example #3
def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    m = K_XX.size(0)    # assume X, Y are same shape

    # Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
    else:
        diag_X = torch.diag(K_XX)                       # (m,)
        diag_Y = torch.diag(K_YY)                       # (m,)
        sum_diag_X = torch.sum(diag_X)
        sum_diag_Y = torch.sum(diag_Y)

    Kt_XX_sums = K_XX.sum(dim=1) - diag_X             # \tilde{K}_XX * e = K_XX * e - diag_X
    Kt_YY_sums = K_YY.sum(dim=1) - diag_Y             # \tilde{K}_YY * e = K_YY * e - diag_Y
    K_XY_sums_0 = K_XY.sum(dim=0)                     # K_{XY}^T * e

    Kt_XX_sum = Kt_XX_sums.sum()                       # e^T * \tilde{K}_XX * e
    Kt_YY_sum = Kt_YY_sums.sum()                       # e^T * \tilde{K}_YY * e
    K_XY_sum = K_XY_sums_0.sum()                       # e^T * K_{XY} * e

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
            + (Kt_YY_sum + sum_diag_Y) / (m * m)
            - 2.0 * K_XY_sum / (m * m))
    else:
        mmd2 = (Kt_XX_sum / (m * (m - 1))
            + Kt_YY_sum / (m * (m - 1))
            - 2.0 * K_XY_sum / (m * m))

    return mmd2
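A minimal usage sketch for _mmd2 (hedged: it assumes the _mix_rbf_kernel helper from Example #15 further below is in scope):

import torch

X = torch.randn(64, 10)   # two samples of equal size m = 64
Y = torch.randn(64, 10)
K_XX, K_XY, K_YY, _ = _mix_rbf_kernel(X, Y, sigma_list=[1.0, 2.0, 4.0])
mmd2 = _mmd2(K_XX, K_XY, K_YY, biased=False)  # unbiased MMD^2 estimate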
Example #4
    def __init__(self, hidden_size, num_inputs, action_space):
        super(Policy, self).__init__()
        self.action_space = action_space
        num_outputs = action_space.shape[0]

        self.bn0 = nn.BatchNorm1d(num_inputs)
        self.bn0.weight.data.fill_(1)
        self.bn0.bias.data.fill_(0)

        self.linear1 = nn.Linear(num_inputs, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.bn1.weight.data.fill_(1)
        self.bn1.bias.data.fill_(0)

        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.bn2.weight.data.fill_(1)
        self.bn2.bias.data.fill_(0)

        self.V = nn.Linear(hidden_size, 1)
        self.V.weight.data.mul_(0.1)
        self.V.bias.data.mul_(0.1)

        self.mu = nn.Linear(hidden_size, num_outputs)
        self.mu.weight.data.mul_(0.1)
        self.mu.bias.data.mul_(0.1)

        self.L = nn.Linear(hidden_size, num_outputs ** 2)
        self.L.weight.data.mul_(0.1)
        self.L.bias.data.mul_(0.1)

        self.tril_mask = Variable(torch.tril(torch.ones(
            num_outputs, num_outputs), diagonal=-1).unsqueeze(0))
        self.diag_mask = Variable(torch.diag(torch.diag(
            torch.ones(num_outputs, num_outputs))).unsqueeze(0))
 def forward(self, pred, labels, targets):
     indexer = labels.data - 1
     prep = pred[:, indexer, :]  # (B, B, 2): the diagonals below pick pred[i, labels[i]-1, :] per sample
     class_pred = torch.cat((torch.diag(prep[:, :,  0]).view(-1, 1),
                             torch.diag(prep[:, :, 1]).view(-1, 1)),
                            dim=1)
     loss = self.smooth_l1_loss(class_pred.view(-1), targets.view(-1)) * 2
     return loss
Example #6
    def phi(A):
        """
        Return lower triangle of A and halve the diagonal.
        """
        B = A.tril()

        B = B - 0.5 * torch.diag(torch.diag(B))

        return B
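A quick standalone check of phi (assuming torch is imported): the strict lower triangle is kept and the diagonal is halved, the usual form when backpropagating through a Cholesky factor.

import torch

A = torch.tensor([[4., 2.],
                  [2., 3.]])
print(phi(A))  # tensor([[2.0000, 0.0000],
               #         [2.0000, 1.5000]])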
Example #7
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    m = K_XX.size(0)    # assume X, Y are same shape

    # Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = torch.diag(K_XX)                       # (m,)
        diag_Y = torch.diag(K_YY)                       # (m,)
        sum_diag_X = torch.sum(diag_X)
        sum_diag_Y = torch.sum(diag_Y)
        sum_diag2_X = diag_X.dot(diag_X)
        sum_diag2_Y = diag_Y.dot(diag_Y)

    Kt_XX_sums = K_XX.sum(dim=1) - diag_X             # \tilde{K}_XX * e = K_XX * e - diag_X
    Kt_YY_sums = K_YY.sum(dim=1) - diag_Y             # \tilde{K}_YY * e = K_YY * e - diag_Y
    K_XY_sums_0 = K_XY.sum(dim=0)                     # K_{XY}^T * e
    K_XY_sums_1 = K_XY.sum(dim=1)                     # K_{XY} * e

    Kt_XX_sum = Kt_XX_sums.sum()                       # e^T * \tilde{K}_XX * e
    Kt_YY_sum = Kt_YY_sums.sum()                       # e^T * \tilde{K}_YY * e
    K_XY_sum = K_XY_sums_0.sum()                       # e^T * K_{XY} * e

    Kt_XX_2_sum = (K_XX ** 2).sum() - sum_diag2_X      # \| \tilde{K}_XX \|_F^2
    Kt_YY_2_sum = (K_YY ** 2).sum() - sum_diag2_Y      # \| \tilde{K}_YY \|_F^2
    K_XY_2_sum  = (K_XY ** 2).sum()                    # \| K_{XY} \|_F^2

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
            + (Kt_YY_sum + sum_diag_Y) / (m * m)
            - 2.0 * K_XY_sum / (m * m))
    else:
        mmd2 = (Kt_XX_sum / (m * (m - 1))
            + Kt_YY_sum / (m * (m - 1))
            - 2.0 * K_XY_sum / (m * m))

    var_est = (
        2.0 / (m**2 * (m - 1.0)**2) * (2 * Kt_XX_sums.dot(Kt_XX_sums) - Kt_XX_2_sum + 2 * Kt_YY_sums.dot(Kt_YY_sums) - Kt_YY_2_sum)
        - (4.0*m - 6.0) / (m**3 * (m - 1.0)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4.0*(m - 2.0) / (m**3 * (m - 1.0)**2) * (K_XY_sums_1.dot(K_XY_sums_1) + K_XY_sums_0.dot(K_XY_sums_0))
        - 4.0*(m - 3.0) / (m**3 * (m - 1.0)**2) * (K_XY_2_sum) - (8 * m - 12) / (m**5 * (m - 1)) * K_XY_sum**2
        + 8.0 / (m**3 * (m - 1.0)) * (
            1.0 / m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
            - Kt_XX_sums.dot(K_XY_sums_1)
            - Kt_YY_sums.dot(K_XY_sums_0))
        )
    return mmd2, var_est
Example #8
 def fov(self, value):
     self._fov = value
     fov_factor = 1.0 / torch.tan(transform.radians(0.5 * self._fov))
     o = torch.ones([1], dtype=torch.float32)
     diag = torch.cat([fov_factor, fov_factor, o], 0)
     self._cam_to_ndc = torch.diag(diag)
     self.ndc_to_cam = torch.inverse(self._cam_to_ndc)
def knn(Mxx, Mxy, Myy, k, sqrt):
    n0 = Mxx.size(0)
    n1 = Myy.size(0)
    label = torch.cat((torch.ones(n0),torch.zeros(n1)))
    M = torch.cat((torch.cat((Mxx,Mxy),1), torch.cat((Mxy.transpose(0,1),Myy), 1)), 0)
    if sqrt:
        M = M.abs().sqrt()
    INFINITY = float('inf')
    val, idx = (M+torch.diag(INFINITY*torch.ones(n0+n1))).topk(k, 0, False)

    count = torch.zeros(n0+n1)
    for i in range(0,k):
        count = count + label.index_select(0,idx[i])
    pred = torch.ge(count, (float(k)/2)*torch.ones(n0+n1)).float()

    s = Score_knn()
    s.tp = (pred*label).sum()
    s.fp = (pred*(1-label)).sum()
    s.fn = ((1-pred)*label).sum()
    s.tn = ((1-pred)*(1-label)).sum()
    s.precision = s.tp/(s.tp+s.fp)
    s.recall = s.tp/(s.tp+s.fn)
    s.acc_t = s.tp/(s.tp+s.fn)
    s.acc_f = s.tn/(s.tn+s.fp)
    s.acc = torch.eq(label, pred).float().mean()
    s.k = k 

    return s
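A hedged usage sketch for knn (it assumes Score_knn is defined elsewhere in the file, as the function itself does): build squared Euclidean distance matrices with torch.cdist and run the leave-one-out two-sample test.

import torch

X, Y = torch.randn(100, 32), torch.randn(100, 32)
Mxx, Mxy, Myy = torch.cdist(X, X) ** 2, torch.cdist(X, Y) ** 2, torch.cdist(Y, Y) ** 2
s = knn(Mxx, Mxy, Myy, k=5, sqrt=True)
print(s.acc)  # close to 0.5 when X and Y come from the same distribution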
Example #10
    def setUp(self, size=(2, 5), batch=3, dtype=torch.float64, device=None,
              seed=None, mu=None, cov=None, A=None, b=None):
        '''Test the correctness of the batch implementation of mean().

        This function will stack `[1 * mu, 2 * mu, ..., batch * mu]`.
        Then, it will see whether the batch output is accurate or not.

        Args:
            size: Tuple size of matrix A.
            batch: The batch size > 0.
            dtype: data type.
            device: In which device.
            seed: Seed for the random number generator.
            mu: To test a specific mean mu.
            cov: To test a specific covariance matrix.
            A: To test a specific A matrix.
            b: To test a specific bias b.
        '''
        if seed is not None:
            torch.manual_seed(seed)
        if A is None:
            A = torch.rand(size, dtype=dtype, device=device)
        if b is None:
            b = torch.rand(size[0], dtype=dtype, device=device)
        if mu is None:
            mu = torch.rand(size[1], dtype=dtype, device=device)
        if cov is None:
            cov = rand.definite(size[1], dtype=dtype, device=device,
                                positive=True, semi=False, norm=10**2)
        self.A = A
        self.b = b
        var = torch.diag(cov)
        self.batch_mean = torch.stack([(i + 1) * mu for i in range(batch)])
        self.batch_cov = torch.stack([(i + 1) * cov for i in range(batch)])
        self.batch_var = torch.stack([(i + 1) * var for i in range(batch)])
Example #11
 def __wct_core(self, cont_feat, styl_feat):
     cFSize = cont_feat.size()
     c_mean = torch.mean(cont_feat, 1)  # c x (h x w)
     c_mean = c_mean.unsqueeze(1).expand_as(cont_feat)
     cont_feat = cont_feat - c_mean
     
     iden = torch.eye(cFSize[0])  # .double()
     if self.is_cuda:
         iden = iden.cuda()
     
     contentConv = torch.mm(cont_feat, cont_feat.t()).div(cFSize[1] - 1) + iden
     # del iden
     c_u, c_e, c_v = torch.svd(contentConv, some=False)
     # c_e2, c_v = torch.eig(contentConv, True)
     # c_e = c_e2[:,0]
     
     k_c = cFSize[0]
     for i in range(cFSize[0] - 1, -1, -1):
         if c_e[i] >= 0.00001:
             k_c = i + 1
             break
     
     sFSize = styl_feat.size()
     s_mean = torch.mean(styl_feat, 1)
     styl_feat = styl_feat - s_mean.unsqueeze(1).expand_as(styl_feat)
     styleConv = torch.mm(styl_feat, styl_feat.t()).div(sFSize[1] - 1)
     s_u, s_e, s_v = torch.svd(styleConv, some=False)
     
     k_s = sFSize[0]
     for i in range(sFSize[0] - 1, -1, -1):
         if s_e[i] >= 0.00001:
             k_s = i + 1
             break
     
     c_d = (c_e[0:k_c]).pow(-0.5)
     step1 = torch.mm(c_v[:, 0:k_c], torch.diag(c_d))
     step2 = torch.mm(step1, (c_v[:, 0:k_c].t()))
     whiten_cF = torch.mm(step2, cont_feat)
     
     s_d = (s_e[0:k_s]).pow(0.5)
     targetFeature = torch.mm(torch.mm(torch.mm(s_v[:, 0:k_s], torch.diag(s_d)), (s_v[:, 0:k_s].t())), whiten_cF)
     targetFeature = targetFeature + s_mean.unsqueeze(1).expand_as(targetFeature)
     return targetFeature
    def _greedy_decode(self,
                       head_tag_representation: torch.Tensor,
                       child_tag_representation: torch.Tensor,
                       attended_arcs: torch.Tensor,
                       mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Decodes the head and head tag predictions by decoding the unlabeled arcs
        independently for each word and then again, predicting the head tags of
        these greedily chosen arcs independently. Note that this method of decoding
        is not guaranteed to produce trees (i.e. there may be multiple roots,
        or cycles when children are attached to their parents).

        Parameters
        ----------
        head_tag_representation : ``torch.Tensor``, required.
            A tensor of shape (batch_size, sequence_length, tag_representation_dim),
            which will be used to generate predictions for the dependency tags
            for the given arcs.
        child_tag_representation : ``torch.Tensor``, required
            A tensor of shape (batch_size, sequence_length, tag_representation_dim),
            which will be used to generate predictions for the dependency tags
            for the given arcs.
        attended_arcs : ``torch.Tensor``, required.
            A tensor of shape (batch_size, sequence_length, sequence_length) used to generate
            a distribution over attachments of a given word to all other words.

        Returns
        -------
        heads : ``torch.Tensor``
            A tensor of shape (batch_size, sequence_length) representing the
            greedily decoded heads of each word.
        head_tags : ``torch.Tensor``
            A tensor of shape (batch_size, sequence_length) representing the
            dependency tags of the greedily decoded heads of each word.
        """
        # Mask the diagonal, because the head of a word can't be itself.
        attended_arcs = attended_arcs + torch.diag(attended_arcs.new(mask.size(1)).fill_(-numpy.inf))
        # Mask padded tokens, because we only want to consider actual words as heads.
        if mask is not None:
            minus_mask = (1 - mask).byte().unsqueeze(2)
            attended_arcs.masked_fill_(minus_mask, -numpy.inf)

        # Compute the heads greedily.
        # shape (batch_size, sequence_length)
        _, heads = attended_arcs.max(dim=2)

        # Given the greedily predicted heads, decode their dependency tags.
        # shape (batch_size, sequence_length, num_head_tags)
        head_tag_logits = self._get_head_tags(head_tag_representation,
                                              child_tag_representation,
                                              heads)
        _, head_tags = head_tag_logits.max(dim=2)
        return heads, head_tags
 def adpW(self,x):
     # x = F.normalize(x)
     x = self.adp_metric_embedding1(x)
     # x = self.adp_metric_embedding1_bn(x)
     x = F.relu(x)  # F.prelu requires a learnable weight argument; plain relu keeps this runnable
     x = self.adp_metric_embedding2(x)
     # x = self.adp_metric_embedding2_bn(x)
     diag_matrix = []
     for i in range(x.size(0)):
         diag_matrix.append(torch.diag(x[i,:]))
     x = torch.stack(diag_matrix)
     W = torch.matmul(self.transform_matrix,torch.matmul(x,self.transform_matrix))
     return W
    def forward(self, input):
        laplacian = input.exp() + self.eps
        output = input.clone()
        for b in range(input.size(0)):
            lap = laplacian[b].masked_fill(
                torch.eye(input.size(1), device=input.device).ne(0), 0)
            lap = -lap + torch.diag(lap.sum(0))
            # store roots on diagonal
            lap[0] = input[b].diag().exp()
            inv_laplacian = lap.inverse()

            factor = inv_laplacian.diag().unsqueeze(1)\
                                         .expand_as(input[b]).transpose(0, 1)
            term1 = input[b].exp().mul(factor).clone()
            term2 = input[b].exp().mul(inv_laplacian.transpose(0, 1)).clone()
            term1[:, 0] = 0
            term2[0] = 0
            output[b] = term1 - term2
            roots_output = input[b].diag().exp().mul(
                inv_laplacian.transpose(0, 1)[0])
            output[b] = output[b] + torch.diag(roots_output)
        return output
Example #15
def _mix_rbf_kernel(X, Y, sigma_list):
    assert(X.size(0) == Y.size(0))
    m = X.size(0)

    Z = torch.cat((X, Y), 0)
    ZZT = torch.mm(Z, Z.t())
    diag_ZZT = torch.diag(ZZT).unsqueeze(1)
    Z_norm_sqr = diag_ZZT.expand_as(ZZT)
    exponent = Z_norm_sqr - 2 * ZZT + Z_norm_sqr.t()

    K = 0.0
    for sigma in sigma_list:
        gamma = 1.0 / (2 * sigma**2)
        K += torch.exp(-gamma * exponent)

    return K[:m, :m], K[:m, m:], K[m:, m:], len(sigma_list)
Example #16
    def test_constant(self):
        x = Variable(torch.randn(2, 2), requires_grad=True)

        trace = torch._C._tracer_enter((x,), 0)

        y = Variable(torch.diag(torch.Tensor([2, 2])))
        z = x.matmul(y)

        torch._C._tracer_exit((z,))
        function = torch._C._jit_createAutogradClosure(trace)

        z2 = function()(x)
        self.assertEqual(z, z2)

        y.data.fill_(1000)  # make sure the data has been cloned

        x2 = Variable(torch.ones(2, 2) * 2, requires_grad=True)
        z3 = function()(x2)
        self.assertEqual(z3.data, torch.ones(2, 2) * 4)
Example #17
def orthogonal(tensor, gain=1):
    """Fills the input Tensor or Variable with a (semi) orthogonal matrix, as
    described in "Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks" - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable, where n >= 2
        gain: optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.orthogonal(w)
    """
    if isinstance(tensor, Variable):
        orthogonal(tensor.data, gain=gain)
        return tensor

    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    rows = tensor.size(0)
    cols = tensor[0].numel()
    flattened = torch.Tensor(rows, cols).normal_(0, 1)
    # Compute the qr factorization
    q, r = torch.qr(flattened)
    # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph.expand_as(q)
    # Pad zeros to Q (if rows smaller than cols)
    if rows < cols:
        padding = torch.zeros(rows, cols - rows)
        if q.is_cuda:
            q = torch.cat([q, padding.cuda()], 1)
        else:
            q = torch.cat([q, padding], 1)

    tensor.view_as(q).copy_(q)
    tensor.mul_(gain)
    return tensor
Example #18
    def _greedy_decode(arc_scores: torch.Tensor,
                       arc_tag_logits: torch.Tensor,
                       mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Decodes the head and head tag predictions by decoding the unlabeled arcs
        independently for each word and then again, predicting the head tags of
        these greedily chosen arcs independently.

        Parameters
        ----------
        arc_scores : ``torch.Tensor``, required.
            A tensor of shape (batch_size, sequence_length, sequence_length) used to generate
            a distribution over attachments of a given word to all other words.
        arc_tag_logits : ``torch.Tensor``, required.
            A tensor of shape (batch_size, sequence_length, sequence_length, num_tags) used to
            generate a distribution over tags for each arc.
        mask : ``torch.Tensor``, required.
            A mask of shape (batch_size, sequence_length).

        Returns
        -------
        arc_probs : ``torch.Tensor``
            A tensor of shape (batch_size, sequence_length, sequence_length) representing the
            probability of an arc being present for this edge.
        arc_tag_probs : ``torch.Tensor``
            A tensor of shape (batch_size, sequence_length, sequence_length, num_tags)
            representing the distribution over edge tags for a given edge.
        """
        # Mask the diagonal, because we don't want self edges.
        inf_diagonal_mask = torch.diag(arc_scores.new(mask.size(1)).fill_(-numpy.inf))
        arc_scores = arc_scores + inf_diagonal_mask
        # shape (batch_size, sequence_length, sequence_length, num_tags)
        arc_tag_logits = arc_tag_logits + inf_diagonal_mask.unsqueeze(0).unsqueeze(-1)
        # Mask padded tokens, because we only want to consider actual word -> word edges.
        minus_mask = (1 - mask).byte().unsqueeze(2)
        arc_scores.masked_fill_(minus_mask, -numpy.inf)
        arc_tag_logits.masked_fill_(minus_mask.unsqueeze(-1), -numpy.inf)
        # shape (batch_size, sequence_length, sequence_length)
        arc_probs = arc_scores.sigmoid()
        # shape (batch_size, sequence_length, sequence_length, num_tags)
        arc_tag_probs = torch.nn.functional.softmax(arc_tag_logits, dim=-1)
        return arc_probs, arc_tag_probs
Example #19
def build_model(params, with_dis):
    """
    Build all components of the model.
    """
    # source embeddings
    src_dico, _src_emb = load_external_embeddings(params, source=True)
    params.src_dico = src_dico
    src_emb = nn.Embedding(len(src_dico), params.emb_dim, sparse=True)
    src_emb.weight.data.copy_(_src_emb)

    # target embeddings
    if params.tgt_lang:
        tgt_dico, _tgt_emb = load_external_embeddings(params, source=False)
        params.tgt_dico = tgt_dico
        tgt_emb = nn.Embedding(len(tgt_dico), params.emb_dim, sparse=True)
        tgt_emb.weight.data.copy_(_tgt_emb)
    else:
        tgt_emb = None

    # mapping
    mapping = nn.Linear(params.emb_dim, params.emb_dim, bias=False)
    if getattr(params, 'map_id_init', True):
        mapping.weight.data.copy_(torch.diag(torch.ones(params.emb_dim)))

    # discriminator
    discriminator = Discriminator(params) if with_dis else None

    # cuda
    if params.cuda:
        src_emb.cuda()
        if params.tgt_lang:
            tgt_emb.cuda()
        mapping.cuda()
        if with_dis:
            discriminator.cuda()

    # normalize embeddings
    normalize_embeddings(src_emb.weight.data, params.normalize_embeddings)
    if params.tgt_lang:
        normalize_embeddings(tgt_emb.weight.data, params.normalize_embeddings)

    return src_emb, tgt_emb, mapping, discriminator
Example #20
def th_corrcoef(x):
    """
    mimics np.corrcoef
    """
    # calculate covariance matrix of rows
    mean_x = th.mean(x, 1, keepdim=True)  # keepdim so the (rows, 1) mean broadcasts across columns
    xm = x.sub(mean_x.expand_as(x))
    c = xm.mm(xm.t())
    c = c / (x.size(1) - 1)

    # normalize covariance matrix
    d = th.diag(c)
    stddev = th.pow(d, 0.5)
    c = c.div(stddev.expand_as(c))
    c = c.div(stddev.expand_as(c).t())

    # clamp between -1 and 1
    c = th.clamp(c, -1.0, 1.0)

    return c
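A small sanity check of th_corrcoef against NumPy (assuming `import torch as th`, as the snippet's alias suggests):

import numpy as np
import torch as th

x = th.randn(4, 100)
assert np.allclose(th_corrcoef(x).numpy(), np.corrcoef(x.numpy()), atol=1e-5)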
Example #21
def orthogonal_(tensor, gain=1):
    r"""Fills the input `Tensor` with a (semi) orthogonal matrix, as
    described in "Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks" - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    """
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    rows = tensor.size(0)
    cols = tensor[0].numel()
    flattened = tensor.new(rows, cols).normal_(0, 1)

    if rows < cols:
        flattened.t_()

    # Compute the qr factorization
    q, r = torch.qr(flattened)
    # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph

    if rows < cols:
        q.t_()

    with torch.no_grad():
        tensor.view_as(q).copy_(q)
        tensor.mul_(gain)
    return tensor
Example #22
    def __init__(self,
                 position,
                 look_at,
                 up,
                 fov,
                 clip_near,
                 resolution,
                 cam_to_ndc = None,
                 fisheye = False):
        assert(position.dtype == torch.float32)
        assert(len(position.shape) == 1 and position.shape[0] == 3)
        assert(look_at.dtype == torch.float32)
        assert(len(look_at.shape) == 1 and look_at.shape[0] == 3)
        assert(up.dtype == torch.float32)
        assert(len(up.shape) == 1 and up.shape[0] == 3)
        if fov is not None:
            assert(fov.dtype == torch.float32)
            assert(len(fov.shape) == 1 and fov.shape[0] == 1)
        assert(isinstance(clip_near, float))

        self.position = position
        self.look_at = look_at
        self.up = up
        self._fov = fov
        # self.cam_to_world = transform.gen_look_at_matrix(position, look_at, up)
        # self.world_to_cam = torch.inverse(self.cam_to_world).contiguous()
        if cam_to_ndc is None:
            fov_factor = 1.0 / torch.tan(transform.radians(0.5 * fov))
            o = torch.ones([1], dtype=torch.float32)
            diag = torch.cat([fov_factor, fov_factor, o], 0)
            self._cam_to_ndc = torch.diag(diag)
        else:
            self._cam_to_ndc = cam_to_ndc
        self.ndc_to_cam = torch.inverse(self.cam_to_ndc)
        self.clip_near = clip_near
        self.resolution = resolution
        self.fisheye = fisheye
Example #23
 def get_probs(self, x, y):
     output = self.model(self.normalize(x.cuda())).cpu()
     probs = torch.index_select(F.softmax(output, dim=-1).data, 1, y)
     return torch.diag(probs)  # the (B, B) selection's diagonal is probs[i, y[i]] for each sample
Example #24
def vector_to_diag(not_informed_fraction, beta):
    not_informed_fraction_diag = torch.zeros_like(beta)
    for s in range(beta.shape[0]):
        not_informed_fraction_diag[s] = torch.diag(
            not_informed_fraction[s].squeeze())
    return not_informed_fraction_diag
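Newer PyTorch versions provide torch.diag_embed, which collapses the per-sample loop; a hedged batched equivalent (vector_to_diag_batched is a hypothetical name, assuming the input has shape (S, n, 1) or (S, n)):

import torch

def vector_to_diag_batched(v):
    # batched equivalent of the loop above: one (n, n) diagonal matrix per sample
    return torch.diag_embed(v.squeeze(-1))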
Example #25
 def forward(self, state):
     mu = self.model(state)
     return MultivariateNormal(mu, torch.diag(self.log_std.exp()))
def quadratic_gaussian(x, mu, S):
    matrix = torch.mm(torch.mm(x - mu, S), (x - mu).T)
    matrix *= 0.5
    return torch.diag(matrix)
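quadratic_gaussian materializes a full (n, n) product only to read its diagonal; a memory-lighter sketch of the same row-wise quadratic form via torch.einsum (quadratic_gaussian_einsum is a hypothetical name):

import torch

def quadratic_gaussian_einsum(x, mu, S):
    # 0.5 * (x_i - mu)^T S (x_i - mu) for each row i, without the (n, n) intermediate
    d = x - mu
    return 0.5 * torch.einsum('ij,jk,ik->i', d, S, d)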
Example #27
def run_seis_information_new(T, G, S, I, migration_I, migration_E, nu, mu, d,
                             beta, N, alpha_fast, alpha_slow, E,
                             beta_information, nu_max):
    '''
    Runs the linearized SEIS model, returning the total number of infected agents
    summed over all time steps.
    '''
    #read in for first period of F, informed
    #nu_sq = np.loadtxt('ann2018_clearanceProb.csv.csv', delimiter=',', skiprows=1)
    #nu_sq[np.isnan(nu_sq)] = 0
    #nu_sq = nu_sq.mean(axis = 0)
    #nu_sq = torch.from_numpy(nu_sq)

    #duplicate these variables along an additional axis to match the batch size
    beta = beta.expand_as(G)
    informed = nu.view(len(nu), 1)
    informed = informed.expand(beta.shape[0], *informed.shape)
    nu = torch.diag(1 - nu).expand_as(beta)
    num_samples = G.shape[0]
    #keep track of infected, latent, and informed at each time step
    all_I = torch.zeros(T, num_samples, beta.shape[1], 1).double()
    all_E = torch.zeros(T, num_samples, E.shape[1], E.shape[2]).double()
    all_F = torch.zeros_like(all_I).double()
    all_I[0] = I[0]
    all_E[0] = E[0]
    #all_I[0] = I[30]
    #all_E[0] = E[30]

    all_F[0] = informed

    #run the main loop for the linearized disease dynamics
    for t in range(1, T):
        #update nu with new information spread
        not_informed_fraction = 1 - informed
        not_informed_fraction_diag = vector_to_diag(not_informed_fraction,
                                                    beta)
        #constant scaling the beta for information spread
        informed = not_informed_fraction_diag @ beta_information @ informed + informed
        #print('here is info beta mat')
        #print(beta_information)
        #print('here is informed')
        #print(informed)
        #debug sze
        nu = nu_max * informed
        nu = vector_to_diag(1 - nu, beta)

        #infections
        new_infections = S[t - 1] @ mu @ beta @ N[t - 1] @ I
        new_infections_active = alpha_fast @ new_infections
        new_infections_latent = new_infections - new_infections_active
        E = mu @ E
        activations = alpha_slow @ E
        E = E - activations
        E += new_infections_latent
        E = G @ E + migration_E[
            t]  #CHANGING TO USING THE LAST MIGRATION PERIOD
        #E = G @ E + migration_E[30]

        old_infections = nu @ d @ I
        I = new_infections_active + old_infections + activations
        I = G @ I + migration_I[
            t]  #CHANGING TO USING THE LAST MIGRATION PERIOD
        #I = G @ I + migration_I[30]

        #return E, I, F by time and age group
        #mean across samples
        all_I[t] = I
        all_E[t] = E
        all_F[t] = informed

    #print(all_I)
    return all_I, all_E, all_F
Example #28
 def decoder(self, z):
     mean = self.decoder_fc(z)
     cov_mat = torch.diag(self.std**2)
     dist = MultivariateNormal(mean, cov_mat)
     return dist
Example #29
def loss_HardNet(anchor, positive, anchor_swap = False, anchor_ave = False,\
        margin = 2.0, batch_reduce = 'min', loss_type = "triplet_margin"):
    """HardNet margin loss - calculates loss based on distance matrix based on positive distance and closest negative distance.
    """

    assert anchor.size() == positive.size(
    ), "Input sizes between positive and negative must be equal."
    assert anchor.dim() == 2, "Input must be a 2D matrix."
    eps = 1e-8
    dist_matrix = distance_matrix_vector(anchor, positive) + eps
    #if args.cuda:
    eye = torch.autograd.Variable(torch.eye(dist_matrix.size(1))).cuda()
    #else:
    #eye = torch.autograd.Variable(torch.eye(dist_matrix.size(1)))

    # steps to filter out same patches that occur in distance matrix as negatives
    pos1 = torch.diag(dist_matrix)
    dist_without_min_on_diag = dist_matrix + eye * 10
    mask = (dist_without_min_on_diag.ge(0.008).float() - 1.0) * (-1)
    mask = mask.type_as(dist_without_min_on_diag) * 10
    dist_without_min_on_diag = dist_without_min_on_diag + mask
    if batch_reduce == 'min':
        idx_neg = torch.min(dist_without_min_on_diag, 1)[1].tolist()
        neg = positive[idx_neg[0]]
        for i in range(1, len(idx_neg)):
            newneg = positive[idx_neg[i]]
            neg = torch.cat((neg, newneg), 0)
        neg = neg.view(-1, 128)
        min_neg = torch.min(dist_without_min_on_diag, 1)[0]  # uncommented: needed for anchor_swap and the softmax/contrastive branches
        if anchor_swap:
            min_neg2 = torch.min(dist_without_min_on_diag, 0)[0]
            min_neg = torch.min(min_neg, min_neg2)
        if False:
            dist_matrix_a = distance_matrix_vector(anchor, anchor) + eps
            dist_matrix_p = distance_matrix_vector(positive, positive) + eps
            dist_without_min_on_diag_a = dist_matrix_a + eye * 10
            dist_without_min_on_diag_p = dist_matrix_p + eye * 10
            min_neg_a = torch.min(dist_without_min_on_diag_a, 1)[0]
            min_neg_p = torch.t(torch.min(dist_without_min_on_diag_p, 0)[0])
            min_neg_3 = torch.min(min_neg_p, min_neg_a)
            min_neg = torch.min(min_neg, min_neg_3)
            print(min_neg_a)
            print(min_neg_p)
            print(min_neg_3)
            print(min_neg)
        #min_neg = min_neg
        pos = pos1
    elif batch_reduce == 'average':
        pos = pos1.repeat(anchor.size(0)).view(-1, 1).squeeze(0)
        min_neg = dist_without_min_on_diag.view(-1, 1)
        if anchor_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).contiguous().view(
                -1, 1)
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = min_neg.squeeze(0)
    elif batch_reduce == 'random':
        idxs = torch.autograd.Variable(
            torch.randperm(anchor.size()[0]).long()).cuda()
        min_neg = dist_without_min_on_diag.gather(1, idxs.view(-1, 1))
        if anchor_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).gather(
                1, idxs.view(-1, 1))
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = torch.t(min_neg).squeeze(0)
        pos = pos1
    else:
        print('Unknown batch reduce mode. Try min, average or random')
        sys.exit(1)
    if loss_type == "triplet_margin":
        loss = F.triplet_margin_loss(anchor, positive, neg, margin, True)
        #loss = torch.clamp(margin + pos - min_neg, min=0.0)
    elif loss_type == 'softmax':
        exp_pos = torch.exp(2.0 - pos)
        exp_den = exp_pos + torch.exp(2.0 - min_neg) + eps
        loss = -torch.log(exp_pos / exp_den)
    elif loss_type == 'contrastive':
        loss = torch.clamp(margin - min_neg, min=0.0) + pos
    else:
        print('Unknown loss type. Try triplet_margin, softmax or contrastive')
        sys.exit(1)
    loss = torch.mean(loss)
    return loss
def gen_adj(A):
    D = torch.pow(A.sum(1).float(), -0.5)
    D = torch.diag(D)
    adj = torch.matmul(torch.matmul(A, D).t(), D)
    return adj
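gen_adj builds the symmetrically normalized adjacency used by GCN-style layers; since D is diagonal, the result is D^{-1/2} A^T D^{-1/2}, which equals D^{-1/2} A D^{-1/2} when A is symmetric. A small hedged check:

import torch

A = torch.tensor([[1., 1., 0.],
                  [1., 1., 1.],
                  [0., 1., 1.]])  # symmetric adjacency with self-loops
adj = gen_adj(A)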
                    e = A[r, c] - U[r, :] @ V[c, :].t()
                    U[r, :] = U[r, :] + lr * e * V[c, :]
                    V[c, :] = V[c, :] + lr * e * U[r, :]
    return U, V


if __name__ == '__main__':

    test = torch.tensor([[0.3374, 0.6005, 0.1735], [3.3359, 0.0492, 1.8374],
                         [2.9407, 0.5301, 2.2620]])

    U, V = sgd_factorise(test, 2)
    loss = torch.nn.functional.mse_loss(U @ V.t(), test, reduction='sum')
    print(f"Approximation {[email protected]()}")
    print(f'Loss  is {loss}')

    U, S, V = truncatedSVD(test)
    reconstruction = U @ torch.diag(S) @ V.t()
    loss = torch.nn.functional.mse_loss(reconstruction, test, reduction='sum')
    print(f"Approximation \n {reconstruction}")
    print(f'Loss  is {loss}')

    test_2 = torch.tensor([[0.3374, 0.6005, 0.1735], [0, 0.0492, 1.8374],
                           [2.9407, 0, 2.2620]])

    mask = torch.tensor([[1, 1, 1], [0, 1, 1], [1, 0, 1]])

    U, V = sgd_factorise_masked(test_2, mask, 2)
    loss = torch.nn.functional.mse_loss(U @ V.t(), test, reduction='sum')
    print(f"Approximation \n {U @ V.t()}")
    print(f'Loss  is {loss}')
Example #32
def grassmanGeodesic(X, Y, t):
    # assumes `import torch as to`; a `+` between the two geodesic terms was lost in the listing
    svd_term = Y @ to.inverse(X.t() @ Y) - X
    U, s, V = to.svd(svd_term)
    theta = to.atan(s).float()
    qr_term = X @ V @ to.diag(to.cos(theta * t)) + U @ to.diag(to.sin(theta * t))
    return qr_term
Example #33
def main(cfg):
    global BN_MOMENTUM_INIT
    global BN_MOMENTUM_MAX
    global BN_DECAY_STEP
    global BN_DECAY_RATE
    global BASE_LEARNING_RATE
    global LR_DECAY_STEPS
    global LR_DECAY_RATES
    BN_MOMENTUM_INIT = 0.5
    BN_MOMENTUM_MAX = 0.001
    BN_DECAY_STEP = cfg.opt.bn_decay_step
    BN_DECAY_RATE = cfg.opt.bn_decay_rate
    BASE_LEARNING_RATE = cfg.opt.learning_rate
    LR_DECAY_STEPS = [int(x) for x in cfg.opt.lr_decay_steps.split(',')]
    LR_DECAY_RATES = [float(x) for x in cfg.opt.lr_decay_rates.split(',')]
        
    train_dataset = ScanNetXYZProbMultiDataset(cfg, training=True, augment=cfg.augment)
    val_dataset = ScanNetXYZProbMultiDataset(cfg, training=False, augment=False)
    
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=collate_fn, num_workers=cfg.num_workers, drop_last=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, collate_fn=collate_fn, shuffle=True, batch_size=1, num_workers=cfg.num_workers)

    logger.info('Start training...')
    
    nclasses = 9
    # each class predict xyz and scale independently
    model = MinkUNet34C(6 if cfg.use_xyz else 3, 6 * nclasses + nclasses + 1)
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=cfg.opt.learning_rate,
        weight_decay=cfg.weight_decay
    )
    bn_lbmd = lambda it: max(BN_MOMENTUM_INIT * BN_DECAY_RATE**(int(it / BN_DECAY_STEP)), BN_MOMENTUM_MAX)
    bnm_scheduler = BNMomentumScheduler(model, bn_lambda=bn_lbmd, last_epoch=cfg.start_epoch-1)

    hv = HoughVoting(cfg.scannet_res)
    
    obj_criterion = torch.nn.CrossEntropyLoss()
    model = model.cuda()
    xyz_weights = torch.tensor([float(x) for x in cfg.xyz_component_weights.split(',')]).cuda()
    
    meter = AverageMeter()
    losses = {}
    for epoch in range(cfg.start_epoch, cfg.max_epoch + 1):
        # Training
        adjust_learning_rate(optimizer, epoch)
        bnm_scheduler.step() # decay BN momentum
        
        model.train()
        meter.reset()
        
        with tqdm(enumerate(train_dataloader)) as t:
            for i, data in t:
                optimizer.zero_grad()
                _, scan_points, scan_feats, scan_xyz_labels, scan_scale_labels, scan_class_labels = data
                
                feats = scan_feats.reshape(-1, 6 if cfg.use_xyz else 3) # recenter to [-1, 1] ?
                feats[:, -3:] = feats[:, -3:] * 2. - 1.
                scan_input = ME.SparseTensor(feats, scan_points, device='cuda')
                scan_output = model(scan_input)
                
                class_label_idx = scan_class_labels.cuda().unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 3)
                class_label_idx[class_label_idx < 0] = 0  # since we have mask to filter out, just set to zero here
                class_label_idx[class_label_idx == nclasses] = 0
                scan_output_xyz = torch.gather(scan_output.F[:, :3 * nclasses].reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]
                scan_output_scale = torch.gather(scan_output.F[:, 3 * nclasses:6 * nclasses].reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]
                scan_output_class = scan_output.F[:, 6 * nclasses:]

                mask = (scan_class_labels < nclasses) & (0 <= scan_class_labels)
                
                loss_xyz = torch.zeros(()).cuda()
                loss_scale = torch.zeros(()).cuda()
                loss_class = torch.zeros(()).cuda()
                if torch.any(mask):
                    if cfg.log_scale:
                        scan_scale_target = torch.log(scan_scale_labels[mask].cuda())
                    else:
                        scan_scale_target = scan_scale_labels[mask].cuda()
                        
                    loss_scale = torch.mean((scan_output_scale[mask] - scan_scale_target) ** 2 * xyz_weights)
                    loss_xyz = torch.mean((scan_output_xyz[mask] - scan_xyz_labels[mask].cuda()) ** 2 * xyz_weights)  # only optimize xyz when there are objects
                    loss_class = obj_criterion(scan_output_class, scan_class_labels.cuda())
                    
                    loss_xyz *= cfg.xyz_factor
                    loss_scale *= cfg.scale_factor
                    
                    losses['loss_xyz'] = loss_xyz
                    losses['loss_scale'] = loss_scale
                    losses['loss_class'] = loss_class
                    
                loss = torch.sum(torch.stack(list(losses.values())))
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                meter.update(loss.item())
                
                t.set_postfix(loss=meter.avg, **dict([(k, v.item()) for (k, v) in losses.items()]))
                optimizer.step()
        
        if epoch % 10 == 0:
            torch.save(model.state_dict(), 'epoch{}.pth'.format(epoch))
        
        if epoch % 10 == 0:
            # validation
            model.eval()
            meter.reset()
            logger.info('epoch {} validation'.format(epoch))
            pred_map_cls = {}
            gt_map_cls = {}
            cnt = 0
            for scan_ids, scan_points, scan_feats, scan_xyz_labels, scan_scale_labels, scan_class_labels in tqdm(val_dataloader):
                cnt += 1
                id_scan = scan_ids[0]
                
                feats = scan_feats.reshape(-1, 6 if cfg.use_xyz else 3)  # recenter to [-1, 1]?
                feats[:, -3:] = feats[:, -3:] * 2. - 1.
                scan_input = ME.SparseTensor(feats, scan_points, device='cuda')
                with torch.no_grad():
                    scan_output = model(scan_input)
                
                scan_output_xyz = scan_output.F[:, :3 * nclasses]
                scan_output_scale = scan_output.F[:, 3 * nclasses:6 * nclasses]
                scan_output_class = scan_output.F[:, 6 * nclasses:]
                
                class_label_idx = scan_output_class.argmax(-1).unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 3)
                class_label_idx[class_label_idx == nclasses] = 0
                scan_output_xyz = torch.gather(scan_output_xyz.reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]
                scan_output_scale = torch.gather(scan_output_scale.reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]

                mask = (scan_class_labels < nclasses) & (0 <= scan_class_labels)
                
                loss_xyz = torch.zeros(()).cuda()
                loss_scale = torch.zeros(()).cuda()
                loss_class = torch.zeros(()).cuda()
                
                if cfg.log_scale:
                    scan_scale_target = torch.log(scan_scale_labels[mask].cuda())
                else:
                    scan_scale_target = scan_scale_labels[mask].cuda()
                    
                loss_scale = torch.mean((scan_output_scale[mask] - scan_scale_target) ** 2 * xyz_weights)
                loss_xyz = torch.mean((scan_output_xyz[mask] - scan_xyz_labels[mask].cuda()) ** 2 * xyz_weights)  # only optimize xyz when there are objects
                loss_class = obj_criterion(scan_output_class, scan_class_labels.cuda())
                
                loss_xyz *= cfg.xyz_factor
                loss_scale *= cfg.scale_factor
                
                losses['loss_xyz'] = loss_xyz
                losses['loss_scale'] = loss_scale
                losses['loss_class'] = loss_class
                
                curr_points = scan_points[:, 1:]

                xyz_pred = scan_output_xyz
                if cfg.log_scale:
                    scale_pred = torch.exp(scan_output_scale)
                else:
                    scale_pred = scan_output_scale
                class_pred = torch.argmax(scan_output_class[..., :-1], dim=-1)
                prob_pred = torch.max(torch.softmax(scan_output_class, dim=-1)[..., :-1], dim=-1)[0]

                with torch.no_grad():
                    grid_obj, grid_rot, grid_scale = hv(curr_points.to('cuda') * cfg.scannet_res, xyz_pred.contiguous(), scale_pred.contiguous(), prob_pred.contiguous())

                map_scene = []            
                boxes = []
                scores = []
                probs = []
                classes = []
                scan_points = curr_points.to('cuda') * cfg.scannet_res
                corners = torch.stack([torch.min(scan_points, 0)[0], torch.max(scan_points, 0)[0]])
                l, h, w = 2, 2, 2
                bbox_raw = torch.from_numpy(np.array([[l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2], [h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2], [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]]).T).float()
                while True:
                    cand = torch.stack(unravel_index(torch.argmax(grid_obj), grid_obj.shape))
                    cand_world = torch.tensor([corners[0, 0] + cfg.scannet_res * cand[0], corners[0, 1] + cfg.scannet_res * cand[1], corners[0, 2] + cfg.scannet_res * cand[2]]).cuda()

                    if grid_obj[cand[0], cand[1], cand[2]].item() < thresh_high:
                        break
                    
                    grid_obj[max(cand[0]-elimination,0):cand[0]+elimination+1, max(cand[1]-elimination,0):cand[1]+elimination+1, max(cand[2]-elimination,0):cand[2]+elimination+1] = 0
                    
                    rot_vec = grid_rot[cand[0], cand[1], cand[2]]
                    rot = torch.atan2(rot_vec[1], rot_vec[0])
                    rot_mat_full = torch.tensor([[torch.cos(rot), 0, -torch.sin(rot)], [0, 1, 0], [torch.sin(rot), 0, torch.cos(rot)]]).cuda()
                    scale_full = grid_scale[cand[0], cand[1], cand[2]]
                    
                    # fast filtering
                    bbox = (rot_mat_full @ torch.diag(scale_full) @ bbox_raw.cuda().T).T
                    bounding_vol = (torch.stack([torch.min(bbox, 0)[0], torch.max(bbox, 0)[0]]) / cfg.scannet_res).int()
                    cand_coords = torch.stack(torch.meshgrid(torch.arange(bounding_vol[0, 0], bounding_vol[1, 0] + 1), torch.arange(bounding_vol[0, 1], bounding_vol[1, 1] + 1), torch.arange(bounding_vol[0, 2], bounding_vol[1, 2] + 1)), -1).reshape(-1, 3).cuda()
                    cand_coords = cand_coords + cand
                    cand_coords = torch.max(torch.min(cand_coords, torch.tensor(grid_obj.shape).cuda() - 1), torch.tensor([0, 0, 0]).cuda())

                    coords_inv = (((cand_coords - cand) * cfg.scannet_res) @ rot_mat_full) / scale_full
                    bbox_mask = (-1 < coords_inv[:, 0]) & (coords_inv[:, 0] < 1) \
                                    & (-1 < coords_inv[:, 1]) & (coords_inv[:, 1] < 1) \
                                        & (-1 < coords_inv[:, 2]) & (coords_inv[:, 2] < 1)
                    bbox_coords = cand_coords[bbox_mask]
                    
                    coords_inv_world = ((scan_points - cand_world) @ rot_mat_full) / scale_full
                    bbox_mask_world = (-1 < coords_inv_world[:, 0]) & (coords_inv_world[:, 0] < 1) \
                            & (-1 < coords_inv_world[:, 1]) & (coords_inv_world[:, 1] < 1) \
                            & (-1 < coords_inv_world[:, 2]) & (coords_inv_world[:, 2] < 1)

                    # back project elimination: current off   
                    # prob_delta = torch.zeros_like(prob_pred)
                    # prob_delta[bbox_mask_world] = prob_pred[bbox_mask_world]
                    # if not torch.all(prob_delta == 0):
                    #     grid_obj_delta, _, _ = hv(scan_points.cuda(), xyz_pred.contiguous(), scale_pred.contiguous(), prob_delta.contiguous())
                    #     grid_obj -= grid_obj_delta

                    grid_obj[bbox_coords[:, 0], bbox_coords[:, 1], bbox_coords[:, 2]] = 0
                            
                    mask = prob_pred[bbox_mask_world] > 0.3
                    if torch.sum(mask) < valid_ratio * torch.sum(bbox_mask_world) or torch.sum(bbox_mask_world) < thresh_low:
                        continue
                    
                    gt_coords = coords_inv_world[bbox_mask_world][mask]
                    error = torch.mean(torch.norm(xyz_pred[bbox_mask_world][mask] - gt_coords, dim=-1) * prob_pred[bbox_mask_world][mask]).item()
                    
                    if error > 0.3:
                        continue

                    elems, counts = torch.unique(class_pred[bbox_mask_world][mask], return_counts=True)
                    best_class_idx = elems[torch.argmax(counts)].item()
                    best_class = idx2name[best_class_idx]
                    
                    probmax = torch.max(prob_pred[bbox_mask_world])
                    bbox = (rot_mat_full @ torch.diag(scale_full) @ bbox_raw.cuda().T).T + cand_world
                    boxes.append(bbox.cpu().numpy())
                    scores.append(probmax.item())
                    probs.append(probmax.item())
                    classes.append(best_class_idx)
                    
                boxes = np.array(boxes)
                scores = np.array(scores)
                probs = np.array(probs)
                classes = np.array(classes)

                if len(classes) > 0:
                    for i in range(nclasses):
                        if (classes == i).sum() > 0:
                            boxes_cls = boxes[classes == i]
                            scores_cls = scores[classes == i]
                            probs_cls = probs[classes == i]
                            pick = nms(boxes_cls, scores_cls, 0.3)
                            for j in pick:
                                map_scene.append((idx2name[i], boxes_cls[j], probs_cls[j]))
                
                pred_map_cls[id_scan] = map_scene
                
                # read ground truth
                lines = open(os.path.join(cfg.data.gt_path, '{}.txt'.format(id_scan))).read().splitlines()
                map_scene = []
                for line in lines:
                    tx, ty, tz, ry, sx, sy, sz = [float(v) for v in line.split(' ')[:7]]
                    category = line.split(' ')[-1]
                    bbox = (np.array([[np.cos(ry), 0, -np.sin(ry)], [0, 1, 0], [np.sin(ry), 0, np.cos(ry)]]) @ np.diag([sx, sy, sz]) @ bbox_raw.numpy().T).T + np.array([tx, ty, tz])
                    bbox_mat = np.eye(4)
                    bbox_mat[:3, :3] = np.array([[np.cos(ry), 0, -np.sin(ry)], [0, 1, 0], [np.sin(ry), 0, np.cos(ry)]]) @ np.diag([sx, sy, sz])
                    bbox_mat[:3, 3] = np.array([tx, ty, tz])
                    map_scene.append((category, bbox))
                
                gt_map_cls[id_scan] = map_scene
                
                loss = torch.sum(torch.stack(list(losses.values())))
                
                meter.update(loss.item())
                losses_numeral = dict([(k, v.item()) for (k, v) in losses.items()])
                logger.info(', '.join([k + ': {' + k + '}' for k in losses_numeral.keys()]).format(**losses_numeral))
            
            for thresh in [0.25, 0.5]:
                print(thresh)
                ret_dict = compute_map(pred_map_cls, gt_map_cls, thresh)
                if cfg.category != 'all':
                    logger.info('{} Recall: {}'.format(cfg.category, ret_dict['{} Recall'.format(cfg.category)]))
                    logger.info('{} Average Precision: {}'.format(cfg.category, ret_dict['{} Average Precision'.format(cfg.category)]))
                else:
                    for k in range(nclasses):
                        logger.info('{} Recall: {}'.format(idx2name[k], ret_dict['{} Recall'.format(idx2name[k])]))
                        logger.info('{} Average Precision: {}'.format(idx2name[k], ret_dict['{} Average Precision'.format(idx2name[k])]))
                    logger.info('mean Average Precision: {}'.format(ret_dict['mAP']))
 def _init_G(self, factor, module):
     """Initialize memory for factor G and its eigendecomp"""
     self.m_G[module] = torch.diag(factor.new(factor.shape[0]).fill_(1))
     self.m_dG[module] = factor.new_zeros(factor.shape[0])
     self.m_QG[module] = factor.new_zeros(factor.shape)
def add_value_to_diagonal(X, value):
    return X.add_(torch.diag(X.new(X.shape[0]).fill_(value)))
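A tiny in-place usage sketch for add_value_to_diagonal, e.g. for Tikhonov damping before inverting a factor:

import torch

X = torch.zeros(3, 3)
add_value_to_diagonal(X, 1e-3)  # modifies X in place
print(torch.diag(X))            # tensor([0.0010, 0.0010, 0.0010])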
Example #36
 def _setweights(self):
     w_hh = getattr(self, 'weight_hh_l0')  # assumes "from torch import diag, cat"
     w_hr = diag(w_hh[0, :])
     w_hz = diag(w_hh[1, :])
     w_hn = diag(w_hh[2, :])
     setattr(self.module, 'weight_hh_l0', cat([w_hr, w_hz, w_hn], dim=1))
Example #37
    def forward(self, text_inputs, mask_input, len_seq, len_sents, tid, len_para=None, mode=""):
        # init hidden layers (teacher forcing)
        last_hid = self.init_hidden_layers(batch_size=self.batch_size, num_layers=self.num_layers,
                                           rnn_cell_size=self.rnn_cell_size)
        last_cell = self.init_hidden_layers(batch_size=self.batch_size, num_layers=self.num_layers,
                                            rnn_cell_size=self.rnn_cell_size)

        ## Stage 1: Embedding representation
        #flat_sent_input = text_inputs.view(-1, self.max_len_sent) #
        #print(text_inputs.shape)
        x_input = self.x_embed(text_inputs)  # 4dim, (batch_size, num_sents, len_sent, embed_size)

        ## Stage 2: Encoding context via LSTM from input sent
        hid_encoded = []  # list of LSTM states from sentences, denoted "H" in the paper
        hid_mask = []  # list for actual length of hidden layer
        mask = mask_input.view(text_inputs.shape)
        for ind_sent in range(self.max_num_sents):
            # embedding representation
            sent_text = text_inputs[:, ind_sent, :].contiguous()
            sent_x_inputs = x_input[:, ind_sent, :].contiguous()  # e.g., 1st sentence of all docs in batch; (batch_size, len_sent, embed_size)
            cur_sents_mask = mask_input[:, ind_sent, :]
            
            len_seq_sorted, ind_len_sorted = torch.sort(len_seq, descending=True)  # ind_len_sorted: (batch_size, num_sents)

            # x_input_sorted = self.sort_by_2d(sent_x_inputs, ind_len_sorted)
            sent_x_input_sorted = sent_x_inputs[ind_len_sorted]

            ## encoding context via LSTM from input sentences
            # sorting last_hid and last_cell to feed to next training (not needed in nightly version)
            last_hid = self.sort_hid(last_hid, ind_len_sorted) # sort for RNN training
            last_cell = self.sort_hid(last_cell, ind_len_sorted) # sort for RNN training

            # encoding sentence via RNN
            self.lstm.flatten_parameters()
            sent_lstm_out, (last_hid, last_cell) = self.lstm(sent_x_input_sorted, (last_hid, last_cell)) # out: (batch_size, len_sent, cell_size)

            # # Note that, in the nightly version, instead, "enforce_sorted=False" for the automatic sorting ()
            # x_input_packed = pack_padded_sequence(sent_x_input_sorted, lengths=len_seq_sorted, batch_first=True)
            # # x_input_packed = pack_padded_sequence(sent_x_input_sorted, cur_len_sent_sorted, batch_first=True, enforce_sorted=False)
            # out_packed, (last_hid, last_cell) = self.lstm(x_input_packed, (last_hid, last_cell))
            # sent_lstm_out, out_len = pad_packed_sequence(out_packed, batch_first=True)

            # applying mask to last_hid and last_cell by only actual length of sentence
            _, ind_origin = torch.sort(ind_len_sorted)

            sent_lstm_out = sent_lstm_out[ind_origin]
            last_hid = self.sort_hid(last_hid, ind_origin)  # return to origin index for masking
            last_cell = self.sort_hid(last_cell, ind_origin)  # return to origin index for masking

            sent_lstm_out = sent_lstm_out * cur_sents_mask.unsqueeze(2)

            # store encoded sentence
            hid_encoded.append(sent_lstm_out)
        # end for ind_sent

        ## Stage 3: get the most similar hidden states
        vec_close_states = [] # (max_num_sents, batch_size, cell_size)
        for i in range(len(hid_encoded)-1):
            encoded_state_i = hid_encoded[i] # encoded from current sentence
            encoded_state_j = hid_encoded[i+1] # encoded from next sentence

            # get similarity (actually, original paper describes that it is just matrix multiplication)
            sim_states = torch.bmm(encoded_state_i, encoded_state_j.transpose(2,1))  # matmul corresponding to the paper
            sim_states = sim_states.clamp(min=0)  # filter for positive (relu), because it is similarity
            sim_states = self.dropout_layer(sim_states)

            # select two states with maximum similarity
            vec_H = []
            ind_max_sim = torch.argmax(sim_states.view(sim_states.shape[0], -1), dim=1)  # sim index matrix between all states
            for b_id in range(sim_states.shape[0]):
                val_ind = ind_max_sim[b_id]
                max_ind_i = math.floor(val_ind / sim_states.shape[2])
                max_ind_j = val_ind % sim_states.shape[2]

                max_state_i = encoded_state_i[b_id, max_ind_i, :]
                max_state_j = encoded_state_j[b_id, max_ind_j, :]

                vec_ij = (max_state_i + max_state_j) / 2
                vec_H.append(vec_ij)
            # end for batch_id

            vec_H = torch.stack(vec_H) # convert to torch
            vec_close_states.append(vec_H)
        # end for range(len(hid_encoded)), stage3

        ## Stage 4: produce coherence vector
        vec_coh = [] # final output vector represents coherence
        for i in range(len(vec_close_states)-1): # (max_num_sents, batch_size, cell_size)
            vec_u = vec_close_states[i] # (batch_size, cell_size)
            vec_v = vec_close_states[i+1]

            # get similarity (d) between vectors; the paper describes it as a matrix multiplication divided by the vector size
            dist_vec_states = torch.mm(vec_u, vec_v.transpose(1, 0))
            dist_states = torch.diag(dist_vec_states)  # these are already the most related states, so extract only the pairwise similarity values
            dist_states = dist_states / vec_u.shape[1]  # divide by the vector size, i.e., the cell size
            dist_states = dist_states.clamp(min=0)  # again, keep only positive values for similarity

            vec_coh.append(dist_states)
        # end for vec_close_states

        vec_coh = torch.stack(vec_coh)  # convert the list to a tensor, (num_sents-2, batch_size)
        vec_coh = vec_coh.permute(1, 0)  # transpose to (batch_size, num_sents-2) for the linear layer
        vec_coh = self.dropout_layer(vec_coh)

        # applying convolutional layer and max_pooling
        vec_coh = vec_coh.unsqueeze(1) # nn.Conv1d expects (mini_batch, in_channels, width)
        vec_coh = self.conv(vec_coh)
        vec_coh = self.leak_relu(vec_coh)
        vec_coh = vec_coh.squeeze(1)

        ## final linear layers producing the coherence score
        linear_out_coh = self.linear_1(vec_coh)
        linear_out_coh = self.tanh(linear_out_coh)
        #linear_out_coh = self.dropout_layer(linear_out_coh)
        coh_score = self.linear_out(linear_out_coh)
        
        if self.loss_type.lower() == "mseloss":
            coh_score = self.sigmoid(coh_score)


        outputs = []
        outputs.append(coh_score)

        # return coh_score
        return outputs
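# A minimal, self-contained sketch (not the model's API) of the flat-argmax
# trick used in Stage 3 above: take the argmax over a flattened similarity
# matrix, then recover the (row, col) state indices with // and %.
import torch

sim = torch.rand(2, 3, 4)  # (batch, states of sentence i, states of sentence j)
flat_ind = torch.argmax(sim.view(sim.shape[0], -1), dim=1)  # (batch,)
row = flat_ind // sim.shape[2]  # index into sentence i's states
col = flat_ind % sim.shape[2]   # index into sentence j's states
for b in range(sim.shape[0]):
    assert sim[b, row[b], col[b]] == sim[b].max()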
Example #38
def compute_mmd(self,
                unbiaised=False,
                approximation='standard',
                shared_anchors=True,
                name=None,
                verbose=0,
                anchors_basis=None):

    self.verbosity(function_name='compute_mmd',
                   dict_of_variables={
                       'unbiaised': unbiaised,
                       'approximation': approximation,
                       'shared_anchors': shared_anchors,
                       'name': name
                   },
                   start=True,
                   verbose=verbose)

    if approximation == 'standard':
        m = self.compute_omega(sample='xy', quantization=False)
        K = self.compute_gram()
        if unbiaised:
            K.masked_fill_(torch.eye(K.shape[0], K.shape[0], dtype=torch.bool), 0)
        mmd = torch.dot(mv(K, m), m)**2

    if approximation == 'nystrom' and shared_anchors:
        m = self.compute_omega(sample='xy', quantization=False)
        Up = self.spev['xy']['anchors'][anchors_basis]['ev']
        Lp_inv2 = torch.diag(
            self.spev['xy']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Pm = self.compute_centering_matrix(sample='xy', landmarks=True)
        Kmn = self.compute_kmn(sample='xy')
        psi_m = mv(Lp_inv2, mv(Up.T, mv(Pm, mv(Kmn, m))))
        mmd = torch.dot(psi_m, psi_m)**2

    if approximation == 'nystrom' and not shared_anchors:

        mx = self.compute_omega(sample='x', quantization=False)
        my = self.compute_omega(sample='y', quantization=False)
        Upx = self.spev['x']['anchors'][anchors_basis]['ev']
        Upy = self.spev['y']['anchors'][anchors_basis]['ev']
        Lpx_inv2 = torch.diag(
            self.spev['x']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Lpy_inv2 = torch.diag(
            self.spev['y']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Lpy_inv = torch.diag(
            self.spev['y']['anchors'][anchors_basis]['sp']**-1)
        Pmx = self.compute_centering_matrix(sample='x', landmarks=True)
        Pmy = self.compute_centering_matrix(sample='y', landmarks=True)
        Kmnx = self.compute_kmn(sample='x')
        Kmny = self.compute_kmn(sample='y')

        Km = self.compute_gram(sample='xy', landmarks=True)
        m1 = Kmnx.shape[0]
        m2 = Kmny.shape[0]
        Kmxmy = Km[:m1, m1:]  # cross block between the x and y landmarks

        psix_mx = mv(Lpx_inv2, mv(Upx.T, mv(Pmx, mv(Kmnx, mx))))
        psiy_my = mv(Lpy_inv2, mv(Upy.T, mv(Pmy, mv(Kmny, my))))
        Cpsiy_my = mv(Lpx_inv2,mv(Upx.T,mv(Pmx,mv(Kmxmy,\
            mv(Pmy,mv(Upy,mv(Lpy_inv,mv(Upy.T,mv(Pmy,mv(Kmny,my))))))))))
        mmd = torch.dot(psix_mx, psix_mx)**2 + torch.dot(
            psiy_my, psiy_my)**2 - 2 * torch.dot(psix_mx, Cpsiy_my)

    if approximation == 'quantization':
        mq = self.compute_omega(sample='xy', quantization=True)
        Km = self.compute_gram(sample='xy', landmarks=True)
        mmd = torch.dot(mv(Km, mq), mq)**2

    if name is None:
        name = f'{approximation}'
        if approximation == 'nystrom':
            name += 'shared' if shared_anchors else 'diff'

    self.dict_mmd[name] = mmd.item()

    self.verbosity(function_name='compute_mmd',
                   dict_of_variables={
                       'unbiaised': unbiaised,
                       'approximation': approximation,
                       'shared_anchors': shared_anchors,
                       'name': name
                   },
                   start=False,
                   verbose=verbose)
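# A minimal standalone sketch of the 'standard' branch above, assuming omega
# holds the usual signed weights (+1/n for x, -1/m for y) and K is the Gram
# matrix over the pooled sample, so that omega^T K omega estimates the squared
# MMD. All names here are illustrative, not the class API.
import torch

n, m = 5, 7
K = torch.rand(n + m, n + m)
K = K @ K.T  # any positive semi-definite kernel matrix will do
omega = torch.cat([torch.full((n,), 1.0 / n), torch.full((m,), -1.0 / m)])
mmd2 = torch.dot(torch.mv(K, omega), omega)  # omega^T K omega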
Example #39
    def forward_with_log_data(
        self, maximizer_outputs: torch.Tensor,
        correspondence_outputs: torch.Tensor, inlier_labels: torch.Tensor,
        outlier_labels: torch.Tensor
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        # maximizer_outputs: BxCx1x1 where B == C
        # correspondence_outputs: BxCx1x1 where B == C
        # If h and w are not 1 for maximizer_outputs and correspondence_outputs,
        # the center values are extracted.

        assert (maximizer_outputs.shape[0] % maximizer_outputs.shape[1] == 0)
        assert (maximizer_outputs.shape[2] == maximizer_outputs.shape[3])

        if maximizer_outputs.shape[2] != 1:
            center_px = (maximizer_outputs.shape[2] - 1) // 2
            maximizer_outputs = maximizer_outputs[:, :, center_px, center_px]
        if correspondence_outputs.shape[2] != 1:
            center_px = (correspondence_outputs.shape[2] - 1) // 2
            correspondence_outputs = correspondence_outputs[:, :, center_px,
                                                            center_px]

        maximizer_outputs = torch.sigmoid(
            maximizer_outputs.squeeze())  # BxCx1x1 -> BxC
        correspondence_outputs = torch.sigmoid(
            correspondence_outputs.squeeze())  # BxCx1x1 -> BxC

        # convert the label types so we can use torch.diag() on the labels
        if inlier_labels.dtype == torch.bool:
            inlier_labels = inlier_labels.to(torch.uint8)

        if outlier_labels.dtype == torch.bool:
            outlier_labels = outlier_labels.to(torch.uint8)

        corr_outlier_index_2d = torch.diag(outlier_labels)

        aligned_outlier_corr_outputs = correspondence_outputs[
            corr_outlier_index_2d]
        if aligned_outlier_corr_outputs.numel() == 0:
            outlier_correspondence_loss = None
        else:
            outlier_correspondence_loss = torch.sum(-1 * torch.log(
                torch.max(aligned_outlier_corr_outputs, self._epsilon)))

        # expand inlier_labels by num_patches_per_channel:
        # an inlier sits at the start of every segment of (num patches per channel)
        num_patches_per_channel = maximizer_outputs.shape[
            0] // maximizer_outputs.shape[1]

        expanded_inlier_labels = torch.zeros(inlier_labels.shape[0] *
                                             num_patches_per_channel,
                                             dtype=torch.uint8,
                                             device=inlier_labels.device)
        maximum_patch_index = torch.arange(0, maximizer_outputs.shape[0],
                                           num_patches_per_channel)
        expanded_inlier_labels[maximum_patch_index] = inlier_labels

        has_data_labels = inlier_labels | outlier_labels  # B
        expanded_outlier_labels = (
            has_data_labels.repeat_interleave(num_patches_per_channel)
            & torch.logical_not(expanded_inlier_labels).to(dtype=torch.uint8))

        maxima_inlier_index_2d = expanded_inlier_labels[:, None] * torch.repeat_interleave(
            torch.eye(maximizer_outputs.shape[1],
                      device=maximizer_outputs.device,
                      dtype=torch.uint8),
            num_patches_per_channel,
            dim=0)

        maxima_outlier_index_2d = expanded_outlier_labels[:, None] * torch.repeat_interleave(
            torch.eye(maximizer_outputs.shape[1],
                      device=maximizer_outputs.device,
                      dtype=torch.uint8),
            num_patches_per_channel,
            dim=0)

        aligned_outlier_maximizer_scores = maximizer_outputs[
            maxima_outlier_index_2d]
        if aligned_outlier_maximizer_scores.numel() == 0:
            outlier_maximizer_loss = None
        else:
            outlier_maximizer_loss = torch.sum(-1 * torch.log(
                torch.max(-1 * aligned_outlier_maximizer_scores + 1,
                          self._epsilon)))

        aligned_inlier_maximizer_scores = maximizer_outputs[
            maxima_inlier_index_2d]
        if aligned_inlier_maximizer_scores.numel() == 0:
            inlier_loss = None
        else:
            inlier_loss = torch.sum(-1 * torch.log(
                torch.max(aligned_inlier_maximizer_scores, self._epsilon)))

        # Finally, if a channel attains its maximum response inside of a given radius
        # about its target correspondence site, the responses of all the other channels
        # to its maximizing patch are minimized.

        maximizer_outputs = maximizer_outputs[
            maximum_patch_index]  # BxC where B==C
        # equivalent: inlier_labels.unsqueeze(1).repeat(1, inlier_labels.shape[0]) - inlier_labels.diag()
        unaligned_inlier_index = torch.diag(
            inlier_labels) ^ inlier_labels.unsqueeze(1)
        unaligned_inlier_maximizer_scores = maximizer_outputs[
            unaligned_inlier_index]
        unaligned_maximizer_loss = torch.sum(unaligned_inlier_maximizer_scores)
        if unaligned_inlier_maximizer_scores.nelement() == 0:
            unaligned_maximizer_loss = torch.zeros(
                [1],
                requires_grad=True,
                device=unaligned_maximizer_loss.device)

        total_loss = torch.zeros(1,
                                 device=maximizer_outputs.device,
                                 dtype=maximizer_outputs.dtype,
                                 requires_grad=True)

        # imips just adds the unaligned scores to the loss directly
        total_loss = self._add_if_not_none(total_loss,
                                           outlier_correspondence_loss)
        total_loss = self._add_if_not_none(total_loss, outlier_maximizer_loss)
        total_loss = self._add_if_not_none(total_loss, inlier_loss)
        total_loss = self._add_if_not_none(total_loss,
                                           unaligned_maximizer_loss)

        return total_loss, {
            "loss":
            total_loss.detach(),
            "outlier_correspondence_loss":
            self._detach_if_not_none(outlier_correspondence_loss),
            "outlier_maximizer_loss":
            self._detach_if_not_none(outlier_maximizer_loss),
            "inlier_maximizer_loss":
            self._detach_if_not_none(inlier_loss),
            "unaligned_maximizer_loss":
            self._detach_if_not_none(unaligned_maximizer_loss),
        }
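# A small illustrative sketch (separate from the loss above) of the
# torch.diag indexing trick used throughout: the diag of a 0/1 label vector
# is a BxB mask that selects exactly the aligned (b, b) entries.
import torch

scores = torch.rand(4, 4)
labels = torch.tensor([1, 0, 1, 0], dtype=torch.uint8)
mask_2d = torch.diag(labels)        # 1 at (b, b) wherever labels[b] == 1
aligned = scores[mask_2d.bool()]    # -> scores[0, 0] and scores[2, 2]
assert torch.equal(aligned, torch.stack([scores[0, 0], scores[2, 2]]))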
    def forward(self, states_prev, controls, noisy=False):
        # states_prev:  (N, M, state_dim)
        # controls: (N, control_dim)

        self.jacobian = False
        if self.use_particles:
            assert len(states_prev.shape) == 3  # (N, M, state_dim)
            N, M, state_dim = states_prev.shape
            dimensions = (N, M)
        else:
            if len(states_prev.shape) > 2:
                # this case comes from the mask used for Jacobian computation
                N, X, state_dim = states_prev.shape
                dimensions = (N, X)
                self.jacobian = True
            else:
                assert len(states_prev.shape) == 2  # (N, state_dim)
                N, state_dim = states_prev.shape
                dimensions = (N, )
                assert len(controls.shape) == 2  # (N, control_dim,)

        # N := distinct trajectory count
        # M := particle count

        # (N, control_dim) => (N, units)
        control_features = self.control_layers(controls)

        # (N, units) => (N, M, units)
        if self.use_particles:
            control_features = control_features[:, np.newaxis, :].expand(
                N, M, self.units)
            assert control_features.shape == (N, M, self.units)

        # (N, M, state_dim) => (N, M, units)
        state_features = self.state_layers(states_prev)
        assert state_features.shape == dimensions + (self.units, )

        # (N, M, units * 2)
        merged_features = torch.cat((control_features, state_features), dim=-1)
        assert merged_features.shape == dimensions + (self.units * 2, )

        # (N, M, units * 2) => (N, M, state_dim + 1)
        output_features = self.shared_layers(merged_features)

        # We separately compute a direction for our network and a "gate"
        # These are multiplied to produce our final state output
        if self.use_particles or self.jacobian:
            state_update_direction = output_features[:, :, :state_dim]
            state_update_gate = torch.sigmoid(output_features[:, :, -1:])
        else:
            state_update_direction = output_features[:, :state_dim]
            state_update_gate = torch.sigmoid(output_features[:, -1:])
        state_update = state_update_direction * state_update_gate
        assert state_update.shape == dimensions + (state_dim, )

        # Compute new states

        ## TODO: fix this? should be simple -- currently breaks Jacobians
        # update_dims = tuple(slice(None) for _ in dimensions)
        # update_dims += (tuple(i for i in range(state_dim)
        #                       if i not in self.identity_prediction_dims), )
        #
        # states_new = states_prev.clone()
        # states_new[update_dims] += state_update

        states_new = states_prev + state_update
        assert states_new.shape == dimensions + (state_dim, )

        self.Q = torch.diag(self.Q_l**2)

        # print("q: ", self.Q)
        # Add noise if desired
        if noisy:
            dist = torch.distributions.MultivariateNormal(
                torch.zeros(self.state_dim,
                            dtype=torch.float32).to(states_new.device),
                self.Q,
            )
            # self.Q is already the (diagonal) covariance matrix, which
            # MultivariateNormal consumes directly
            if self.learnable_Q:
                noise = dist.rsample(dimensions)
            else:
                noise = dist.sample(dimensions)
            assert noise.shape == dimensions + (state_dim, )
            states_new = states_new + noise

        # Return (N, M, state_dim)
        return states_new
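# A minimal sketch of the noise model above, assuming Q_l is a learnable
# per-dimension scale vector: Q = diag(Q_l**2) is a diagonal covariance that
# a zero-mean MultivariateNormal consumes directly.
import torch

state_dim = 3
Q_l = torch.tensor([0.1, 0.2, 0.3])  # stands in for self.Q_l
Q = torch.diag(Q_l ** 2)             # (state_dim, state_dim) diagonal covariance
dist = torch.distributions.MultivariateNormal(torch.zeros(state_dim), Q)
noise = dist.sample((8,))            # (8, state_dim)
assert noise.shape == (8, state_dim)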
Example #41
 def recover_full_adj_from_lower(self, lower):
     diag = torch.diag(torch.diag(lower, 0))
     return lower + torch.transpose(lower, 0, 1) - diag
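# Quick illustrative check for recover_full_adj_from_lower: adding the
# transpose and subtracting the diagonal once rebuilds a symmetric matrix
# whose lower triangle is the original input.
import torch

lower = torch.tril(torch.rand(4, 4))
full = lower + lower.T - torch.diag(torch.diag(lower, 0))
assert torch.equal(full, full.T)
assert torch.equal(torch.tril(full), lower)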
 def compute(self):
     h = self.mat.float()
     acc_global = torch.diag(h).sum() / h.sum()
     acc = torch.diag(h) / h.sum(1)
     iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))
     return acc_global, acc, iu
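# A tiny worked example for compute() above: in a confusion matrix h, the
# diagonal counts correct predictions, rows sum ground-truth counts and
# columns sum predicted counts, giving global accuracy, per-class accuracy
# and per-class IoU.
import torch

h = torch.tensor([[5., 1.], [2., 8.]])  # rows: true class, cols: predicted class
acc_global = torch.diag(h).sum() / h.sum()                  # (5 + 8) / 16
acc = torch.diag(h) / h.sum(1)                              # [5/6, 8/10]
iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))  # [5/8, 8/11]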
Example #43
def compute_pkm(self):
    """
    pkm is an alias for the product PKomega in the standard KFDA statistic. 
    This function computes the corresponding block with respect to the model parameters.
    """
    cov, mmd = self.approximation_cov, self.approximation_mmd
    anchors_basis = self.anchors_basis
    cov_anchors = 'shared'  # not finished yet

    if 'nystrom' in cov or 'nystrom' in mmd:
        r = self.r

    omega = self.compute_omega(quantization=(mmd == 'quantization'))
    Pbi = self.compute_centering_matrix(sample='xy',
                                        quantization=(cov == 'quantization'))

    if any([
            ny in [mmd, cov]
            for ny in ['nystrom1', 'nystrom2', 'nystrom3', 'nystrom']
    ]):
        Uz = self.spev['xy']['anchors'][anchors_basis]['ev']
        Lz = torch.diag(self.spev['xy']['anchors'][anchors_basis]['sp']**-1)

    if mmd != cov or mmd == 'nystrom':
        Kzx = self.compute_kmn(sample='xy')

    if cov == 'standard':
        if mmd == 'standard':
            Kx = self.compute_gram()
            pkm = mv(Pbi, mv(Kx, omega))

        elif mmd == 'nystrom':
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            pkm = 1 / r * mv(
                Pbi,
                mv(Kzx.T,
                   mv(Pi, mv(Uz, mv(Lz, mv(Uz.T, mv(Pi, mv(Kzx, omega))))))))
            # pkm = mv(Pbi,mv(Kzx.T,mv(Pi,mv(Uz,mv(Lz,mv(Uz.T,mv(Pi,mv(Kzx,omega))))))))

        elif mmd == 'quantization':
            pkm = mv(Pbi, mv(Kzx.T, omega))

    if cov == 'nystrom1' and cov_anchors == 'shared':
        if mmd in ['standard', 'nystrom']:  # it is exactly the same statistic
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            pkm = 1 / r**2 * mv(
                Pbi,
                mv(Kzx.T,
                   mv(Pi, mv(Uz, mv(Lz, mv(Uz.T, mv(Pi, mv(Kzx, omega))))))))
            # pkm = mv(Pbi,mv(Kzx.T,mv(Pi,mv(Uz,mv(Lz,mv(Uz.T,mv(Pi,mv(Kzx,omega))))))))

        elif mmd == 'quantization':
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r**2 * mv(
                Pbi, mv(Kzx.T, mv(Uz, mv(Lz, mv(Uz.T, mv(Kz, omega))))))
            # pkm = mv(Pbi,mv(Kzx.T,mv(Uz,mv(Lz,mv(Uz.T,mv(Kz,omega))))))

    if cov == 'nystrom2' and cov_anchors == 'shared':
        Lz12 = torch.diag(
            self.spev['xy']['anchors'][anchors_basis]['sp']**-(1 / 2))
        if mmd in ['standard', 'nystrom']:  # it is exactly the same statistic
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            pkm = 1 / r**3 * mv(
                Lz12,
                mv(
                    Uz.T,
                    mv(
                        Pi,
                        mv(
                            Kzx,
                            mv(
                                Pbi,
                                mv(
                                    Kzx.T,
                                    mv(
                                        Pi,
                                        mv(
                                            Uz,
                                            mv(
                                                Lz,
                                                mv(Uz.T, mv(
                                                    Pi, mv(Kzx,
                                                           omega))))))))))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Pi,mv(Kzx,mv(Pbi,mv(Kzx.T,mv(Pi,mv(Uz,mv(Lz,mv(Uz.T,mv(Pi,mv(Kzx,omega))))))))))))

        elif mmd == 'quantization':  # not up to date
            # the dichotomy between centered and uncentered anchors could apply here.
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r**3 * mv(
                Lz12,
                mv(
                    Uz.T,
                    mv(
                        Kzx,
                        mv(Pbi,
                           mv(Kzx.T, mv(Uz, mv(Lz, mv(Uz.T, mv(Kz,
                                                               omega)))))))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Kzx,mv(Pbi,mv(Kzx.T,mv(Uz,mv(Lz,mv(Uz.T,mv(Kz,omega)))))))))

    if cov == 'nystrom3' and cov_anchors == 'shared':
        Lz12 = torch.diag(
            self.spev['xy']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
        if mmd in ['standard', 'nystrom']:  # it is exactly the same statistic
            pkm = 1 / r * mv(Lz12, mv(Uz.T, mv(Pi, mv(Kzx, omega))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Pi,mv(Kzx,omega))))

        elif mmd == 'quantization':  # not up to date
            # Pi still needs to be added here.
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r**2 * mv(
                Lz12,
                mv(
                    Uz.T,
                    mv(
                        Pi,
                        mv(
                            Kzx,
                            mv(
                                Pbi,
                                mv(
                                    Kzx.T,
                                    mv(Pi,
                                       mv(Uz, mv(Lz, mv(Uz.T, mv(
                                           Kz, omega)))))))))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Kzx,mv(Pbi,mv(Kzx.T,mv(Uz,mv(Lz,mv(Uz.T,mv(Kz,omega)))))))))

    if cov == 'nystrom1' and cov_anchors == 'separated':
        if mmd == 'standard':
            x, y = self.get_xy()
            z1, z2 = self.get_xy(landmarks=True)
            Kz1x = self.kerne(z1, x)
            Kz1y = self.kerne(z1, y)
            Kz2x = self.kerne(z2, x)
            Kz2y = self.kerne(z2, y)
            Uz1 = self.spev['x']['anchors'][anchors_basis]['ev']
            Lz1 = torch.diag(
                self.spev['x']['anchors'][anchors_basis]['sp']**-1)
            Uz2 = self.spev['y']['anchors'][anchors_basis]['ev']
            Lz2 = torch.diag(
                self.spev['y']['anchors'][anchors_basis]['sp']**-1)
            omega1 = self.compute_omega(sample='x', quantization=False)
            omega2 = self.compute_omega(sample='y', quantization=False)
            Pn1 = self.compute_centering_matrix(sample='x')
            Pn2 = self.compute_centering_matrix(sample='y')
            haut = mv(
                Lz1,
                mv(
                    Uz1,
                    mv(
                        Kz1x,
                        mv(
                            Pn1,
                            mv(
                                Kz1x,
                                mv(
                                    Uz1,
                                    mv(
                                        Lz1,
                                        mv(Uz1.T,
                                           mv(Kz1y, omega2) -
                                           mv(Kz1x, omega1)))))))))
            bas = mv(
                Lz2,
                mv(
                    Uz2,
                    mv(
                        Kz2y,
                        mv(
                            Pn2,
                            mv(
                                Kz2y,
                                mv(
                                    Uz2,
                                    mv(
                                        Lz2,
                                        mv(Uz2.T,
                                           mv(Kz2y, omega2) -
                                           mv(Kz2x, omega1)))))))))

    if cov == 'quantization':  # not up to date
        A = self.compute_quantization_weights(power=1 / 2, sample='xy')
        if mmd == 'standard':
            pkm = mv(Pbi, mv(A, mv(Kzx, omega)))

        elif mmd == 'nystrom':
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r * mv(
                Pbi,
                mv(A, mv(Kz, mv(Uz, mv(Lz, mv(Uz.T, mv(Pi, mv(Kzx,
                                                              omega))))))))

        elif mmd == 'quantization':
            Kz = self.compute_gram(landmarks=True)
            pkm = mv(Pbi, mv(A, mv(Kz, omega)))
    return pkm
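# A small sketch clarifying the nested mv(...) chains above: mv is a plain
# matrix-vector product, so each chain applies its operators right to left,
# i.e. mv(A, mv(B, x)) == (A @ B) @ x, without ever forming the matrix A @ B.
import torch
from torch import mv

A, B = torch.rand(4, 4), torch.rand(4, 4)
x = torch.rand(4)
assert torch.allclose(mv(A, mv(B, x)), (A @ B) @ x)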
 def _init_A(self, factor, module):
     """Initialize memory for factor A and its inverse"""
     self.m_A[module] = torch.diag(factor.new_ones(factor.shape[0]))
     self.m_inv_A[module] = factor.new_zeros(factor.shape)
Example #45
def _repulsion_loss(pred_box,
                    gt_box,
                    pred_label,
                    pred_box_gt_index,
                    num_gt_box,
                    sigma_gt=1,
                    sigma_box=0):
    """This function compute repulsion loss. This function only support the
    classification task with 2 categories
    :arg
        rois: [bs, num_rois, 4], each roi is denoted as [x1, y1, x2, y2]
        gt_box: [bs, num_gts, 4], each gt_box is denoted as [x1, y1, x2, y2]
        rois_label: [bs, num_rois], the label of rois 1 (fg) or 0 (bg)
        rois_gt_index: [bs, num_rois], the assigned gt box index for each roi

    """
    # TODO: support multi-categories classification

    # ------------------ rep gt loss ----------------------------
    bs = pred_box.size()[0]
    pred_box_gt_index = pred_box_gt_index.data.long()
    pred_label = pred_label.data.long()
    num_gt_box = num_gt_box.data.long()

    rep_gt_loss = Variable(pred_box.data.new([0]), requires_grad=True)
    count = 0
    for b in range(bs):
        gt_index = pred_box_gt_index.new(range(num_gt_box[b]))

        true_pred_box_index = ((pred_label[b] > 0) &
                               (pred_box_gt_index[b] < num_gt_box[b]))
        true_pred_box_index = torch.nonzero(true_pred_box_index).squeeze()
        for i in true_pred_box_index:
            index = gt_index != pred_box_gt_index[b][i]
            index = torch.nonzero(index).squeeze()
            if index.size():
                one_pred_box = pred_box[b][i].unsqueeze(dim=0)
                one_gt_boxes = gt_box[b][index]

                iog = _IoG(one_pred_box, one_gt_boxes)
                iog = iog.view(-1)
                # filter out IoG == 1: a proposal may coincide with the gt box, so remove it
                index = (iog < 1).data.long()
                index = torch.nonzero(index).squeeze()

                if index.size():
                    iog = iog[index]

                    # the repulsion loss in https://github.com/bailvwangzi/repulsion_loss_ssd
                    rep_gt_loss = rep_gt_loss + iog.max()
                    count += 1

                    # the repulsion loss in the original paper: https://arxiv.org/abs/1711.07752
                    # iog = iog.max()
                    # if iog.data[0] > 0:
                    #     count = count + 1
                    #     if iog.data[0] <= sigma_gt:
                    #         loss_tmp = -torch.log(1 -iog)
                    #     if iog.data[0] > sigma_gt:
                    #         loss_tmp = (iog - sigma_gt) / (1 - sigma_gt) - Variable(torch.log(iog.data.new([1-sigma_gt])))
                    #     rep_gt_loss = rep_gt_loss + loss_tmp

    if count > 0:
        rep_gt_loss = rep_gt_loss / count

    # ------------------- rep box loss -------------------------------
    rep_box_loss = Variable(pred_box.data.new([0]), requires_grad=True)
    count = 0
    for b in range(bs):
        # get the index of gt box
        pred_box_gt_index_tmp = pred_box_gt_index[b]
        if pred_box_gt_index_tmp.is_cuda:
            assigned_gt_index = np.unique(pred_box_gt_index_tmp.cpu().numpy())
        else:
            assigned_gt_index = np.unique(pred_box_gt_index_tmp.numpy())
        assigned_gt_index = pred_box_gt_index.new(assigned_gt_index)

        # the gt_index for bg rois is -1, so filter those out
        assigned_gt_index = assigned_gt_index[assigned_gt_index >= 0]
        assigned_gt_index = assigned_gt_index[
            assigned_gt_index < num_gt_box[b]]

        # used to store the index of chosen rois for each gt box
        chosen_rois_index = []

        for gt_i in assigned_gt_index:
            index = pred_box_gt_index[
                b] == gt_i  # each gt box has at least 1 roi
            index = torch.nonzero(index).squeeze()

            # randomly choose one roi
            roi_index_random = index[np.random.choice(range(index.size()[0]))]
            chosen_rois_index.append(roi_index_random)

        chosen_rois_index = pred_box_gt_index.new(chosen_rois_index)
        if chosen_rois_index.size():
            chosen_rois = pred_box[b][chosen_rois_index]

            iou = _IoU(chosen_rois, chosen_rois)
            iou = iou - torch.diag(iou.diag())

            iou = iou.view(-1)
            index = (iou > 0).data.long()
            index = torch.nonzero(index).squeeze()
            if index.size():
                iou = iou[index]
                count = count + index.size()[0]

                # we use the iou directly, without smoothing
                rep_box_loss = rep_box_loss + iou.sum()

                # # the repulsion loss in the original paper: https://arxiv.org/abs/1711.07752
                # index = (iou <= sigma_box).data.long()
                # index = torch.nonzero(index).squeeze()
                # if index.size():
                #     iou_1 = iou[index]
                #     iou_1 = - torch.log(1 - iou_1)
                #     loss_tmp_1 = iou_1.sum()
                # else:
                #     loss_tmp_1 = 0
                #
                # index = (iou > sigma_box).data.long()
                # index = torch.nonzero(index).squeeze()
                # if index.size():
                #     iou_2 = iou[index]
                #     iou_2 = (iou_2 - sigma_box) / (1 - sigma_box) - Variable(torch.log(iou.data.new([1-sigma_box])))
                #     loss_tmp_2 = iou_2.sum()
                # else:
                #     loss_tmp_2 = 0
                #
                # rep_box_loss = rep_box_loss + loss_tmp_1 + loss_tmp_2

    if count > 0:
        rep_box_loss = rep_box_loss / count

    return rep_gt_loss, rep_box_loss
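# An isolated sketch of the diagonal trick in the rep-box branch above:
# subtracting diag(iou.diag()) zeroes each box's IoU with itself, so only
# cross-box overlaps contribute to the loss.
import torch

iou = torch.rand(3, 3)
iou = 0.5 * (iou + iou.T)                    # a symmetric overlap matrix
iou_off_diag = iou - torch.diag(iou.diag())  # zero out the self-overlap terms
assert torch.equal(iou_off_diag.diag(), torch.zeros(3))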
Example #46
def test_diag():
    A = torch.tensor([[1, -4], [-2, 3]])
    assert (torch.diag(A) == utils.diag(A)).all()
    X = torch.randn(4, 5, 5)
    assert (utils.batch_diag(X) == torch.stack([torch.diag(x) for x in X])).all()
Example #47
def INF(B, H, W):
    return -torch.diag(torch.tensor(float("inf")).cuda().repeat(H),
                       0).unsqueeze(0).repeat(B * W, 1, 1)
Example #48
def gen_scale_matrix(scale):
    o = torch.ones([1], dtype=torch.float32)
    return torch.diag(torch.cat([scale, o], 0))
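# Usage sketch for gen_scale_matrix: appending a homogeneous 1 to the scale
# vector yields the (n+1)x(n+1) diagonal matrix used with homogeneous
# coordinates.
import torch

S = gen_scale_matrix(torch.tensor([2.0, 0.5]))
# S == [[2.0, 0.0, 0.0],
#       [0.0, 0.5, 0.0],
#       [0.0, 0.0, 1.0]]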
Example #49
else:
    config['cf_fair']['type'] = 'regression'

# -------------------------------------------------------------------------
# Fit assumed model A via cross validation
# -------------------------------------------------------------------------
logger.info("Fit model A via CV, compute phi and residuals...")
model_a = models.ModelA(g_noy)
_, phi, vareps = model_a.fit(data, config['cf_fair'])
logger.info(f"Best parameters: {model_a.best_parameters}")

# Refit as torch with weighted ridge
logger.info("Refit model analytically...")
targets = utils.data_to_tensor(data, list(model_a.targets.keys()), numpy=True)
phi, a, targets = [torch.tensor(_) for _ in (phi, a, targets)]
sigma = torch.diag(torch.tensor(vareps.std(axis=0)**2))
wdagger, vareps = utils.weighted_ridge(phi, targets, sigma, model_a.alpha)

model_a.model.regressor_.coef_ = wdagger.clone().numpy().squeeze()
vareps = vareps.clone().numpy().squeeze()

if debug > 0:
    logger.info("Plot conditional histograms...")
    for i, target in enumerate(g_noy.vertices()[1:]):
        plotters.plot_conditional_histograms(
            {
                f'resid_{target}': vareps[:, i],
                'A': a
            }, f'resid_{target}', 'A', fig_dir)

# -------------------------------------------------------------------------
Example #50
 def gen_adj(self, A):
     A += torch.eye(A.size(0)).cuda()
     D = torch.pow(A.sum(1).float(), -0.5)
     D = torch.diag(D)
     adj = torch.matmul(torch.matmul(A, D).t(), D)
     return adj
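# A CPU sketch of gen_adj above: add self-loops, then apply the symmetric
# normalization D^{-1/2} A D^{-1/2}; for symmetric A, (A @ D).t() @ D equals
# D @ A @ D.
import torch

A = torch.tensor([[0., 1.], [1., 0.]])
A = A + torch.eye(A.size(0))               # self-loops
D = torch.diag(torch.pow(A.sum(1), -0.5))  # D^{-1/2}
adj = torch.matmul(torch.matmul(A, D).t(), D)
assert torch.allclose(adj, D @ A @ D)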
Example #51
    def __init__(
        self,
        n_inpt: int,
        n_neurons: int = 100,
        exc: float = 22.5,
        inh: float = 17.5,
        dt: float = 1.0,
        nu: Optional[Union[float, Sequence[float]]] = (1e-4, 1e-2),
        reduction: Optional[callable] = None,
        wmin: float = 0.0,
        wmax: float = 1.0,
        norm: float = 78.4,
        theta_plus: float = 0.05,
        tc_theta_decay: float = 1e7,
        inpt_shape: Optional[Iterable[int]] = None,
    ) -> None:
        # language=rst
        """
        Constructor for class ``DiehlAndCook2015``.

        :param n_inpt: Number of input neurons. Matches the 1D size of the input data.
        :param n_neurons: Number of excitatory, inhibitory neurons.
        :param exc: Strength of synapse weights from excitatory to inhibitory layer.
        :param inh: Strength of synapse weights from inhibitory to excitatory layer.
        :param dt: Simulation time step.
        :param nu: Single or pair of learning rates for pre- and post-synaptic events,
            respectively.
        :param reduction: Method for reducing parameter updates along the minibatch
            dimension.
        :param wmin: Minimum allowed weight on input to excitatory synapses.
        :param wmax: Maximum allowed weight on input to excitatory synapses.
        :param norm: Input to excitatory layer connection weights normalization
            constant.
        :param theta_plus: On-spike increment of ``DiehlAndCookNodes`` membrane
            threshold potential.
        :param tc_theta_decay: Time constant of ``DiehlAndCookNodes`` threshold
            potential decay.
        :param inpt_shape: The dimensionality of the input layer.
        """
        super().__init__(dt=dt)

        self.n_inpt = n_inpt
        self.inpt_shape = inpt_shape
        self.n_neurons = n_neurons
        self.exc = exc
        self.inh = inh
        self.dt = dt

        # Layers
        input_layer = Input(n=self.n_inpt,
                            shape=self.inpt_shape,
                            traces=True,
                            tc_trace=20.0)
        exc_layer = DiehlAndCookNodes(
            n=self.n_neurons,
            traces=True,
            rest=-65.0,
            reset=-60.0,
            thresh=-52.0,
            refrac=5,
            tc_decay=100.0,
            tc_trace=20.0,
            theta_plus=theta_plus,
            tc_theta_decay=tc_theta_decay,
        )
        inh_layer = LIFNodes(
            n=self.n_neurons,
            traces=False,
            rest=-60.0,
            reset=-45.0,
            thresh=-40.0,
            tc_decay=10.0,
            refrac=2,
            tc_trace=20.0,
        )

        # Connections
        w = 0.3 * torch.rand(self.n_inpt, self.n_neurons)
        input_exc_conn = Connection(
            source=input_layer,
            target=exc_layer,
            w=w,
            update_rule=PostPre,
            nu=nu,
            reduction=reduction,
            wmin=wmin,
            wmax=wmax,
            norm=norm,
        )
        w = self.exc * torch.diag(torch.ones(self.n_neurons))
        exc_inh_conn = Connection(source=exc_layer,
                                  target=inh_layer,
                                  w=w,
                                  wmin=0,
                                  wmax=self.exc)
        w = -self.inh * (torch.ones(self.n_neurons, self.n_neurons) -
                         torch.diag(torch.ones(self.n_neurons)))
        inh_exc_conn = Connection(source=inh_layer,
                                  target=exc_layer,
                                  w=w,
                                  wmin=-self.inh,
                                  wmax=0)

        # Add to network
        self.add_layer(input_layer, name="X")
        self.add_layer(exc_layer, name="Ae")
        self.add_layer(inh_layer, name="Ai")
        self.add_connection(input_exc_conn, source="X", target="Ae")
        self.add_connection(exc_inh_conn, source="Ae", target="Ai")
        self.add_connection(inh_exc_conn, source="Ai", target="Ae")
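# A small sketch of the two weight constructions above: excitatory ->
# inhibitory is one-to-one (a scaled identity), while inhibitory ->
# excitatory is all-to-all except the matching neuron (zero diagonal),
# implementing lateral inhibition.
import torch

n, exc, inh = 4, 22.5, 17.5
w_exc_inh = exc * torch.diag(torch.ones(n))  # nonzero only on the diagonal
w_inh_exc = -inh * (torch.ones(n, n) - torch.diag(torch.ones(n)))
assert torch.equal(w_exc_inh, exc * torch.eye(n))
assert torch.equal(w_inh_exc.diag(), torch.zeros(n))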
Example #52
    def calc_weight(self):
        weight = (self.w_p @ (self.w_l * self.l_mask + self.l_eye) @ (
            (self.w_u * self.u_mask) +
            torch.diag(self.s_sign * torch.exp(self.w_s))))

        return weight.unsqueeze(2).unsqueeze(3)
import torch
import matplotlib.pyplot as plt
a = torch.tensor([3, 2, -0.1])
Q = torch.diag(a)

alpha = 0.6


def count(iteration_time, X):
    function_error = []
    for iter in range(iteration_time):
        f = torch.matmul(torch.matmul(torch.transpose(X, 0, 1), Q), X) * 0.5
        g = torch.matmul(Q, X)
        #print("Loss: %f; ||g||: %f,iteration: %f, error:%f" % (torch.log10(f), torch.norm(g),iter,torch.log10(itr_error*itr_error)))
        g = g.view(-1, 1)
        X = X - alpha * g
        #Loss_list.append(torch.log(f))
        #Grad_norm.append(torch.log(torch.norm(g)))
        function_error.append(torch.log10(f))
    return function_error


f1 = count(1000, torch.ones(3, 1))
#f2=count(1000000,torch.ones(3,1))
itr_time1 = range(0, 1000)
#itr_time2 = range(0, 10000)
plt.subplot(1, 2, 1)
plt.plot(itr_time1, f1, label='GD with constant stepsize')
plt.title('Function error vs. iteration over 1000 iterations')
plt.ylabel('Log of function_error')
plt.legend()
    def __init__(self, device, num_nodes, dropout=0.3, supports=None, gcn_bool=True, addaptadj=True, aptinit=None, in_dim=2,out_dim=12,residual_channels=32,dilation_channels=32,skip_channels=256,end_channels=512,kernel_size=2,blocks=4,layers=2):
        super(gwnet, self).__init__()
        self.dropout = dropout
        self.blocks = blocks
        self.layers = layers
        self.gcn_bool = gcn_bool
        self.addaptadj = addaptadj

        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()
        self.residual_convs = nn.ModuleList()
        self.skip_convs = nn.ModuleList()
        self.bn = nn.ModuleList()
        self.gconv = nn.ModuleList()

        self.start_conv = nn.Conv2d(in_channels=in_dim,
                                    out_channels=residual_channels,
                                    kernel_size=(1,1))
        self.supports = supports

        receptive_field = 1

        self.supports_len = 0
        if supports is not None:
            self.supports_len += len(supports)

        if gcn_bool and addaptadj:
            if aptinit is None:
                if supports is None:
                    self.supports = []
                self.nodevec1 = nn.Parameter(torch.randn(num_nodes, 10).to(device), requires_grad=True).to(device)
                self.nodevec2 = nn.Parameter(torch.randn(10, num_nodes).to(device), requires_grad=True).to(device)
                self.supports_len +=1
            else:
                if supports is None:
                    self.supports = []
                m, p, n = torch.svd(aptinit)
                initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5))
                initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t())
                self.nodevec1 = nn.Parameter(initemb1, requires_grad=True).to(device)
                self.nodevec2 = nn.Parameter(initemb2, requires_grad=True).to(device)
                self.supports_len += 1




        for b in range(blocks):
            additional_scope = kernel_size - 1
            new_dilation = 1
            for i in range(layers):
                # dilated convolutions
                self.filter_convs.append(nn.Conv2d(in_channels=residual_channels,
                                                   out_channels=dilation_channels,
                                                   kernel_size=(1,kernel_size),dilation=new_dilation))

                self.gate_convs.append(nn.Conv1d(in_channels=residual_channels,
                                                 out_channels=dilation_channels,
                                                 kernel_size=(1, kernel_size), dilation=new_dilation))

                # 1x1 convolution for residual connection
                self.residual_convs.append(nn.Conv1d(in_channels=dilation_channels,
                                                     out_channels=residual_channels,
                                                     kernel_size=(1, 1)))

                # 1x1 convolution for skip connection
                self.skip_convs.append(nn.Conv1d(in_channels=dilation_channels,
                                                 out_channels=skip_channels,
                                                 kernel_size=(1, 1)))
                self.bn.append(nn.BatchNorm2d(residual_channels))
                new_dilation *=2
                receptive_field += additional_scope
                additional_scope *= 2
                if self.gcn_bool:
                    self.gconv.append(gcn(dilation_channels,residual_channels,dropout,support_len=self.supports_len))



        self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
                                  out_channels=end_channels,
                                  kernel_size=(1,1),
                                  bias=True)

        self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
                                    out_channels=out_dim,
                                    kernel_size=(1,1),
                                    bias=True)

        self.receptive_field = receptive_field
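# A minimal sketch of the aptinit branch above: torch.svd gives a rank-10
# factorization of a given adjacency, split into two node-embedding factors
# whose product approximates the original matrix. Sizes are illustrative.
import torch

num_nodes, rank = 20, 10
aptinit = torch.rand(num_nodes, num_nodes)
m, p, n = torch.svd(aptinit)  # aptinit == m @ torch.diag(p) @ n.t()
initemb1 = torch.mm(m[:, :rank], torch.diag(p[:rank] ** 0.5))
initemb2 = torch.mm(torch.diag(p[:rank] ** 0.5), n[:, :rank].t())
# initemb1 @ initemb2 is the best rank-10 approximation of aptinit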