def Sever_CG(actor_loss_grad, actor_grad_logp, n, nsteps=10, r=4, p=0.05):
    search_dir = None
    indices = list(range(n))
    for i in range(r):
        start_time = time.time()
        search_dir = conjugate_gradient_sever(
            actor_grad_logp[indices],
            actor_loss_grad[indices].mean(dim=0),
            x=search_dir,
            nsteps=nsteps)
        #         print("--- conjugate_gradient_sever: %s seconds ---" % (time.time() - start_time))
        # Per-sample residuals of the linear system: F x - g
        grads = actor_grad_logp[indices] * torch.mv(
            actor_grad_logp[indices],
            search_dir).unsqueeze(dim=1) - actor_loss_grad[indices]  ## Fx-g
        # Center the residuals before extracting the top singular direction
        mean_grads = grads.mean(dim=-2, keepdim=True)
        grads = grads - mean_grads

        start_time = time.time()
        u, s, v = torch.svd_lowrank(grads)
        #         print("--- svd time: %s seconds ---" % (time.time() - start_time))
        start_time = time.time()
        top_right_eigenvector = v[:, 0]

        # Outlier score: squared projection onto the top right singular vector
        outlier_score = torch.mv(grads, top_right_eigenvector)**2
        _, topk_index = torch.topk(outlier_score, k=round(n * p))
        for index in sorted(topk_index.tolist(), reverse=True):
            del indices[index]
#         print("--- time after svd: %s seconds ---" % (time.time() - start_time))
    return search_dir, indices
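The SVD step above is the Sever-style filter: center the per-sample residuals, project them onto the top right singular vector, and drop the fraction p with the largest squared projections. A minimal, self-contained sketch of just that filtering step (the helper name `sever_filter` and the synthetic residuals are illustrative):

import torch


def sever_filter(residuals, p=0.05):
    """Drop the fraction p of rows with the largest outlier scores; return kept row indices."""
    n = residuals.size(0)
    centered = residuals - residuals.mean(dim=0, keepdim=True)
    # A rank-1 factorization is enough: only the top right singular vector is needed.
    _, _, v = torch.svd_lowrank(centered, q=1)
    scores = torch.mv(centered, v[:, 0]) ** 2
    _, drop = torch.topk(scores, k=round(n * p))
    keep = torch.ones(n, dtype=torch.bool)
    keep[drop] = False
    return keep.nonzero(as_tuple=True)[0]


residuals = torch.randn(1000, 32)
residuals[:5] += 10.0                      # plant a few gross outliers
kept = sever_filter(residuals)
print(kept.numel(), "of 1000 samples kept")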
Example #2
def get_jacobian_svd(train_loader, net, batchsize, num_classes, weights=None, device=None):
    log("Computing jacobian")
    slices = get_slices(net)
    jac = get_jacobian(train_loader, net, batchsize, num_classes, device)
    if weights is not None:
        weighted_jacobian(jac, slices, weights)
    log("Computing SVD of jacobian with shape {}".format(jac.shape))
    U, D, V = torch.svd_lowrank(jac, q=len(jac))
    
    splitted_norms = np.zeros([len(slices), len(D)])
    for i, v in enumerate(V.T):
        for j, [ind_s, ind_e] in enumerate(slices):
            d = ind_e - ind_s
            vec_partial = v[ind_s: ind_e]

            splitted_norms[j][i] = torch.norm(vec_partial)
    return D.cpu().numpy(), splitted_norms, slices, net.layer_names
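The per-layer breakdown above splits each right singular vector of the Jacobian by parameter slice and records the norm of every slice. A small standalone sketch of that bookkeeping on a random matrix, with hypothetical slices:

import numpy as np
import torch

jac = torch.randn(64, 30)                  # stand-in Jacobian (samples x parameters)
slices = [(0, 10), (10, 25), (25, 30)]     # hypothetical per-layer parameter slices

U, D, V = torch.svd_lowrank(jac, q=min(jac.shape))
split_norms = np.zeros([len(slices), len(D)])
for i, v in enumerate(V.T):                # i-th right singular vector
    for j, (ind_s, ind_e) in enumerate(slices):
        split_norms[j][i] = torch.norm(v[ind_s:ind_e])

# Each right singular vector has unit norm, so its squared slice norms sum to 1.
print(np.allclose((split_norms ** 2).sum(axis=0), 1.0, atol=1e-4))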
Example #3
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[2]
    dataset_config['path'] = dataset_config['path'][:-4] + str(1)

    dataset = get_dataset(dataset_config)
    adj = generate_daj_mat(dataset)
    part_adj = adj[:dataset.n_users, dataset.n_users:]
    part_adj_tensor = get_sparse_tensor(part_adj, 'cpu')
    with torch.no_grad():
        u, s, v = torch.svd_lowrank(part_adj_tensor, 64)

    sort_ranked_users, sort_ranked_items = graph_rank_nodes(dataset, 'sort')
    degree_ranked_users, degree_ranked_items = graph_rank_nodes(
        dataset, 'degree')
    pr_ranked_users, pr_ranked_items = graph_rank_nodes(dataset, 'page_rank')
    ranked_users = (sort_ranked_users, degree_ranked_users, pr_ranked_users)
    ranked_items = (sort_ranked_items, degree_ranked_items, pr_ranked_items)
    pdf = PdfPages('figure_5.pdf')
    fig, ax = plt.subplots(nrows=1,
                           ncols=2,
                           constrained_layout=True,
                           figsize=(11, 4))
    axes = ax.flatten()
    plot_error(part_adj,
               u.cpu().numpy(), ranked_users, axes[0], device, 'users')
    plot_error(
        part_adj.T,
        v.cpu().numpy(),
        ranked_items,
        axes[1],
        device,
        'items',
    )
    pdf.savefig()
    plt.close(fig)
    pdf.close()
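torch.svd_lowrank accepts sparse input, which is what the snippet above relies on for the user-item block of the adjacency matrix. A minimal sketch with a random sparse matrix (shapes, density, and rank are illustrative):

import torch

# Random sparse "user-item" interaction matrix (2000 users x 3000 items, ~0.5% dense)
n_users, n_items, nnz = 2000, 3000, 30000
indices = torch.stack([torch.randint(0, n_users, (nnz,)),
                       torch.randint(0, n_items, (nnz,))])
part_adj = torch.sparse_coo_tensor(indices, torch.ones(nnz),
                                   (n_users, n_items)).coalesce()

with torch.no_grad():
    u, s, v = torch.svd_lowrank(part_adj, q=64)

print(u.shape, s.shape, v.shape)           # (2000, 64), (64,), (3000, 64)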
Example #4
def eigenthings_tensor_utils(t,
                             device=None,
                             out_device='cpu',
                             symmetric=False,
                             topn=-1):
    t = t.to(device)
    if topn >= 0:
        _, eigenvals, eigenvecs = torch.svd_lowrank(t,
                                                    q=min(
                                                        topn,
                                                        t.size()[0],
                                                        t.size()[1]))
        eigenvecs.transpose_(0, 1)
    else:
        if symmetric:
            eigenvals, eigenvecs = torch.symeig(t, eigenvectors=True)  # pylint: disable=no-member
            eigenvals = eigenvals.flip(0)
            eigenvecs = eigenvecs.transpose(0, 1).flip(0)
        else:
            _, eigenvals, eigenvecs = torch.svd(t, compute_uv=True)  # pylint: disable=no-member
            eigenvecs = eigenvecs.transpose(0, 1)
    return eigenvals, eigenvecs
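For a symmetric positive semi-definite matrix, the singular values returned by torch.svd_lowrank coincide with the leading eigenvalues, which is why the helper above treats S and V as eigenvalues and eigenvectors. A quick check of that equivalence on an exactly low-rank PSD matrix (sizes are arbitrary):

import torch

torch.manual_seed(0)
a = torch.randn(200, 10)
t = a @ a.t()                              # symmetric PSD, exactly rank 10

_, vals_lr, vecs_lr = torch.svd_lowrank(t, q=10)
vals_full = torch.linalg.eigvalsh(t)       # eigenvalues in ascending order
top10 = vals_full.flip(0)[:10]

print(torch.allclose(vals_lr, top10, rtol=1e-3, atol=1e-3))   # expect True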
Example #5
def approximate_sim(src: Tensor,
                    mapping: Tensor,
                    trg: Tensor,
                    rank=1000,
                    niter=2,
                    keep_k=100,
                    batch_size=5000):
    srclen = src.size(-1)
    trglen = trg.size(-1)
    tgm = spspmm(src.t(), mapping)
    us, ss, vs = torch.svd_lowrank(tgm, rank, niter)
    vs = vs.t()
    print('calculate svd complete')
    # [N, R] [R] [R, T]
    result = None

    def merge(a, b):
        if a is None:
            return b
        return torch.sparse_coo_tensor(
            torch.cat([a._indices(), b._indices()], 1),
            torch.cat([a._values(), b._values()], 0),
            [a.size(0) + b.size(0), a.size(1)])

    for i_batch in range(0, srclen, batch_size):
        i_end = min(i_batch + batch_size, srclen)
        batched_tgm = us[i_batch:i_end].mm(torch.diag(ss)).mm(vs)
        val, ind = batched_tgm.topk(dim=-1, k=keep_k)
        batched_tgm = topk2spmat(val, ind, batched_tgm.size(), 0,
                                 batched_tgm.device)
        batched_tgm = spspmm(batched_tgm, trg)
        # save gpu memory
        result = merge(result, batched_tgm)
        if i_batch % (10 * batch_size) == 0:  # report progress every 10 batches
            print('batch', i_batch, 'complete, result size',
                  result._values().size())

    return result
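The batching above avoids materializing the full dense product: each slab of rows is rebuilt from the low-rank factors as U[i:j] @ diag(S) @ V^T, and only its top-k entries are kept. A compact sketch of that slab-wise reconstruction (sizes, rank, and batch size are illustrative):

import torch

m, n, rank, batch_size = 2000, 1500, 50, 500
A = torch.randn(m, rank) @ torch.randn(rank, n)    # dense stand-in for the sparse product

us, ss, vs = torch.svd_lowrank(A, q=rank, niter=2)
vs_t = vs.t()                                      # (rank, n)

rows = []
for i in range(0, m, batch_size):
    # Rebuild one slab of rows from the low-rank factors.
    rows.append(us[i:i + batch_size] @ torch.diag(ss) @ vs_t)
approx = torch.cat(rows, dim=0)

print(torch.allclose(approx, A, rtol=1e-3, atol=1e-3))         # expect True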
Example #6
def mds_torch(pre_dist_mat, weights=None, iters=10, tol=1e-5, eigen=False, verbose=2):
    """ Gets distance matrix. Outputs 3d. See below for wrapper. 
        Assumes (for now) distogram is (N x N) and symmetric
        Outs: 
        * best_3d_coords: (batch x 3 x N)
        * historic_stresses: (batch x steps)
    """
    device, dtype = pre_dist_mat.device, pre_dist_mat.type()
    # ensure batched MDS
    pre_dist_mat = expand_dims_to(pre_dist_mat, length = ( 3 - len(pre_dist_mat.shape) ))
    # start
    batch, N, _ = pre_dist_mat.shape
    diag_idxs = np.arange(N)
    his = [torch.tensor([np.inf]*batch, device=device)]

    # initialize by eigendecomposition: https://www.lptmc.jussieu.fr/user/lesne/bioinformatics.pdf
    # follow : https://www.biorxiv.org/content/10.1101/2020.11.27.401232v1.full.pdf
    D = pre_dist_mat**2
    M =  0.5 * (D[:, :1, :] + D[:, :, :1] - D) 
    # loop the SVD over the batch because it's faster (2-3x on CPU, 1-2x on GPU):
    # https://discuss.pytorch.org/t/batched-svd-lowrank-being-much-slower-than-loop-implementation-both-cpu-and-gpu/119336
    svds = [torch.svd_lowrank(mi) for mi in M]
    u = torch.stack([svd[0] for svd in svds], dim=0)
    s = torch.stack([svd[1] for svd in svds], dim=0)
    v = torch.stack([svd[2] for svd in svds], dim=0)
    best_3d_coords = torch.bmm(u, torch.diag_embed(s).sqrt())[..., :3]

    # eigendecomposition only - much faster, but weights are not supported
    if weights is None and eigen==True:
        return torch.transpose( best_3d_coords, -1, -2), torch.zeros_like(torch.stack(his, dim=0))
    elif eigen==True:
        if verbose:
            print("Can't use the eigen flag if weights are active. Falling back to the iterative solver.")

    # continue the iterative way
    if weights is None:
        weights = torch.ones_like(pre_dist_mat)

    # iterative updates:
    for i in range(iters):
        # compute distance matrix of coords and stress
        best_3d_coords = best_3d_coords.contiguous()
        dist_mat = torch.cdist(best_3d_coords, best_3d_coords, p=2).clone()

        stress   = ( weights * (dist_mat - pre_dist_mat)**2 ).sum(dim=(-1,-2)) * 0.5
        # perturb - update X using the Guttman transform - sklearn-like
        dist_mat[ dist_mat <= 0 ] += 1e-7
        ratio = weights * (pre_dist_mat / dist_mat)
        B = -ratio
        B[:, diag_idxs, diag_idxs] += ratio.sum(dim=-1)

        # update
        coords = (1. / N * torch.matmul(B, best_3d_coords))
        dis = torch.norm(coords, dim=(-1, -2))

        if verbose >= 2:
            print('it: %d, stress %s' % (i, stress))
        # update metrics if relative improvement above tolerance
        if (his[-1] - stress / dis).mean() <= tol:
            if verbose:
                print('breaking at iteration %d with stress %s' % (i,
                                                                   stress / dis))
            break

        best_3d_coords = coords
        his.append( stress / dis )

    return torch.transpose(best_3d_coords, -1,-2), torch.stack(his, dim=0)
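The eigendecomposition initialization above is classical MDS: build a Gram matrix from the squared distances relative to an anchor point and keep the top three components of its low-rank SVD. A small sanity check that this recovers 3D coordinates up to a rigid transform (random points, no weights):

import torch

torch.manual_seed(0)
N = 64
coords = torch.randn(N, 3)
dist = torch.cdist(coords, coords)          # (N, N) ground-truth distances

D = dist ** 2
M = 0.5 * (D[:1, :] + D[:, :1] - D)         # Gram matrix relative to the first point
u, s, v = torch.svd_lowrank(M)
recon = u[:, :3] * s[:3].sqrt()             # coordinates, up to rotation/translation

# Pairwise distances are invariant under the unknown rigid transform, so compare those.
print(torch.allclose(torch.cdist(recon, recon), dist, atol=1e-3))   # expect True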
Example #7
def mds_torch(pre_dist_mat, weights=None, iters=10, tol=1e-5, eigen=False, verbose=2):
    """ Gets distance matrix. Outputs 3d. See below for wrapper. 
        Assumes (for now) distogram is (N x N) and symmetric
        Outs: 
        * best_3d_coords: (batch x 3 x N)
        * historic_stresses: (batch x steps)
    """
    device, dtype = pre_dist_mat.device, pre_dist_mat.type()

    # start
    batch, N, _ = pre_dist_mat.shape
    diag_idxs = np.arange(N)
    his = [torch.tensor([np.inf]*batch, device=device)]

    # eigendecomposition path - much faster, but weights are not supported
    # https://www.biorxiv.org/content/10.1101/2020.11.27.401232v1.full.pdf
    if eigen == True and weights is None:
        preds_3d = []
        for bi in range(pre_dist_mat.shape[0]):
            D = pre_dist_mat[bi]**2
            M = D[:1, :] + D[:, :1] - D 
            u,s,v = torch.svd_lowrank(M/2)
            preds_3d.append( (u @ torch.diag(s).sqrt())[:, :3].t() )
        return torch.stack(preds_3d, dim=0), torch.zeros_like(torch.stack(his, dim=0))
    elif eigen == True:
        if verbose: 
            print("Can't use eigen flag if weights are active. Fallback to iterative")

    # continue the iterative way
    if weights is None:
        weights = torch.ones_like(pre_dist_mat)

    # ensure batched MDS
    pre_dist_mat = expand_dims_to(pre_dist_mat, length = ( 3 - len(pre_dist_mat.shape) ))

    # init random coords
    best_stress = float("Inf") * torch.ones(batch, device = device).type(dtype)
    best_3d_coords = 2*torch.rand(batch, N, 3, device = device).type(dtype) - 1
    # iterative updates:
    for i in range(iters):
        # compute distance matrix of coords and stress
        dist_mat = torch.cdist(best_3d_coords, best_3d_coords, p=2).clone()
        stress   = ( weights * (dist_mat - pre_dist_mat)**2 ).sum(dim=(-1,-2)) * 0.5
        # perturb - update X using the Guttman transform - sklearn-like
        dist_mat[ dist_mat <= 0 ] += 1e-7
        ratio = weights * (pre_dist_mat / dist_mat)
        B = -ratio
        B[:, diag_idxs, diag_idxs] += ratio.sum(dim=-1)

        # update
        coords = (1. / N * torch.matmul(B, best_3d_coords))
        dis = torch.norm(coords, dim=(-1, -2))
        if verbose >= 2:
            print('it: %d, stress %s' % (i, stress))
        # update metrics if relative improvement above tolerance
        if (best_stress - stress / dis).mean() <= tol:
            if verbose:
                print('breaking at iteration %d with stress %s' % (i,
                                                                   stress / dis))
            break

        pre_dist_mat = dist_mat
        best_3d_coords = coords
        his.append( stress / dis )

    return torch.transpose(best_3d_coords, -1,-2), torch.stack(his, dim=0)
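The iterative branch in both variants is a SMACOF-style update: each step applies the Guttman transform B(X) X / N, which in the unweighted case is the classic stress-majorization step. A minimal unbatched single-function sketch (the helper name and sizes are illustrative):

import torch


def guttman_step(coords, target_dist):
    """One unweighted SMACOF / Guttman-transform update."""
    N = coords.size(0)
    dist = torch.cdist(coords, coords).clamp_min(1e-7)   # avoid division by zero
    ratio = target_dist / dist
    B = -ratio
    idx = torch.arange(N)
    B[idx, idx] += ratio.sum(dim=-1)
    return B @ coords / N


torch.manual_seed(0)
truth = torch.randn(50, 3)
target = torch.cdist(truth, truth)
X = torch.randn(50, 3)                      # random start
for _ in range(50):
    X = guttman_step(X, target)
stress = ((torch.cdist(X, X) - target) ** 2).sum() * 0.5
print(f"final stress: {stress.item():.4f}")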
Example #8
# Another benefit of using clone is that the copy is recorded in the computation graph, so gradients flowing back to the copy also propagate to the source tensor

x = torch.randn(1)
print(x)
print(x.item())

# Linear algebra
x = torch.rand(5, 3)
print(torch.trace(x))
print(torch.diag(x))
print(torch.triu(x))  # upper triangular part
print(torch.mm(x, torch.rand(3, 1)))
print(torch.t(x))
print(torch.dot(x[0, :], torch.rand(3)))  # dot product of 1-D vectors
print(torch.inverse(x[:3, :]))
print(torch.svd_lowrank(x))

# Broadcasting
x = torch.arange(1, 3).view(1, 2)
print(x)
y = torch.arange(1, 4).view(3, 1)
print(y)
print(x + y)

# Addition allocates new memory for the result
x = torch.rand(3, 1)
y = torch.rand(3, 1)
id_before = id(y)
y = x + y
print(id(y) == id_before)
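torch.svd_lowrank returns factors U, S, V with A ≈ U diag(S) V^T; for a small matrix like the 5 x 3 example above, choosing q equal to the full rank makes the reconstruction essentially exact. A short continuation in the same tutorial style:

import torch

# Low-rank SVD: x ~= u @ diag(s) @ v.T
x = torch.rand(5, 3)
u, s, v = torch.svd_lowrank(x, q=3)               # q = full rank of the 5 x 3 matrix
print(torch.dist(u @ torch.diag(s) @ v.t(), x))   # ~0: reconstruction is (nearly) exact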
Example #9
    def _initialize_H_W(self, eps=1e-6):
        n_samples, n_features = self.X.shape
        if self._init_method is None:
            if self.k < min(n_samples, n_features):
                self._init_method = 'nndsvdar'
            else:
                self._init_method = 'random'

        if self._init_method in ['nndsvd', 'nndsvda', 'nndsvdar']:
            U, S, V = torch.svd_lowrank(self.X, q=self.k)

            H = torch.zeros_like(U,
                                 dtype=self._tensor_dtype,
                                 device=self._device_type)
            W = torch.zeros_like(V.T,
                                 dtype=self._tensor_dtype,
                                 device=self._device_type)
            H[:, 0] = S[0].sqrt() * U[:, 0]
            W[0, :] = S[0].sqrt() * V[:, 0]

            # components 1..k-1 (component 0 was filled from the leading singular pair above)
            for j in range(1, self.k):
                x, y = U[:, j], V[:, j]
                x_p, y_p = x.maximum(
                    torch.zeros_like(x, device=self._device_type)), y.maximum(
                        torch.zeros_like(y, device=self._device_type))
                x_n, y_n = x.minimum(
                    torch.zeros_like(
                        x, device=self._device_type)).abs(), y.minimum(
                            torch.zeros_like(y,
                                             device=self._device_type)).abs()
                x_p_nrm, y_p_nrm = x_p.norm(p=2), y_p.norm(p=2)
                x_n_nrm, y_n_nrm = x_n.norm(p=2), y_n.norm(p=2)
                m_p, m_n = x_p_nrm * y_p_nrm, x_n_nrm * y_n_nrm

                if m_p > m_n:
                    u, v, sigma = x_p / x_p_nrm, y_p / y_p_nrm, m_p
                else:
                    u, v, sigma = x_n / x_n_nrm, y_n / y_n_nrm, m_n

                factor = (S[j] * sigma).sqrt()
                H[:, j] = factor * u
                W[j, :] = factor * v

            H[H < eps] = 0
            W[W < eps] = 0

            if self._init_method == 'nndsvda':
                avg = self.X.mean()
                H[H == 0] = avg
                W[W == 0] = avg
            elif self._init_method == 'nndsvdar':
                avg = self.X.mean()
                H[H == 0] = avg / 100 * torch.rand(H[H == 0].shape,
                                                   dtype=self._tensor_dtype,
                                                   device=self._device_type)
                W[W == 0] = avg / 100 * torch.rand(W[W == 0].shape,
                                                   dtype=self._tensor_dtype,
                                                   device=self._device_type)
        elif self._init_method == 'random':
            avg = torch.sqrt(self.X.mean() / self.k)
            H = torch.abs(avg * torch.randn((self.X.shape[0], self.k),
                                            dtype=self._tensor_dtype,
                                            device=self._device_type))
            W = torch.abs(avg * torch.randn((self.k, self.X.shape[1]),
                                            dtype=self._tensor_dtype,
                                            device=self._device_type))
        else:
            raise ValueError(
                f"Invalid init parameter. Got {self._init_method}, but require one of (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random')."
            )

        self.H = H
        self.W = W
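The NNDSVD branch above splits each singular-vector pair into its positive and negative parts and keeps whichever pair carries more energy. A condensed standalone sketch of the same idea (the helper name `nndsvd_init`, the data, and k are illustrative):

import torch


def nndsvd_init(X, k, eps=1e-6):
    """NNDSVD initialization for NMF factors H (n x k) and W (k x m)."""
    U, S, V = torch.svd_lowrank(X, q=k)
    H = torch.zeros(X.size(0), k)
    W = torch.zeros(k, X.size(1))
    # Leading pair: non-negative up to sign for non-negative X, so take absolute values.
    H[:, 0] = S[0].sqrt() * U[:, 0].abs()
    W[0, :] = S[0].sqrt() * V[:, 0].abs()
    for j in range(1, k):
        x, y = U[:, j], V[:, j]
        x_p, y_p = x.clamp_min(0), y.clamp_min(0)
        x_n, y_n = (-x).clamp_min(0), (-y).clamp_min(0)
        m_p, m_n = x_p.norm() * y_p.norm(), x_n.norm() * y_n.norm()
        if m_p >= m_n:
            u_j, v_j, sigma = x_p / x_p.norm(), y_p / y_p.norm(), m_p
        else:
            u_j, v_j, sigma = x_n / x_n.norm(), y_n / y_n.norm(), m_n
        factor = (S[j] * sigma).sqrt()
        H[:, j] = factor * u_j
        W[j, :] = factor * v_j
    H[H < eps] = 0
    W[W < eps] = 0
    return H, W


H, W = nndsvd_init(torch.rand(100, 40), k=8)
print(H.shape, W.shape, bool((H >= 0).all()), bool((W >= 0).all()))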
Example #10
                                          center].contiguous()
                curr_img_crop = curr_img_stack[:, :, coord_to_crop[0] -
                                               center:coord_to_crop[0] +
                                               center, coord_to_crop[1] -
                                               center:coord_to_crop[1] +
                                               center].detach()

                # Reconstruction error
                Y = (curr_img_crop - dense_crop - sparse_crop)
                # Nuclear norm
                dense_vector = dense_crop.view(dense_part.shape[0],
                                               dense_part.shape[1], -1)
                with autocast(enabled=False):
                    (u, s,
                     v) = torch.svd_lowrank(dense_vector.permute(0, 2,
                                                                 1).float(),
                                            q=args.rank)
                    sOriginal = torch.autograd.Variable(s.clone())
                    # singular value soft-thresholding (proximal step for the nuclear norm)
                    s = torch.sign(s) * torch.max(
                        s.abs() - net.mu_sum_constraint, torch.zeros_like(s))

                mean_eigen_values += sOriginal.mean(dim=0).detach().cpu()
                mean_eigen_values_cropped += s.mean(dim=0).detach().cpu()

                # Reconstruct the images from the eigen information
                for nB in range(s.shape[0]):
                    currS = torch.diag(s[nB, :])
                    dense_vector[nB,
                                 ...] = torch.mm(torch.mm(u[nB, ...], currS),
                                                 v[nB, ...].t()).t()
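The thresholding above is the proximal operator of the nuclear norm: shrink every singular value toward zero by a constant and rebuild the matrix from the shrunk spectrum. A standalone sketch on a matrix with known singular values (the sizes, spectrum, and threshold are illustrative):

import torch


def svd_soft_threshold(A, tau, q=16):
    """Soft-threshold the top-q singular values of A (proximal step of tau * nuclear norm)."""
    u, s, v = torch.svd_lowrank(A, q=q)
    s_shrunk = torch.clamp(s - tau, min=0.0)
    return u @ torch.diag(s_shrunk) @ v.t(), s, s_shrunk


torch.manual_seed(0)
# A matrix with known singular values 10, 9, ..., 1 plus a little noise.
U, _ = torch.linalg.qr(torch.randn(40, 10))
V, _ = torch.linalg.qr(torch.randn(30, 10))
A = U @ torch.diag(torch.arange(10, 0, -1).float()) @ V.t() + 0.01 * torch.randn(40, 30)

prox, s, s_shrunk = svd_soft_threshold(A, tau=5.0, q=10)
print(s)            # roughly 10, 9, ..., 1
print(s_shrunk)     # roughly 5, 4, 3, 2, 1, 0, 0, 0, 0, 0
print(prox.shape)   # the rebuilt low-rank matrix, (40, 30)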