Example 1
import numpy as np
import scipy.sparse as sps


def augment_with_user_similarity_best_scores(urm: sps.csr_matrix,
                                             similarity,
                                             topK,
                                             value=0.5,
                                             remove_seen=True,
                                             users=None):
    # Create a mutable copy of the URM (LIL makes element-wise updates cheap)
    augmented_urm = urm.tolil(copy=True).astype(np.float64)

    # Compute the score matrix: user-user similarity times the URM
    score_matrix = similarity.dot(urm).astype(np.float64)

    # Remove items that have already been seen
    if remove_seen:
        indices_seen = urm.nonzero()
        score_matrix[indices_seen] = float("-inf")

    # Keep only the rows of the requested users (row i then refers to users[i])
    if users is not None:
        score_matrix = score_matrix[users]

    # Keep every generated interaction whose score reaches the topK threshold
    top_indices = score_matrix.data.argpartition(-topK)[-topK:]
    score_threshold = score_matrix.data[top_indices].min()
    rows, cols, scores = sps.find(score_matrix)
    if users is not None:
        # Map filtered row positions back to the original user ids
        rows = np.asarray(users)[rows]
    user_item = [(user, item) for user, item, score in zip(rows, cols, scores)
                 if score >= score_threshold]

    # Insert the best items in the urm matrix
    for user, item in user_item:
        augmented_urm[user, item] += value

    # Return the augmented urm
    return augmented_urm.tocsr()
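A minimal usage sketch (hypothetical toy data; the co-occurrence matrix below merely stands in for whatever user-user similarity model you actually use):

import numpy as np
import scipy.sparse as sps

urm = sps.csr_matrix(np.array([[1, 0, 1, 0, 0],
                               [1, 1, 0, 0, 0],
                               [0, 1, 0, 1, 0],
                               [0, 0, 1, 0, 1]], dtype=np.float64))
similarity = urm.dot(urm.T)  # crude user-user co-occurrence similarity
similarity.setdiag(0)        # a user should not reinforce their own profile
augmented = augment_with_user_similarity_best_scores(urm, similarity, topK=3)
print(augmented.toarray())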
Example 2
def augment_with_best_recommended_items(urm: sps.csr_matrix, rec, users, cutoff, value=0.5):
    # Create a mutable copy of the URM
    augmented_urm = urm.tolil(copy=True).astype(np.float64)
    for user in users:
        recommended_items = rec.recommend(user, cutoff=cutoff)
        for item in recommended_items:
            augmented_urm[user, item] += value

    # Return the augmented urm
    return augmented_urm.tocsr()
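Here `rec` is only assumed to expose a `recommend(user, cutoff=...)` method; a throwaway stub (hypothetical) makes that contract concrete:

import numpy as np
import scipy.sparse as sps

class TopPopStub:
    """Toy recommender: always returns the globally most popular items."""
    def __init__(self, urm):
        self.popular = np.asarray(urm.sum(axis=0)).ravel().argsort()[::-1]

    def recommend(self, user, cutoff=10):
        return self.popular[:cutoff]

urm = sps.random(20, 50, density=0.1, format="csr")
augmented = augment_with_best_recommended_items(urm, TopPopStub(urm),
                                                users=range(20), cutoff=5)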
Example 3
def attack(self, model, X, A: sp.csr_matrix, labels, train_idx):
    # Method excerpt: assumes `numpy as np`, `scipy.sparse as sp`,
    # `pickle as pk`, and an external `normalize` helper that applies
    # symmetric adjacency normalization.
    origin_dim = A.shape[0]
    num_attack = 500
    pre_attack = np.zeros(num_attack)
    self.eval_real(X, A, labels, train_idx)
    A_hat = normalize(A, symmetric=True)
    for i in range(self.args.num_adv):
        print(
            '----------------  adding node %d -------------------------' %
            (i + 1))
        # Node degrees, incremented by one, and their inverse square root
        D_hat = np.sum(A, axis=1) + 1
        D_inv_sqrt = np.power(D_hat, -0.5)
        c_new, dif = self.retrain_model(model, X, A_hat, labels, train_idx)
        d = 100  # number of edges the injected node gets
        # Target the nodes whose loss changed the most after retraining
        attack_idx = np.argpartition(dif, -num_attack)[-num_attack:]
        e = np.random.choice(X.shape[0], d)
        x = np.random.randn(X.shape[1])
        W = model.W.cpu().detach().numpy()
        # num_attack = np.sum(np.sort(pre_attack) == np.sort(attack_idx))
        print('# of same attack node :%d' %
              len(set(pre_attack) & set(attack_idx)))
        pre_attack = attack_idx
        done = False
        count = 0
        # One alternating step: optimize the injected features x, then the
        # edges e (the commented-out loop iterated this to a fixed point).
        # while not done:
        # count += 1
        # print('iteration %d' % count)
        dx = self.compute_dx(d + 1, D_inv_sqrt, A_hat, e, attack_idx, W,
                             c_new, labels)
        x = 0.5 - 0.5 * np.sign(dx)  # binarize: 1 where the gradient is negative
        de = self.compute_de(d + 1, D_inv_sqrt, X, A_hat, x, e, attack_idx,
                             W, c_new, labels)
        e = np.argpartition(de, d)[:d]
        # e_.sort()
        # done = np.all(e_ == e)
        # e = e_
        # Append the injected node's features and wire its edges symmetrically
        X = np.concatenate([X, x[np.newaxis, :]], axis=0)
        new_dim = A.shape[0] + 1
        lil_A = A.tolil()
        lil_A.resize(new_dim, new_dim)
        lil_A[e, new_dim - 1] = 1
        lil_A[new_dim - 1, e] = 1
        A = lil_A.tocsr()
        A_hat = normalize(A, symmetric=True)
        self.eval_model(model, X, A_hat, labels, train_idx)
        self.eval_real(X, A, labels, train_idx)
    # Persist only the injected nodes' rows
    new_adj = A[origin_dim:, :]
    new_feat = X[origin_dim:, :]
    np.save('feature.npy', new_feat)
    with open('adj.pkl', 'wb') as f:
        pk.dump(new_adj, f)  # pickle.dump takes (obj, file) in that order
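`normalize` is an external helper not shown in the snippet. A plausible sketch of what its symmetric variant computes (standard GCN-style renormalization; whether self-loops are added here or upstream is an assumption):

import numpy as np
import scipy.sparse as sp

def normalize(A: sp.csr_matrix, symmetric: bool = True) -> sp.csr_matrix:
    # A_hat = D^-1/2 (A + I) D^-1/2, or row-wise D^-1 (A + I)
    A = A + sp.eye(A.shape[0], format="csr")
    deg = np.asarray(A.sum(axis=1)).ravel()
    if symmetric:
        d_inv_sqrt = sp.diags(np.power(deg, -0.5))
        return (d_inv_sqrt @ A @ d_inv_sqrt).tocsr()
    return (sp.diags(1.0 / deg) @ A).tocsr()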
Example 4
def normalize_adj(adj: sp.csr_matrix):
    """Normalize adjacency matrix and convert it to a sparse tensor."""
    if sp.isspmatrix(adj):
        # Add self-loops, then apply symmetric normalization D^-1/2 A D^-1/2
        adj = adj.tolil()
        adj.setdiag(1)
        adj = adj.tocsr()
        deg = np.ravel(adj.sum(1))
        deg_sqrt_inv = 1 / np.sqrt(deg)
        adj_norm = adj.multiply(deg_sqrt_inv[:, None]).multiply(deg_sqrt_inv[None, :])
    elif torch.is_tensor(adj):
        deg = adj.sum(1)
        deg_sqrt_inv = 1 / torch.sqrt(deg)
        adj_norm = adj * deg_sqrt_inv[:, None] * deg_sqrt_inv[None, :]
    else:
        raise TypeError('adj must be a scipy sparse matrix or a torch tensor')
    # to_sparse_tensor is an external helper from the surrounding module
    return to_sparse_tensor(adj_norm)
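The two chained `multiply` calls implement D^-1/2 A D^-1/2 via broadcasting; a quick check against the explicit diagonal-matrix form, on a hypothetical toy graph:

import numpy as np
import scipy.sparse as sp

adj = sp.random(6, 6, density=0.4, format="csr")
adj = ((adj + adj.T) > 0).astype(np.float64)  # symmetrize the toy graph
adj = adj.tolil()
adj.setdiag(1)  # self-loops guarantee deg >= 1, so the sqrt is safe
adj = adj.tocsr()

deg = np.ravel(adj.sum(1))
deg_sqrt_inv = 1 / np.sqrt(deg)
via_broadcast = adj.multiply(deg_sqrt_inv[:, None]).multiply(deg_sqrt_inv[None, :])
via_diag = sp.diags(deg_sqrt_inv) @ adj @ sp.diags(deg_sqrt_inv)
assert np.allclose(via_broadcast.toarray(), via_diag.toarray())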
Example 5
from typing import Tuple

import numpy as np
import scipy.sparse as sp


def with_cliques(adjacency: sp.csr_matrix,
                 clique_size: int,
                 num_cliques: int = 1) -> Tuple[sp.csr_matrix, np.ndarray]:
    """
    Get adjacency matrix of dataset with cliques.

    A clique is defined as a set of nodes where each node is a neighbor of every other
    node.

    Args:
        adjacency: adjacency matrix to start with.
        clique_size: size of each clique.
        num_cliques: number of cliques to add.

    Returns:
        augmented_adjacency: adjacency with cliques added.
        cliques: [num_cliques, clique_size] int32 array of indices of clique nodes.
    """
    num_nodes = adjacency.shape[0]
    adjacency = adjacency.tolil()
    dtype = adjacency.dtype
    rows = adjacency.rows
    data = adjacency.data
    cliques = np.empty((num_cliques, clique_size), dtype=np.int32)
    for i in range(num_cliques):
        clique = np.random.choice(num_nodes, clique_size, replace=False)
        clique.sort()
        cliques[i] = clique
        for c in clique:
            # Union the clique into c's neighbor list, preserving whether c
            # originally had a self-loop
            row = set(rows[c])
            contains_c = c in row
            row.update(clique)
            if not contains_c:
                row.remove(c)
            rows[c] = sorted(row)
            data[c] = np.ones((len(row),), dtype=dtype)
    return adjacency.tocsr(), cliques
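A hypothetical usage sketch: add one 4-clique to a random symmetric graph and verify that every pair inside the clique is connected:

import numpy as np
import scipy.sparse as sp

adjacency = sp.random(50, 50, density=0.05, format="csr")
adjacency = ((adjacency + adjacency.T) > 0).astype(np.float64)
augmented, cliques = with_cliques(adjacency, clique_size=4, num_cliques=1)
sub = augmented[np.ix_(cliques[0], cliques[0])].toarray()
off_diag = sub[~np.eye(4, dtype=bool)]
assert (off_diag == 1).all()  # every within-clique pair is now an edge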
Example 6
from typing import Tuple

import numpy as np
import scipy.sparse as sp


def with_attribute_anomolies(
        node_attrs: sp.csr_matrix,
        num_candidates: int,
        num_anomolies: int = 1) -> Tuple[sp.csr_matrix, np.ndarray]:
    """
    Get attribute matrix with some rows replaced with others.

    For each anomaly, we replace the node's attributes with those of the candidate
    whose attributes are furthest away from the original w.r.t. the Euclidean norm,
    out of `num_candidates` randomly drawn candidates.

    Args:
        node_attrs: [num_nodes, num_attrs] sparse attributes.
        num_candidates: number of candidates per anomaly.
        num_anomolies: number of anomalies to overwrite.

    Returns:
        augmented_node_attrs: node attributes with anomalous rows replaced.
        mapping: [num_anomolies, 2] int32 array, where
        `augmented_node_attrs[mapping[i, 0]] == node_attrs[mapping[i, 1]]`
    """
    num_nodes = node_attrs.shape[0]
    node_attrs_lil = node_attrs.tolil()
    anomolies = np.random.choice(num_nodes, num_anomolies, replace=False)
    anomolies.sort()
    mapping = np.empty((num_anomolies, 2), dtype=np.int32)
    for i, a in enumerate(anomolies):
        candidates = np.random.choice(num_nodes, num_candidates, replace=False)
        norms = np.linalg.norm(node_attrs[a].todense() -
                               node_attrs[candidates].todense(),
                               axis=-1)
        max_norm = np.argmax(norms)
        replacement = candidates[max_norm]
        node_attrs_lil[a] = node_attrs[replacement]
        mapping[i] = a, replacement
    return node_attrs_lil.tocsr(), mapping
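A quick hypothetical check of the returned mapping contract:

import numpy as np
import scipy.sparse as sp

node_attrs = sp.random(30, 8, density=0.3, format="csr")
augmented, mapping = with_attribute_anomolies(node_attrs, num_candidates=5,
                                              num_anomolies=3)
for original, replacement in mapping:
    assert np.allclose(augmented[original].toarray(),
                       node_attrs[replacement].toarray())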
Example 7
def removeEye(adj: sp.csr_matrix):
    """Return a copy of `adj` with the diagonal (self-loops) zeroed."""
    adj = adj.tolil(copy=True)  # LIL handles sparsity-structure changes gracefully
    adj.setdiag(0)
    return adj.tocsr()
Example 8
def addEye(adj: sp.csr_matrix):
    """Return a copy of `adj` with ones on the diagonal (self-loops added)."""
    adj = adj.tolil(copy=True)  # LIL handles sparsity-structure changes gracefully
    adj.setdiag(1)
    return adj.tocsr()
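A hypothetical toy demo of both helpers; note that `addEye(removeEye(adj))` sets every diagonal entry to 1, regardless of what was there before. Routing through LIL sidesteps the SparseEfficiencyWarning that CSR raises when `setdiag` has to insert new entries:

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[1., 1., 0.],
                              [1., 0., 1.],
                              [0., 1., 1.]]))
no_loops = removeEye(adj)
print(no_loops.diagonal())          # [0. 0. 0.]
print(addEye(no_loops).diagonal())  # [1. 1. 1.]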