Exemple #1
0
def main():
    """Command-line entry point: embed a graph stored in a .mat file.

    Parses the command line, loads the matrix stored under
    ``--matfile-variable-name`` from the ``--input`` file, optionally turns
    it into a row-normalized transition matrix, runs ``randne_projection``
    followed by ``randne_merge``, and saves the merged result under the key
    ``'emb'`` in the ``--output`` file.
    """
    parser = ArgumentParser('randne',
                            formatter_class=ArgumentDefaultsHelpFormatter,
                            conflict_handler='resolve')
    # A required option must always carry a value; with nargs='?' a bare
    # `--input` would parse to None and only fail later inside loadmat().
    parser.add_argument('--input',
                        required=True,
                        help='Input graph file')
    parser.add_argument(
        '--matfile-variable-name',
        default='network',
        help='Variable name of adjacency matrix inside a .mat file')
    parser.add_argument('--output',
                        required=True,
                        help='Output representation file')
    parser.add_argument(
        '--use-trans-matrix',
        default=False,
        action='store_true',
        help='''The input matrix for RandNE. Adjacency matrix is used by default;
                        set this flag to use the transition matrix instead.''')
    parser.add_argument('-q',
                        '--order',
                        default=3,
                        type=int,
                        help='Maximum order of adjacency matrix.')
    parser.add_argument(
        '-d',
        '--representation-size',
        default=128,
        type=int,
        help='Number of latent dimensions to learn for each node.')
    # type=float lets argparse reject a malformed weight with a usage error
    # instead of a ValueError traceback after parsing.
    parser.add_argument(
        '--weights',
        nargs='+',
        type=float,
        required=True,
        help=
        'Weights for each power of the adjacency matrix (or transition matrix).'
    )
    args = parser.parse_args()

    # Process args
    mat_obj = loadmat(args.input)
    A = mat_obj[args.matfile_variable_name]
    if args.use_trans_matrix:
        # Left-multiply by diag(1 / row sum) so that every row is divided by
        # its own sum.
        N = A.shape[0]
        normalizer = spdiags(np.squeeze(1.0 / csc_matrix.sum(A, axis=1)), 0, N,
                             N)
        input_mat = normalizer @ A
    else:
        input_mat = A
    weights = args.weights  # already a list of floats (see --weights above)

    # Start RandNE
    U_list = randne_projection(input_mat,
                               q=args.order,
                               dim=args.representation_size)
    U = randne_merge(U_list, weights)

    savemat(args.output, {'emb': U})
Exemple #2
0
def fastrp_projection(A,
                      q=3,
                      dim=128,
                      projection_method='gaussian',
                      input_matrix='adj',
                      alpha=None):
    """Randomly project ``A`` (or its transition matrix) and its powers.

    Args:
        A: Square matrix; ``csc_matrix.sum(A, axis=1)`` must be valid for it.
        q (int): Highest power to project. The result has one entry for
            power 1 plus one for each power from 2 to ``q``.
        dim (int): ``n_components`` passed to the random projection.
        projection_method (str): ``'gaussian'`` or ``'sparse'``.
        input_matrix (str): ``'adj'`` to use ``A`` itself, ``'trans'`` to use
            ``A`` with every row divided by its row sum.
        alpha (float or None): When not None, the fitted projection's
            ``components_`` are right-multiplied by
            ``diag(row_sum(A) ** alpha)`` before transforming.

    Returns:
        list: ``[U_1, ..., U_q]`` where ``U_1 = transformer.transform(M)``
        and ``U_i = M @ U_{i-1}``.
    """
    assert input_matrix == 'adj' or input_matrix == 'trans'
    assert projection_method == 'gaussian' or projection_method == 'sparse'

    # N is needed both for the transition matrix and for the alpha scaling
    # below, so bind it unconditionally; binding it only in the 'trans'
    # branch made `input_matrix='adj'` with `alpha` set fail with NameError.
    N = A.shape[0]

    if input_matrix == 'adj':
        M = A
    else:
        normalizer = spdiags(np.squeeze(1.0 / csc_matrix.sum(A, axis=1)), 0, N,
                             N)
        M = normalizer @ A

    if projection_method == 'gaussian':
        # Gaussian projection matrix
        transformer = random_projection.GaussianRandomProjection(
            n_components=dim, random_state=42)
    else:
        # Sparse projection matrix
        transformer = random_projection.SparseRandomProjection(
            n_components=dim, random_state=42)
    Y = transformer.fit(M)

    if alpha is not None:
        # Right-multiplying by a diagonal matrix rescales the columns of the
        # projection matrix by row_sum(A) ** alpha.
        Y.components_ = Y.components_ @ spdiags(
            np.squeeze(np.power(csc_matrix.sum(A, axis=1), alpha)), 0, N, N)

    # Random projection for power 1, then repeated multiplication by M for
    # the higher powers.
    cur_U = transformer.transform(M)
    U_list = [cur_U]

    for _ in range(2, q + 1):
        cur_U = M @ cur_U
        U_list.append(cur_U)
    return U_list
def RefineBound(S_ori, S_add, Loss_ori, K):
    """Calculate the updated bound on the objective function (loss).

        Loss_Bound = Loss_ori + trace_change(S x S^T) - eigs(delta(S x S^T), K)

    Args:
        S_ori (sparse matrix): scipy sparse original adjacency matrix.
        S_add (sparse matrix): scipy sparse matrix of the added entries.
        Loss_ori (float): Original loss value.
        K (int): Embedding dimension.

    Returns:
        float: Newly calculated loss bound.
    """
    # Trace change: squared entries of the update, counting the original
    # values at positions where S_add is non-zero only once.
    S_overlap = (S_add != 0).multiply(S_ori)
    S_temp = S_add + S_overlap
    trace_change = csc_matrix.sum(
        S_temp.multiply(S_temp) - S_overlap.multiply(S_overlap))

    # Sum of eigenvalue magnitudes of delta(S * S^T).
    S_temp = S_ori.dot(S_add)
    S_temp = S_temp + S_temp.transpose() + S_add.dot(S_add)

    n_rows = S_temp.shape[0]
    n_eigs = min(2 * K, n_rows)
    if n_eigs < n_rows - 1:
        temp_eigs, _ = eigs(S_temp, n_eigs)
    else:
        # eigs() requires k < N - 1 and raises TypeError for sparse input
        # otherwise, so small matrices (relative to K) are solved densely.
        temp_eigs = np.linalg.eigvals(S_temp.toarray())

    # Magnitudes are non-negative by construction, so no sign filter is
    # needed; keep the n_eigs largest in descending order.
    temp_eigs = np.sort(np.absolute(temp_eigs))[::-1][:n_eigs]

    n_found = len(temp_eigs)
    if n_found >= K:
        eigen_sum = temp_eigs[:K].sum()
    else:
        # Fewer than K eigenvalues are available: pad with the smallest one.
        eigen_sum = temp_eigs.sum() + temp_eigs[n_found - 1] * (K - n_found)

    return Loss_ori + trace_change - eigen_sum
    # NOTE(review): the `def` line that owns this fragment is not visible; as
    # placed, it follows a `return` and is therefore unreachable. `data`,
    # `closestPoint` and `sc` are defined outside the visible code
    # (presumably a Spark RDD, a nearest-center helper and a SparkContext --
    # confirm).
    # Command line: argv[2] = number of centers, argv[3] = convergence
    # threshold, argv[4] = output file path.
    K = int(sys.argv[2])
    convergeDist = float(sys.argv[3])

    # Draw K initial centers from the data.
    kPoints = data.repartition(1).takeSample(False, K, 1)
    tempDist = 1.0

    # Iterate until the total squared movement of the centers is no larger
    # than convergeDist.
    while tempDist > convergeDist:
        # Key every point by the index returned by closestPoint, with a
        # count of 1.
        closest = data.map(
            lambda p: (closestPoint(p, kPoints), (p, 1)))
        # Per key: sum of points and number of points.
        pointStats = closest.reduceByKey(
            lambda p1_c1, p2_c2: (p1_c1[0] + p2_c2[0], p1_c1[1] + p2_c2[1]))
        # New center = sum of points / number of points.
        newPoints = pointStats.map(
            lambda st: (st[0], st[1][0] / st[1][1])).collect()

        # Sum over centers of the squared difference between old and new.
        tempDist = sum(csc_matrix.sum((kPoints[iK] - p).power(2)) for (iK, p) in newPoints)

        for (iK, p) in newPoints:
            kPoints[iK] = p

    # Number of non-zero entries in each final center.
    count = [0] * len(kPoints)
    for i in range(0,len(kPoints)):
        count[i] = len(kPoints[i].nonzero()[0])

    # Write one count per line.
    # NOTE(review): the file handle is never closed explicitly.
    opfile = open(sys.argv[4], "w")
    for i in count:
        opstr = str(i)
        opfile.write("%s" % opstr)
        opfile.write("\n")

    sc.stop()
def chol_approx(S, epsilon, ra):
    """Build a pivoted, rank-limited factor ``G`` from ``Q_kj`` entries.

    The entries of the matrix being factorized are obtained on demand from
    ``Q_kj(mu, Var, mean_S, n, k, j)``, which is defined elsewhere
    (presumably it returns entry (k, j) of that matrix -- TODO confirm).

    Args:
        S: Matrix of shape ``(n, d)`` supporting ``mean(0)``,
            ``transpose().dot`` and ``csc_matrix.sum``.
        epsilon (float): Relative tolerance; it is multiplied by the sum of
            the diagonal entries ``Q_kj(..., i, i)`` before use.
        ra (int): Number of columns of ``G`` (maximum number of steps).

    Returns:
        numpy.ndarray: ``G`` of shape ``(d, ra)`` with its rows put back in
        the original (un-pivoted) index order.
    """
    mu = S.mean(0)
    n, d = S.shape
    mean_S = csc_matrix.sum(S, 0)  # column sums, despite the name
    Var = S.transpose().dot(S)
    G = np.zeros((d, ra))

    # Diagonal entries. This has to be a list: ndarray has no append(), so
    # the former `np.array([])` raised AttributeError on the first iteration.
    Qjj = []
    for i in range(d):
        start_time = timeit.default_timer()
        Qjj.append(Q_kj(mu, Var, mean_S, n, i, i))
        end_time = timeit.default_timer()
        print('The time used is')
        print(end_time - start_time)

    epsilon = epsilon * sum(Qjj)

    perm = list(range(d))  # perm[r] = original index currently held in row r
    ind = 0  # number of steps that were not skipped

    for i in range(ra):
        # Residual diagonal for the rows not yet processed.
        for j in range(i, d):
            G[j, i] = Qjj[perm[j]]
            for m in range(i):
                G[j, i] = G[j, i] - G[j, m] * G[j, m]

        sump = np.sum(G[i:d, i])

        # Before any step has been taken, skip leading columns whose own
        # diagonal entry is exactly zero.
        if ind == 0 and sump > epsilon and G[i, i] == 0:
            G[i:d, i] = np.zeros((d - i, ))
            continue
        else:
            ind += 1

        if sump > epsilon:
            val = G[i:d, i].max()
            idx = G[i:d, i].argmax()

            if abs(val) < 1e-8:
                break

            idx = idx + i

            # The first step that is taken never pivots.
            if ind == 1:
                idx = i

            # Swap pivot row idx into position i (bookkeeping and the
            # already computed columns of G).
            perm[i], perm[idx] = perm[idx], perm[i]
            G[[i, idx], :i] = G[[idx, i], :i]

            if ind == 1:
                G[i, i] = np.sqrt(G[i, i])
            else:
                G[i, i] = np.sqrt(val)

            for m in range(i + 1, d):
                start_time = timeit.default_timer()
                G[m, i] = Q_kj(mu, Var, mean_S, n, perm[m], perm[i])
                G[m, i] -= np.dot(G[m, 0:i], G[i, 0:i])
                end_time = timeit.default_timer()
                print('The time used are')
                print(end_time - start_time)

            G[i + 1:d, i] = G[i + 1:d, i] / G[i, i]

        else:
            # Remaining residual is within tolerance: stop early.
            break

    # argsort of a permutation is its inverse: undo the row pivoting.
    per = np.argsort(perm)
    return G[per, :]
Exemple #6
0
# Scratch script that prints shapes/types and times a few column operations.
# NOTE(review): `X_train_n` and `matfile` are defined outside the visible
# code.
n, d = X_train_n.shape
# X_train_n = sparse.hstack((X_train_n, np.ones((n, 1)))).tocsc()
# print(1)
# print(sparse.isspmatrix_csc(X_train_n))

X_train_p1 = matfile['data_tra_DFO'].X.toarray()  # positive training
X_train_n1 = matfile['data_tra_DFO'].Y.toarray()  # negative training
# Prints whether X_train_n is a CSC sparse matrix.
print(sparse.isspmatrix_csc(X_train_n))
CC = np.mean(X_train_n1, 0)  # not used in the visible code
mu = X_train_n.mean(0)
# Column indices used by the timed expressions below.
k = 2
j = 3
S = X_train_n
[n, d] = S.shape
print(S.shape)
mean_S = csc_matrix.sum(S, 0)  # sum along axis 0, despite the name
print(mean_S.shape)
print(type(mean_S))
print(type(S[:, 2]))

# Timing 1: column k transposed times column j; the result is discarded.
# NOTE(review): presumably S is sparse, in which case `*` is a matrix
# product -- confirm.
start_time = timeit.default_timer()
S[:, k].T * S[:, j]
end_time = timeit.default_timer()
print('1', end_time - start_time)

# Timing 2: a row filled with mu[0, j] times column k; the result is
# discarded.
start_time = timeit.default_timer()
csc_matrix(mu[0, j] * np.ones((n, ))) * S[:, k]
end_time = timeit.default_timer()
print('2', end_time - start_time)

# Start of another timed section; the statement being timed is not visible
# in this excerpt.
start_time = timeit.default_timer()