Example #1
0
def testcluster_query(lamBda=0.00001, ncluster=2, kx=65, rate=0.001, func=sign,
                      query_func = query.queryZero, queryNum=50):
    """Cluster from a random sample of pairwise labels, refine with actively
    queried pairs, and report the pairwise clustering error.

    NOTE(review): relies on module-level globals defined elsewhere in the
    file: X (features), groundTruth (full pairwise similarity matrix),
    n_total (number of points), randindex, query, idc, KMeans.

    Parameters
    ----------
    lamBda : regularization weight forwarded to idc.inductive.
    ncluster : number of clusters.
    kx : rank/neighborhood parameter forwarded to idc.inductive.
    rate : fraction of pairs sampled uniformly at random in stage 1.
    func : elementwise transfer function applied to the observed
        similarities (e.g. numpy's sign).
    query_func : active-query strategy; selects queryNum extra pairs from
        the estimated similarity matrix.
    queryNum : number of actively queried pairs in stage 2.

    Returns
    -------
    float : fraction of entries where the predicted pairwise similarity
        disagrees with groundTruth.
    """
    # Vectorize the transfer function once (the original rebuilt it twice).
    vec_func = np.vectorize(func)

    def _observed_similarity(rows, cols):
        # Symmetric, sparse observed-similarity matrix: ground truth
        # restricted to the given index pairs, symmetrized, then passed
        # through the transfer function.
        S = zeros([n_total, n_total])
        S[rows, cols] = groundTruth[rows, cols]
        return sparse.csr_matrix(vec_func(S + S.T))

    # Stage 1: uniformly random sample of pairs.
    r_index, l_index = randindex(n_total, rate, -1)
    U, D = idc.inductive(X, _observed_similarity(r_index, l_index),
                         kx, ncluster, lamBda, 50)

    # Stage 2: estimate the full similarity from the stage-1 factors and
    # let the query strategy pick additional informative pairs.
    est_S = U.dot(D).dot(U.T)
    seed = -1
    r_query, l_query = query_func(est_S, queryNum, r_index, l_index, seed)
    U, D = idc.inductive(X,
                         _observed_similarity(r_index + r_query,
                                              l_index + l_query),
                         kx, ncluster, lamBda, 50)

    # k-means on the row-normalized leading columns of U (spectral style).
    Xresult = matrix(U[:, 0:ncluster])
    Xresult = Xresult / (matlib.repmat(np.sqrt(np.square(Xresult).sum(axis=1)),
                                       1,
                                       ncluster) * 1.0)
    label = array(KMeans(n_clusters=ncluster).fit_predict(Xresult))

    # Pairwise prediction: +1 if two points share a cluster, else -1.
    # Vectorized replacement for the original O(n^2) Python double loop
    # (and the equivalent commented-out cluster loop, now removed).
    predictA = - ones([n_total, n_total])
    predictA[label[:, None] == label[None, :]] = 1

    # np.prod: np.product is deprecated and removed in NumPy 2.0.
    accbias = (predictA != groundTruth).sum() / float(np.prod(groundTruth.shape))
    print('sample rate: ', rate, "  ", "query rate:", queryNum, "err: ", accbias)
    return accbias
Example #2
0
def testcluster(lamBda=0.00001, ncluster=2, kx=50, rate=0.001, func=sign):
    """Cluster from a random sample of pairwise labels and print the
    pairwise clustering error.

    NOTE(review): relies on module-level globals defined elsewhere in the
    file: X (features), groundTruth (full pairwise similarity matrix),
    n_total (number of points), randindex, idc, KMeans.

    Parameters
    ----------
    lamBda : regularization weight forwarded to idc.inductive.
    ncluster : number of clusters.
    kx : rank/neighborhood parameter forwarded to idc.inductive.
    rate : fraction of pairs sampled uniformly at random.
    func : elementwise transfer function applied to the observed
        similarities (e.g. numpy's sign).
    """
    # Observed similarity: ground truth restricted to a random sample of
    # pairs, symmetrized, passed through the transfer function, sparsified.
    S = zeros([n_total, n_total])
    r_index, l_index = randindex(n_total, rate, -1)
    S[r_index, l_index] = groundTruth[r_index, l_index]
    vec_func = np.vectorize(func)
    S = sparse.csr_matrix(vec_func(S + S.T))

    U, D = idc.inductive(X, S, kx, ncluster, lamBda, 50)

    # k-means on the row-normalized leading columns of U (spectral style).
    Xresult = matrix(U[:, 0:ncluster])
    Xresult = Xresult / (matlib.repmat(np.sqrt(np.square(Xresult).sum(axis=1)),
                                       1,
                                       ncluster) * 1.0)
    label = array(KMeans(n_clusters=ncluster).fit_predict(Xresult))

    # Pairwise prediction: +1 if two points share a cluster, else -1.
    # np.ix_ assigns the whole pos x pos block at once, replacing the
    # triple-nested Python loop.
    predictA = - ones([n_total, n_total])
    for i in range(ncluster):
        pos = np.where(label == i)[0]
        predictA[np.ix_(pos, pos)] = 1

    # np.prod: np.product is deprecated and removed in NumPy 2.0.
    accbias = (predictA != groundTruth).sum() / float(np.prod(groundTruth.shape))
    # print() call: the original Python 2 print statement is a syntax error
    # under Python 3 and inconsistent with testcluster_query above; the
    # space-separated output is identical.
    print('sample rate: ', rate, "  ", "err: ", accbias)