コード例 #1
0
    # Run kmeans separately on X1 and X2 (5 clusters each) and stack element
    # [0] of each result into one anchor matrix.
    # NOTE(review): presumably kmeans returns (centroids, distortion) as in
    # scipy.cluster.vq -- confirm against the file's imports.
    anchor_pos = kmeans(X1, 5)
    anchor_neg = kmeans(X2, 5)
    anchors = np.vstack((anchor_pos[0], anchor_neg[0]))

    # Wrap every test vector in its own Instance object.
    test_ins = []
    for idx in range(len(test_data)):
        inst = Instance()
        inst.feature_vector = test_data[idx]
        test_ins.append(inst)

    instances = createInstances(data, labels)

    # ---- encode training and test instances against the same anchors ----
    compute_gammas(instances, K=anchors, gamma=1.0)
    compute_gammas(test_ins, K=anchors, gamma=1.0)

    # ---- training ----
    c = cafeMap(T=50000, beta=0.1, Lambda=0.1)
    c.train(instances, K=anchors, gamma=1.0)
    plt.figure()

    # ---- testing ----
    # For every test instance keep the prediction and the first two entries
    # of the instance-specific weight vector W . gammas.
    predictions = []
    l_w1 = []
    l_w2 = []
    for t in test_ins:
        predictions.append(c.predict_instance(t))
        local_weight = c.W.dot(t.gammas)
        local_bias = c.bias.dot(t.gammas)

        l_w1.append(local_weight[0])
        l_w2.append(local_weight[1])
    l_w1 = np.array(np.absolute(l_w1))
コード例 #2
0
ファイル: arcene.py プロジェクト: foxtrotmike/cafemap
    # NOTE(review): Ytemp, M, S, N and d are not used in the visible part of
    # this fragment; they are kept because later code may rely on them.
    Ytemp = np.append(Y, Yv)
    M = np.mean(Xtemp, axis=0)
    S = np.std(Xtemp, axis=0) + 1e-7
    N, d = X.shape

    # Scale every row (example) of the training and validation matrices to
    # unit Euclidean norm.
    X = (X.T / np.linalg.norm(X, axis=1)).T

    Xv = (Xv.T / np.linalg.norm(Xv, axis=1)).T

    I = createInstances(X, Y)
    Iv = createInstances(Xv, Yv)
    # Encode training and validation instances in a single call.
    llc = compute_gammas(I + Iv, K=50, gamma=1e-3)
    classifier = cafeMap(Lambda=1e-2,
                         T=20e3,
                         no_bias=False,
                         encoder=None,
                         c_arg=True)
    classifier.train(I, history=500)
    scores = np.array(classifier.test(Iv))

    # Sort validation scores (ascending) and reorder the labels with the same
    # permutation so scores[i] and Yv[i] still describe the same example.
    aidx = np.argsort(scores)
    scores = scores[aidx]
    Yv = Yv[aidx]
    # The class masks must be built AFTER the reordering: masks derived from
    # the unsorted labels would select arbitrary examples of the sorted
    # arrays and corrupt the per-class accuracies computed below.
    pidx = Yv == 1
    nidx = Yv != 1
    amax = 0
    for s in scores:
        a = np.mean((2 * (scores[pidx] > s) - 1) == Yv[pidx])
        a += np.mean((2 * (scores[nidx] > s) - 1) == Yv[nidx])
        a /= 2
        if a > amax:
コード例 #3
0
if __name__ == '__main__':

    # Forward slashes keep the relative path usable on every platform
    # (a hard-coded backslash only resolves on Windows).
    fname = 'data/prostate_preprocessed.txt'
    features, labels, genes = readData(
        fname, 'tumor')  # 'tumor' will be considered as +1 label

    instances = createInstances(features, labels)
    # instances = data points of type Instance as needed by cafemap

    compute_gammas(instances, K=10, k=10, gamma=0.1)  # locally linear coding.
    # K = number of anchor points in llc
    # k = number of non-zero coefficients
    # gamma = hyper-parameter > 0 for llc to enforce sparsity and locality

    c = cafeMap(T=1000, Lambda=0.00001, beta=0.1)
    # T = number of iterations
    # Lambda = regularization parameter. default 1e-3
    # beta = beta parameter in coordinate descent algorithm. default value 0.25

    result, folds = kFoldCV(
        c, instances)  # perform k fold cross validation. by default 10 fold CV
    # c = the cafeMap classifier constructed above (no explicit train() call
    #     precedes this point)
    # result, folds = kFoldCV(c, instances, folds=5) for 5 fold CV
    # result, folds = kFoldCV(c, instances, parallel=4)
    # parallel = number of CPU cores to be used
    # parallel implementation requires "joblib"

    # Each entry of result is unpacked as (scores, labels, classifier).
    scores, labels, classifiers = zip(*result)
    perFoldAuc, perFoldAcc, perFoldBestAcc, perFoldThresh = perFoldAUC(
        scores, labels)
コード例 #4
0
ファイル: prostate.py プロジェクト: foxtrotmike/cafemap
                # Convert every field after the first one to a float.
                # The builtin float is used because np.float was only an
                # alias for it and has been removed from recent NumPy.
                for f in ln[1:]:
                    vector += [float(f)]
                feat_vecs += [vector]
                genes += [ln[0]]

        return np.array(feat_vecs).T, np.array(labels), genes


if __name__ == '__main__':

    fname = 'data/prostate_preprocessed.txt'
    X, Y, genes = readData(fname, 'tumor')
    # Scale every row of X to unit Euclidean norm.
    X = (X.T / np.linalg.norm(X, axis=1)).T
    instances = createInstances(X, Y)
    llc = compute_gammas(instances, K=10, k=10)
    c = cafeMap(Lambda=1e-3, T=1e2, no_bias=True)
    result, folds = c.kFoldCV(instances, parallel=1)
    # Each entry of result is unpacked as (scores, labels, classifier).
    scores, labels, classifiers = zip(*result)
    classifier = classifiers[0]

    # Accumulate the localWb(instances) arrays of all fold classifiers.
    # NOTE: the loop variable rebinds `c`; after the loop `c` refers to the
    # last fold classifier, not to the cafeMap object created above.
    Wb = classifiers[0].localWb(instances)  #[:-1]
    for c in classifiers[1:]:
        Wb += c.localWb(instances)  #[:-1]

    from sklearn.cluster import KMeans
    Wb0 = Wb * 1  # unscaled copy of the accumulated array
    Wb = 100 * Wb
    # Keep the 40 rows with the largest summed absolute value.  (A previous
    # "any entry above 1e-6" mask assigned to idx was overwritten by this
    # statement before being used, so it has been dropped.)
    idx = np.argsort(np.sum(np.abs(Wb), axis=1))[-40:]
    Wbr = Wb[idx, :]
    # Restrict the columns to the examples labelled -1.
    Wbr = Wbr[:, Y == -1]
    model = KMeans(init='k-means++', n_clusters=5)
コード例 #5
0
    test_data = np.array(test_data)

    # Wrap every test vector in its own Instance object.
    test_ins = []
    for idx in range(len(test_data)):
        inst = Instance()
        inst.feature_vector = test_data[idx]
        test_ins.append(inst)

    # Run kmeans with 4 clusters on training and test points together;
    # element [0] of the result is what gets passed on as the anchors.
    all_data = np.vstack((data, test_data))
    anchors = kmeans(all_data, 4)

    instances = createInstances(data, labels)

    # ---- encode training and test instances against the same anchors ----
    compute_gammas(instances, K=anchors[0], k=2, gamma=1.0)
    compute_gammas(test_ins, K=anchors[0], k=2, gamma=1.0)

    # ---- training ----
    c = cafeMap(T=10000, beta=10.0, Lambda=0.1)
    c.train(instances)
    plt.figure()

    # ---- testing ----
    # For every test instance keep the prediction and the first two entries
    # of the instance-specific weight vector W . gammas.
    predictions = []
    l_w1 = []
    l_w2 = []
    for t in test_ins:
        predictions.append(c.predict_instance(t))
        local_weight = c.W.dot(t.gammas)
        local_bias = c.bias.dot(t.gammas)

        l_w1.append(local_weight[0])
        l_w2.append(local_weight[1])
    l_w1 = np.array(np.absolute(l_w1))
コード例 #6
0
ファイル: toy_circle.py プロジェクト: foxtrotmike/cafemap
    # Build the toy data set: positive and negative point sets from
    # getCircle(N), stacked into one matrix.
    from circle import getCircle
    Xp, Xn = getCircle(N)
    X = np.vstack((Xp, Xn))
    d = X.shape[1]
    # Uniform noise in [-nu, nu) for each of the 2*N points and d dimensions.
    Nu = nu * (2 * np.random.rand(2 * N, d) - 1)
    # Report the mean noise norm as a percentage of the signal norm.
    print "NSR", np.mean(100 * np.linalg.norm(Nu, axis=1) /
                         np.linalg.norm(X, axis=1))
    X += Nu
    # First N rows are labelled +1, the remaining N rows -1.
    Y = np.array([1] * N + [-1] * N)

    instances = createInstances(X, Y)

    classifier = cafeMap(Lambda=1e-1,
                         T=5e3,
                         no_bias=False,
                         encoder='llc',
                         K=5,
                         gamma=1e-3)
    result, folds = classifier.kFoldCV(instances,
                                       folds=5,
                                       shuffle=True,
                                       history=100,
                                       parallel=4)  # 5-fold CV (folds=5), parallel=4
    # Each entry of result is unpacked as (scores, labels, classifier).
    scores, labels, classifiers = zip(*result)
    # Stack localWb(instances) of every fold classifier, average over the
    # folds and drop the last row.
    # NOTE(review): the dropped row is presumably the bias term -- confirm.
    Wb = np.array([c.localWb(instances) for c in classifiers])
    W = np.mean(Wb, axis=0)[:-1]
    fpr, tpr, auc = roc_VA(zip(*(scores, labels)))
    #generate vertically averaged ROC curve
    plt.figure()
    plt.plot(fpr, tpr)
    plt.xlabel('FPR')
コード例 #7
0
ファイル: results_table1.py プロジェクト: foxtrotmike/cafemap
                    # builtin float: np.float was only an alias for it and has
                    # been removed from recent NumPy releases
                    vector+=[float(f)]
                feat_vecs+=[vector]
                genes+=[ln[0]]            
            
        return np.array(feat_vecs).T, np.array(labels), genes

if __name__ == '__main__':

    # ======================== lymphoma ======================== #
    fname = 'data/dlbcl_preprocessed.txt'
    features, labels, genes = readData(fname, '1')
    instances = createInstances(features, labels)
    compute_gammas(instances, K=10, k=10, gamma=0.1)
    c = cafeMap(T=100000, Lambda=0.0001, beta=0.1)
    result, folds = c.kFoldCV(instances, parallel=4)
    # Each entry of result is unpacked as (scores, labels, classifier);
    # note that this rebinds `labels` to the per-fold labels.
    scores, labels, classifiers = zip(*result)
    perFoldAuc, perFoldAcc, perFoldBestAcc, perFoldThresh = perFoldAUC(scores, labels)
    # Single-argument print(...) with %-formatting emits exactly the text the
    # former print statements produced and also parses under Python 3.
    print("The AVG AUC for 10 folds(Lymphoma)= %s" % (np.mean(perFoldAuc),))
    print("The AVG Accuracy (zero threshold) for 10 folds(Lymphoma)= %s" % (np.mean(perFoldAcc),))
    print("The AVG Accuracy for 10 folds(Lymphoma best threshold)= %s" % (np.mean(perFoldBestAcc),))  # (best threshold)

    # ===================== breast cancer ====================== #
    fname = 'data/breast_preprocessed.txt'
    features, labels, genes = readData(fname, 'luminal')
    instances = createInstances(features, labels)
    compute_gammas(instances, K=10, k=10, gamma=10.0)
    c = cafeMap(T=100000, Lambda=0.01, beta=0.1)
    result, folds = c.kFoldCV(instances, parallel=4)
コード例 #8
0
ファイル: toy_lin.py プロジェクト: foxtrotmike/cafemap
    # Positive class: N copies of the row vector x.
    # Negative class: N copies of x with its entries reversed.
    Xp = np.repeat(np.atleast_2d(x),N,axis = 0)    
    Xn = np.repeat(np.atleast_2d(x[::-1]),N,axis = 0)    
    
    # NOTE(review): getCircle is not used in the visible part of this
    # fragment -- confirm whether the import is still needed.
    from circle import getCircle
    
    X = np.vstack((Xp,Xn))
    d = X.shape[1]
    # Uniform noise in [-nu, nu) for each of the 2*N points and d dimensions.
    Nu = nu*(2*np.random.rand(2*N,d)-1)
    # Report the mean noise norm as a percentage of the signal norm.
    print "NSR",np.mean(100*np.linalg.norm(Nu,axis=1)/np.linalg.norm(X,axis=1))       
    X+=Nu    
    # First N rows are labelled +1, the remaining N rows -1.
    Y = np.array([1]*N+[-1]*N)
    
    
    instances=createInstances(X, Y)
    
    classifier = cafeMap(Lambda = 1e-1, T = 1e3, no_bias = False)    
    result,folds = classifier.kFoldCV(instances, K = 5, gamma = 1e-3, folds = 5, shuffle = True, history = 100) # 5-fold CV (folds = 5)
    # Each entry of result is unpacked as (scores, labels, classifier).
    scores,labels,classifiers = zip(*result)    
    # Stack localWb(instances) of every fold classifier, average over the
    # folds and drop the last row.
    # NOTE(review): the dropped row is presumably the bias term -- confirm.
    Wb = np.array([c.localWb(instances) for c in classifiers])
    W = np.mean(Wb,axis = 0)[:-1]
    fpr,tpr,auc = roc_VA(zip(*(scores,labels))) 
    
    # Plot the ROC curve on the unit square, with the AUC as the title.
    plt.figure()
    plt.plot(fpr,tpr)
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.axis([0,1,0,1])
    plt.grid()
    plt.title(str(auc))