Example #1
    def metric(self):
        # Time how long it takes to build and train the KNN classifier.
        totalTimer = Timer()
        with totalTimer:
            model = mlpy.KNN(**self.build_opts)
            model.learn(self.data[0], self.data[1])

        # Report the elapsed training time as the benchmark metric.
        metric = {}
        metric["runtime"] = totalTimer.ElapsedTime()
        return metric
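This metric() method assumes a surrounding benchmark harness that provides Timer, self.build_opts, and self.data. A minimal self-contained sketch of the same learn-and-time flow, with time.time() standing in for the harness's Timer and toy data in place of self.data:

import time
import numpy as np
import mlpy

# Toy two-class data, purely for illustration.
x = np.random.rand(100, 4)
y = np.where(x[:, 0] > 0.5, 1, 2)

start = time.time()
model = mlpy.KNN(k=3)   # stands in for mlpy.KNN(**self.build_opts)
model.learn(x, y)
runtime = time.time() - start
print {"runtime": runtime}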
Example #2
        def RunAllKnnMlpy(q):
            totalTimer = Timer()

            # Load input dataset.
            # If the dataset contains two files then the second file is the query
            # file.
            Log.Info("Loading dataset", self.verbose)
            if len(self.dataset) == 2:
                referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
                queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            else:
                referenceData = np.genfromtxt(self.dataset, delimiter=',')

            # Labels are the last row of the dataset.
            labels = referenceData[:, (referenceData.shape[1] - 1)]
            referenceData = referenceData[:, :-1]

            try:
                with totalTimer:
                    # Get all the parameters.
                    if not "k" in options:
                        Log.Fatal(
                            "Required option: Number of furthest neighbors to find."
                        )
                        q.put(-1)
                        return -1
                    else:
                        k = options.pop("k")
                        if (k < 1 or k > referenceData.shape[0]):
                            Log.Fatal("Invalid k: " + k +
                                      "; must be greater than 0 " +
                                      "and less or equal than " +
                                      str(referenceData.shape[0]))
                            q.put(-1)
                            return -1

                    if len(options) > 0:
                        Log.Fatal("Unknown parameters: " + str(options))
                        raise Exception("unknown parameters")

                    # Perform All K-Nearest-Neighbors.
                    model = mlpy.KNN(k)
                    model.learn(referenceData, labels)

                    if len(self.dataset) == 2:
                        out = model.pred(queryData)
                    else:
                        out = model.pred(referenceData)
            except Exception as e:
                # Report any failure to the parent process as -1.
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
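RunAllKnnMlpy is meant to execute in a child process and report its runtime (or -1 on failure) through the queue q, which lets a caller enforce a timeout. A sketch of such a driver, under the assumption that the harness uses multiprocessing (run_with_timeout is a hypothetical name):

import multiprocessing

def run_with_timeout(runner, timeout):
    # Launch the benchmark in a child process and kill it if it overruns.
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=runner, args=(q,))
    p.start()
    p.join(timeout)
    if p.is_alive():
        p.terminate()
        return -1  # timed out
    return q.get()  # elapsed time, or -1 on failure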
Example #3
from collections import defaultdict
import itertools
import mlpy

def knn_exh(train_data, test_data, Y_train, Y_test, d, genes, indices, K):
    ################# KNN error estimation (exhaustive search) #################
    err_set = defaultdict(list)
    knn = mlpy.KNN(k=K)
    # Try every d-gene subset and record its resubstitution error.
    for subset in itertools.combinations(indices, d):
        mismatch = 0
        selX = select(train_data, list(subset))
        knn.learn(selX, Y_train)
        Y_pred = knn.pred(selX)
        for i in range(len(Y_pred)):
            if int(Y_pred[i]) != int(Y_train[i]):
                mismatch += 1
        err_set[float(mismatch) / float(len(Y_pred))].append(subset)
    best = err_set[min(err_set.keys())][0]
    print min(err_set.keys()), err_set[min(err_set.keys())]  # error estimate
    print [genes[x] for x in best]
    # Re-train on the best subset before scoring the test set; the model
    # otherwise still holds whatever subset the loop visited last.
    knn.learn(select(train_data, list(best)), Y_train)
    selX = select(test_data, list(best))
    mismatch = 0
    Y_pred = knn.pred(selX)
    for i in range(len(Y_pred)):
        if int(Y_pred[i]) != int(Y_test[i]):
            mismatch += 1
    print float(mismatch) / float(len(Y_test))  # test set error estimate
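Both knn_exh above and knn_fss in the next example call a select helper that this page does not show. A plausible stand-in, assuming it simply extracts the listed feature columns (a guess for illustration, not the original helper):

import numpy as np

def select(data, columns):
    # Hypothetical reconstruction: keep only the listed feature columns.
    return np.asarray(data)[:, columns]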
Example #4
from collections import defaultdict
import numpy as np
import mlpy

def knn_fss(train_data, test_data, Y_train, Y_test, d, genes, K):
    ######### KNN error estimation (forward sequential search) #########
    selX = np.empty((train_data.shape[0], 0))
    taken = defaultdict(int)
    final_error = 0.0
    final_subset = []
    knn = mlpy.KNN(k=K)
    for _ in xrange(d):
        err_set = defaultdict(list)
        for j in xrange(train_data.shape[1]):
            if taken[j] != 0:
                continue
            mismatch = 0
            # Tentatively add feature j (the j:j+1 slice keeps selX 2-D).
            selX = np.append(selX, train_data[:, j:j+1], axis=1)
            knn.learn(selX, Y_train)
            Y_pred = knn.pred(selX)
            for i in range(len(Y_pred)):
                if int(Y_pred[i]) != int(Y_train[i]):
                    mismatch += 1
            err_set[float(mismatch) / float(len(Y_train))].append(j)
            selX = np.delete(selX, -1, 1)  # undo the tentative add
        # Keep the candidate feature with the lowest error this round.
        best_j = err_set[min(err_set.keys())][0]
        selX = np.append(selX, train_data[:, best_j:best_j+1], axis=1)
        final_subset.append(best_j)
        final_error = min(err_set.keys())
        taken[best_j] = 1
    print final_error, [genes[x] for x in final_subset]  # error estimate
    mismatch = 0
    # Re-train on the selected subset before scoring the test set.
    knn.learn(selX, Y_train)
    selX_test = select(test_data, final_subset)
    Y_pred = knn.pred(selX_test)
    for i in range(len(Y_pred)):
        if int(Y_pred[i]) != int(Y_test[i]):
            mismatch += 1
    print float(mismatch) / float(len(Y_test))  # test set error estimate
Example #5
directory = raw_input("What directory are the XML files located in:\n")
regexParse = raw_input("How would you like to parse the words? Leave blank to parse by whitespace:\n")
if regexParse == "":
    regexParse = None
# parseDataPoints, packageData, and testModel are defined elsewhere in this script.
[vocab, indexToWord, fullDataPoints] = parseDataPoints(directory, regexParse)
[X, Y] = packageData(fullDataPoints, regexParse, vocab, indexToWord)


testModel(mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000), X, Y, "Perceptron")
testModel(mlpy.ElasticNetC(lmb=0.01, eps=0.001), X, Y, "ElasticNet")
testModel(mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1), X, Y, "LibLinear")
testModel(mlpy.DLDA(delta=0.1), X, Y, "DLDA")
testModel(mlpy.Golub(), X, Y, "Golub")
testModel(mlpy.Parzen(), X, Y, "Parzen")
testModel(mlpy.KNN(2), X, Y, "KNN")
testModel(mlpy.ClassTree(), X, Y, "Classification Tree")
testModel(mlpy.MaximumLikelihoodC(), X, Y, "Maximum Likelihood Classifier")
Example #6
import mlpy

# Best-performing configuration found for each classifier family.
BEST = {
    'knn': mlpy.KNN(1),
    'tree': mlpy.ClassTree(stumps=0, minsize=0),
    'svm': mlpy.LibSvm(svm_type='c_svc',
                       kernel=mlpy.KernelGaussian(10),
                       C=10000)
}
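A dictionary like BEST makes it easy to train and compare the tuned classifiers in one loop; a short sketch (the toy x and y below are illustrative, not from the original):

import numpy as np

x = np.random.rand(200, 5)
y = np.where(x[:, 0] > 0.5, 1, 2)

for name, clf in BEST.items():
    clf.learn(x, y)                   # train each tuned classifier
    acc = np.mean(clf.pred(x) == y)   # resubstitution accuracy
    print name, acc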
Example #7
    def BuildModel(self, data, labels):
        # Create and train the classifier.
        knc = mlpy.KNN(k=self.n_neighbors)
        knc.learn(data, labels)
        return knc
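BuildModel is a method of a benchmark class that supplies n_neighbors; a self-contained sketch of how it might be wrapped and called (KNNBenchmark and the toy data are hypothetical):

import numpy as np
import mlpy

class KNNBenchmark(object):
    def __init__(self, n_neighbors=3):
        self.n_neighbors = n_neighbors

    def BuildModel(self, data, labels):
        # Create and train the classifier.
        knc = mlpy.KNN(k=self.n_neighbors)
        knc.learn(data, labels)
        return knc

bench = KNNBenchmark(n_neighbors=3)
x = np.random.rand(50, 2)
y = np.where(x[:, 0] > 0.5, 1, 2)
model = bench.BuildModel(x, y)
print model.pred(x)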
Example #8
import mlpy


BEST = {'knn': mlpy.KNN(1),
        'tree': mlpy.ClassTree(stumps=0, minsize=0),
        'svm': mlpy.LibSvm(svm_type='c_svc',
                           kernel=mlpy.KernelGaussian(10), C=10**4)
}

Example #9
import numpy as np
import mlpy

# shuffle_in_unison_inplace and printStats are helper functions defined
# elsewhere in the full script.
def main(xfile, yfile, algorithm=""):
    x = np.loadtxt(open(xfile, "rb"), delimiter=" ")
    y = np.loadtxt(open(yfile, "rb"), delimiter=",")

    x, y = shuffle_in_unison_inplace(x, y)

    tr_size = 6000
    te_size = 4000

    xtrain = x[0:tr_size]
    xtest = x[tr_size:(tr_size + te_size)]

    ytrain = y[0:tr_size]
    ytest = y[tr_size:(tr_size + te_size)]

    algorithms = ['l1r_l2loss_svc', 'l1r_lr']
    for algorithm in algorithms:
        print algorithm
        ftest = open(str(algorithm) + '_Test.csv', 'w')
        ftrain = open(str(algorithm) + '_Train.csv', 'w')
        ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
        ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
        # Sweep the class weight and the regularization parameter beta.
        for i in range(1, 10):
            for b in range(1, 20):
                beta = .2 + .1 * b
                w = {0: 1, 1: i * .5}
                solver = mlpy.LibLinear(solver_type=algorithm, C=beta, eps=0.01, weight=w)
                solver.learn(xtrain, ytrain)

                yhat = solver.pred(xtrain)
                printStats(ytrain, yhat, algorithm, i * .5, beta, "train errors", ftrain)

                yhat = solver.pred(xtest)
                printStats(ytest, yhat, algorithm, i * .5, beta, "test errors", ftest)
        ftest.close()
        ftrain.close()

    print "KNN"
    ftest = open("KNN" + '_Test.csv', 'w')
    ftrain = open("KNN" + '_Train.csv', 'w')
    ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    solver = mlpy.KNN(2)
    solver.learn(xtrain, ytrain)
    yhat = solver.pred(xtrain)
    printStats(ytrain, yhat, "KNN", "none", "none", "train errors", ftrain)
    yhat = solver.pred(xtest)
    printStats(ytest, yhat, "KNN", "none", "none", "test errors", ftest)
    ftest.close()
    ftrain.close()

    print "Classification Tree"
    ftest = open("Classification" + '_Test.csv', 'w')
    ftrain = open("Classification" + '_Train.csv', 'w')
    ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    solver = mlpy.ClassTree()
    solver.learn(xtrain, ytrain)
    yhat = solver.pred(xtrain)
    printStats(ytrain, yhat, "Classification Tree", "none", "none", "train errors", ftrain)
    yhat = solver.pred(xtest)
    printStats(ytest, yhat, "Classification Tree", "none", "none", "test errors", ftest)
    ftest.close()
    ftrain.close()
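printStats is not shown on this page; a plausible reconstruction, assuming labels are 0/1 with winning bids as class 1 and that it writes one space-separated row per call (this is a guess, not the original helper):

import numpy as np

def printStats(ytrue, yhat, name, weight, beta, label, f):
    # Hypothetical reconstruction: per-class accuracy, one row per call.
    ytrue = np.asarray(ytrue)
    yhat = np.asarray(yhat)
    win = ytrue == 1                    # winning bids assumed labelled 1
    acc_win = np.mean(yhat[win] == ytrue[win])
    acc_non = np.mean(yhat[~win] == ytrue[~win])
    print name, label, acc_win, acc_non
    f.write("%s %s %f %f\n" % (weight, beta, acc_win, acc_non))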
Example #10
    da = mlpy.DLDA(delta=0.1)
    da.learn(x, y)
    test = da.pred(xcontrol)  # test points
    print 'DLDA: %.1f percent predicted' % (100. * len(test[test == ycontrol]) /
                                            len(test))
    dic['da'].append(100. * len(test[test == ycontrol]) / len(test))

    golub = mlpy.Golub()
    golub.learn(x, y)
    test = golub.pred(xcontrol)  # test points
    print 'Golub: %.1f percent predicted' % (
        100. * len(test[test == ycontrol]) / len(test))
    dic['golub'].append(100. * len(test[test == ycontrol]) / len(test))

    knn = mlpy.KNN(k=7)
    knn.learn(x, y)
    test = knn.pred(xcontrol)  # test points
    print 'KNN: %.1f percent predicted' % (100. * len(test[test == ycontrol]) /
                                           len(test))
    dic['knn'].append(100. * len(test[test == ycontrol]) / len(test))

    tree = mlpy.ClassTree(stumps=0, minsize=100)
    tree.learn(x, y)
    test = tree.pred(xcontrol)  # test points
    print 'ClassTree: %.1f percent predicted' % (
        100. * len(test[test == ycontrol]) / len(test))
    dic['tree'].append(100. * len(test[test == ycontrol]) / len(test))

    # 'ld' is a classifier exposing learned weights, defined earlier in the full script.
    rank = mlpy.rfe_w2(x, y, p=0, classifier=ld)
    print ''
Example #11
#KNN
import numpy as np
import matplotlib.pyplot as plt
import mlpy
np.random.seed(0)
mean1, cov1, n1 = [1, 5], [[1,1],[1,2]], 200  # 200 samples of class 1
x1 = np.random.multivariate_normal(mean1, cov1, n1)
y1 = np.ones(n1, dtype=np.int)
mean2, cov2, n2 = [2.5, 2.5], [[1,0],[0,1]], 300 # 300 samples of class 2
x2 = np.random.multivariate_normal(mean2, cov2, n2)
y2 = 2 * np.ones(n2, dtype=np.int)
mean3, cov3, n3 = [5, 8], [[0.5,0],[0,0.5]], 200 # 200 samples of class 3
x3 = np.random.multivariate_normal(mean3, cov3, n3)
y3 = 3 * np.ones(n3, dtype=np.int)
x = np.concatenate((x1, x2, x3), axis=0) # concatenate the samples
y = np.concatenate((y1, y2, y3))
knn = mlpy.KNN(k=3)
knn.learn(x, y)
xmin, xmax = x[:,0].min()-1, x[:,0].max()+1
ymin, ymax = x[:,1].min()-1, x[:,1].max()+1
xx, yy = np.meshgrid(np.arange(xmin, xmax, 0.1), np.arange(ymin, ymax, 0.1))
xnew = np.c_[xx.ravel(), yy.ravel()]
ynew = knn.pred(xnew).reshape(xx.shape)
ynew[ynew == 0] = 1 # set the samples with no unique classification to 1
fig = plt.figure(1)
# Pass the colormap directly to the plotting calls; plt.set_cmap() is
# unreliable before any image exists.
plot1 = plt.pcolormesh(xx, yy, ynew, cmap=plt.cm.Paired)
plot2 = plt.scatter(x[:,0], x[:,1], c=y, cmap=plt.cm.Paired)
plt.show()