def metric(self):
    """Time the construction and training of an mlpy KNN classifier.

    The classifier is created from ``self.build_opts`` (expanded as keyword
    arguments) and trained with ``self.data[0]`` and ``self.data[1]`` as the
    two arguments of ``learn``; both steps happen while the timer runs.

    Returns:
        dict: one entry, ``"runtime"``, holding the timer's elapsed time.
    """
    stopwatch = Timer()
    with stopwatch:
        classifier = mlpy.KNN(**self.build_opts)
        classifier.learn(self.data[0], self.data[1])

    return {"runtime": stopwatch.ElapsedTime()}
def RunAllKnnMlpy(q):
    """Run a timed mlpy KNN learn/predict pass and report the result on ``q``.

    Reads ``self`` (``dataset``, ``verbose``) and ``options`` from the
    enclosing scope. ``options`` must contain ``"k"``; it is popped, and any
    option left over afterwards is treated as an error.

    Args:
        q: object with a ``put`` method; receives the elapsed time on
            success or ``-1`` on any failure.

    Returns:
        The elapsed time reported by the timer, or ``-1`` on failure.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the query
    # file.
    Log.Info("Loading dataset", self.verbose)
    has_query_file = len(self.dataset) == 2
    if has_query_file:
        referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
        queryData = np.genfromtxt(self.dataset[1], delimiter=',')
    else:
        referenceData = np.genfromtxt(self.dataset, delimiter=',')

    # Labels are the last column of the dataset.
    labels = referenceData[:, (referenceData.shape[1] - 1)]
    referenceData = referenceData[:, :-1]

    try:
        with totalTimer:
            # Get all the parameters.
            if "k" not in options:
                # NOTE(review): the message says "furthest" although this
                # routine builds a nearest-neighbour model - confirm wording.
                Log.Fatal(
                    "Required option: Number of furthest neighbors to find.")
                q.put(-1)
                return -1

            k = options.pop("k")
            if k < 1 or k > referenceData.shape[0]:
                # k is numeric: it must be converted before concatenation,
                # otherwise a TypeError is raised and the message is lost.
                Log.Fatal("Invalid k: " + str(k) + "; must be greater than 0 "
                          + "and less or equal than "
                          + str(referenceData.shape[0]))
                q.put(-1)
                return -1

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            # Perform All K-Nearest-Neighbors.
            model = mlpy.KNN(k)
            model.learn(referenceData, labels)
            # Only the runtime matters here, so the predictions are discarded.
            model.pred(queryData if has_query_file else referenceData)
    except Exception:
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def knn_exh(train_data, test_data, Y_train, Y_test, d, genes, indices, K):
    """KNN error estimation with an exhaustive search over feature subsets.

    Every size-``d`` combination of ``indices`` is scored by training a KNN
    classifier on the selected training features and measuring its error on
    that same training data. The first subset reaching the minimum error is
    then evaluated on the test data. Results are printed, nothing is
    returned.

    Args:
        train_data, test_data: data handed to the external ``select`` helper
            together with a list of feature indices.
        Y_train, Y_test: class labels; compared to predictions after ``int()``.
        d: number of features per candidate subset.
        genes: sequence indexed by feature index, used to print feature names.
        indices: candidate feature indices to combine.
        K: number of neighbours for ``mlpy.KNN``.

    Raises:
        ValueError: if there is no subset of size ``d`` to evaluate.
    """
    err_set = defaultdict(list)
    dlda = mlpy.KNN(k=K)

    for subset in itertools.combinations(indices, d):
        selX = select(train_data, list(subset))
        dlda.learn(selX, Y_train)
        Y_pred = dlda.pred(selX)
        mismatch = sum(1 for predicted, actual in zip(Y_pred, Y_train)
                       if int(predicted) != int(actual))
        err_set[float(mismatch) / float(len(Y_pred))].append(subset)

    if not err_set:
        raise ValueError("no feature subsets of size %r to evaluate" % (d,))

    best_error = min(err_set)
    best_subset = err_set[best_error][0]
    print("%s %s" % (best_error, err_set[best_error]))  # Error estimate
    print([genes[x] for x in best_subset])

    # The classifier still holds the model of the *last* enumerated subset;
    # retrain on the winning subset so the test estimate matches the
    # features it is evaluated on.
    dlda.learn(select(train_data, list(best_subset)), Y_train)

    selX = select(test_data, list(best_subset))
    Y_pred = dlda.pred(selX)
    mismatch = sum(1 for predicted, actual in zip(Y_pred, Y_test)
                   if int(predicted) != int(actual))
    print(float(mismatch) / float(len(Y_test)))  # test set error estimate
def knn_fss(train_data, test_data, Y_train, Y_test, d, genes, K):
    """KNN error estimation with a forward sequential feature search.

    Starting from no features, each of the ``d`` rounds tries every unused
    column of ``train_data`` alongside the columns already chosen, trains a
    KNN classifier, and keeps the first column giving the lowest training
    error. The chosen subset is then evaluated on the test data. Results are
    printed, nothing is returned.

    Args:
        train_data: 2-D training data (``np.ndarray`` or ``np.matrix``) whose
            columns are the candidate features.
        test_data: data handed to the external ``select`` helper together
            with the chosen feature indices.
        Y_train, Y_test: class labels; compared to predictions after ``int()``.
        d: number of features to select.
        genes: sequence indexed by feature index, used to print feature names.
        K: number of neighbours for ``mlpy.KNN``.

    Raises:
        ValueError: if a round has no unused feature left to try.
    """
    def column(j):
        # Always a plain (n_samples, 1) array so that appending along axis 1
        # works for ndarray input as well as for np.matrix input.
        return np.asarray(train_data[:, j]).reshape(-1, 1)

    selX = np.empty((train_data.shape[0], 0),
                    dtype=np.asarray(train_data).dtype)
    taken = set()
    final_error = 0.0
    final_subset = []
    dlda = mlpy.KNN(k=K)

    for _ in xrange(d):
        err_set = defaultdict(list)
        for j in xrange(train_data.shape[1]):
            if j in taken:
                continue
            candidate = np.append(selX, column(j), axis=1)
            dlda.learn(candidate, Y_train)
            Y_pred = dlda.pred(candidate)
            mismatch = sum(1 for predicted, actual in zip(Y_pred, Y_train)
                           if int(predicted) != int(actual))
            err_set[float(mismatch) / float(len(Y_train))].append(j)

        if not err_set:
            raise ValueError("no unused features left to select")

        final_error = min(err_set)
        winner = err_set[final_error][0]
        selX = np.append(selX, column(winner), axis=1)
        final_subset.append(winner)
        taken.add(winner)

    # Error estimate
    print("%s %s" % (final_error, [genes[x] for x in final_subset]))

    # The classifier still holds the model of the last *candidate* tried;
    # retrain on the chosen subset before estimating the test error.
    if final_subset:
        dlda.learn(select(train_data, final_subset), Y_train)

    selX = select(test_data, final_subset)
    Y_pred = dlda.pred(selX)
    mismatch = sum(1 for predicted, actual in zip(Y_pred, Y_test)
                   if int(predicted) != int(actual))
    print(float(mismatch) / float(len(Y_test)))  # test set error estimate
# Ask where the XML files live and how words should be split.
directory = raw_input("What directory are the XML files located:\n")
# A blank answer is turned into None (the prompt advertises that as
# whitespace parsing).
regexParse = raw_input("How would you like to parse the words, leave it blank if you would like to parse by whitespace:\n") or None

vocab, indexToWord, fullDataPoints = parseDataPoints(directory, regexParse)
X, Y = packageData(fullDataPoints, regexParse, vocab, indexToWord)

# Each entry builds its classifier only when its turn comes, so models are
# constructed and tested strictly one after another, in this order.
_classifier_factories = (
    (lambda: mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000),
     "Perceptron"),
    (lambda: mlpy.ElasticNetC(lmb=0.01, eps=0.001), "ElasticNet"),
    (lambda: mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1),
     "LibLinear"),
    (lambda: mlpy.DLDA(delta=0.1), "DLDA"),
    (lambda: mlpy.Golub(), "Golub"),
    (lambda: mlpy.Parzen(), "Parzen"),
    (lambda: mlpy.KNN(2), "KNN"),
    (lambda: mlpy.ClassTree(), "Classification Tree"),
    (lambda: mlpy.MaximumLikelihoodC(), "Maximum Likelihood Classifer"),
)
for _make_classifier, _label in _classifier_factories:
    testModel(_make_classifier(), X, Y, _label)
import mlpy

# Pre-configured mlpy classifiers, looked up by a short name.
BEST = dict(
    knn=mlpy.KNN(1),
    tree=mlpy.ClassTree(stumps=0, minsize=0),
    svm=mlpy.LibSvm(
        svm_type='c_svc',
        kernel=mlpy.KernelGaussian(10),
        C=10000,
    ),
)
def BuildModel(self, data, labels):
    """Fit an mlpy KNN classifier and hand it back.

    Args:
        data: first argument passed to ``KNN.learn``.
        labels: second argument passed to ``KNN.learn``.

    Returns:
        The trained ``mlpy.KNN`` instance, created with
        ``k=self.n_neighbors``.
    """
    neighbour_count = self.n_neighbors
    classifier = mlpy.KNN(k=neighbour_count)
    classifier.learn(data, labels)
    return classifier
import mlpy

# Pre-configured mlpy classifiers, looked up by a short name.
BEST = {
    # 1-nearest-neighbour classifier
    'knn': mlpy.KNN(1),
    # classification tree created with stumps=0 and minsize=0
    'tree': mlpy.ClassTree(stumps=0, minsize=0),
    # C-SVC with a Gaussian kernel (parameter 10) and C = 10000
    'svm': mlpy.LibSvm(
        svm_type='c_svc',
        kernel=mlpy.KernelGaussian(10),
        C=10000,
    ),
}
def _report_split_stats(solver, label, weight, beta,
                        xtrain, ytrain, xtest, ytest, ftrain, ftest):
    """Predict on both splits with a fitted solver and log each via printStats."""
    yhat = solver.pred(xtrain)
    printStats(ytrain, yhat, label, weight, beta, "train errors", ftrain)
    yhat = solver.pred(xtest)
    printStats(ytest, yhat, label, weight, beta, "test errors", ftest)


def main(xfile, yfile, algorithm=""):
    """Train several mlpy classifiers and write train/test stats to CSV files.

    The data are loaded, shuffled with ``shuffle_in_unison_inplace`` and
    split into the first 6000 rows (training) and the next 4000 rows (test).
    Three families are then evaluated, each writing a ``*_Train.csv`` and a
    ``*_Test.csv`` report in the current directory:

    * LibLinear with solver types ``l1r_l2loss_svc`` and ``l1r_lr`` over a
      grid of class weights and C values,
    * ``mlpy.KNN(2)`` (reported under the label "Kmeans"),
    * ``mlpy.ClassTree()``.

    Args:
        xfile: path of the space-delimited feature file.
        yfile: path of the comma-delimited label file.
        algorithm: accepted for compatibility; its value is not used, the
            LibLinear solver types are fixed inside the function.
    """
    header = ("Weight beta Accuracy_on_winning_bids "
              "Accuracy_on_nonwinning_bids\n")

    # Close the input files as soon as they have been parsed.
    with open(xfile, "rb") as xhandle:
        x = np.loadtxt(xhandle, delimiter=" ")
    with open(yfile, "rb") as yhandle:
        y = np.loadtxt(yhandle, delimiter=",")
    x, y = shuffle_in_unison_inplace(x, y)

    tr_size = 6000
    te_size = 4000
    xtrain = x[0:tr_size]
    xtest = x[tr_size:(tr_size + te_size)]
    ytrain = y[0:tr_size]
    ytest = y[tr_size:(tr_size + te_size)]

    for solver_type in ['l1r_l2loss_svc', 'l1r_lr']:
        print(solver_type)
        with open(str(solver_type) + '_Test.csv', 'w') as ftest, \
                open(str(solver_type) + '_Train.csv', 'w') as ftrain:
            ftest.write(header)
            ftrain.write(header)
            for i in range(1, 10):
                for b in range(1, 20):
                    beta = .2 + .1 * b
                    # NOTE(review): class 1 is weighted i*.5 here, but the
                    # value logged as "Weight" below is i*.2 - confirm which
                    # one is intended.
                    w = {0: 1, 1: i * .5}
                    solver = mlpy.LibLinear(solver_type=solver_type, C=beta,
                                            eps=0.01, weight=w)
                    solver.learn(xtrain, ytrain)
                    _report_split_stats(solver, solver_type, i * .2, beta,
                                        xtrain, ytrain, xtest, ytest,
                                        ftrain, ftest)

    # The label and file names say "Kmeans", but the classifier is KNN; the
    # strings are kept because they define the output files and report text.
    print("kmeans")
    with open("Kmeans" + '_Test.csv', 'w') as ftest, \
            open("Kmeans" + '_Train.csv', 'w') as ftrain:
        ftest.write(header)
        ftrain.write(header)
        solver = mlpy.KNN(2)
        solver.learn(xtrain, ytrain)
        _report_split_stats(solver, "Kmeans", "none", "none",
                            xtrain, ytrain, xtest, ytest, ftrain, ftest)

    print("Class")
    with open("Classification" + '_Test.csv', 'w') as ftest, \
            open("Classification" + '_Train.csv', 'w') as ftrain:
        ftest.write(header)
        ftrain.write(header)
        solver = mlpy.ClassTree()
        solver.learn(xtrain, ytrain)
        _report_split_stats(solver, "Classification Tree", "none", "none",
                            xtrain, ytrain, xtest, ytest, ftrain, ftest)
# Train four mlpy classifiers on (x, y), score each on the control set
# (xcontrol, ycontrol) and record the percentage of matching predictions in
# ``dic``. The percentage uses 100.0 so that it is not truncated by integer
# division under Python 2 before being formatted with %.1f.
da = mlpy.DLDA(delta=0.1)
da.learn(x, y)
test = da.pred(xcontrol)  # test points
pct_correct = 100.0 * len(test[test == ycontrol]) / len(test)
print('DLDA: %.1f percent predicted' % pct_correct)
dic['da'].append(pct_correct)

golub = mlpy.Golub()
golub.learn(x, y)
test = golub.pred(xcontrol)  # test points
pct_correct = 100.0 * len(test[test == ycontrol]) / len(test)
print('Golub: %.1f percent predicted' % pct_correct)
dic['golub'].append(pct_correct)

knn = mlpy.KNN(k=7)
knn.learn(x, y)
test = knn.pred(xcontrol)  # test points
pct_correct = 100.0 * len(test[test == ycontrol]) / len(test)
print('KNN: %.1f percent predicted' % pct_correct)
dic['knn'].append(pct_correct)

tree = mlpy.ClassTree(stumps=0, minsize=100)
tree.learn(x, y)
test = tree.pred(xcontrol)  # test points
pct_correct = 100.0 * len(test[test == ycontrol]) / len(test)
print('ClassTree: %.1f percent predicted' % pct_correct)
dic['tree'].append(pct_correct)

# ``ld`` is a classifier defined outside this fragment.
rank = mlpy.rfe_w2(x, y, p=0, classifier=ld)
print('')
# KNN demo: classify three Gaussian clouds and plot the decision regions.
import numpy as np
import matplotlib.pyplot as plt
import mlpy

np.random.seed(0)

# ``int`` replaces ``np.int``, an alias of the builtin that was removed
# from NumPy.
mean1, cov1, n1 = [1, 5], [[1, 1], [1, 2]], 200  # 200 samples of class 1
x1 = np.random.multivariate_normal(mean1, cov1, n1)
y1 = np.ones(n1, dtype=int)

mean2, cov2, n2 = [2.5, 2.5], [[1, 0], [0, 1]], 300  # 300 samples of class 2
x2 = np.random.multivariate_normal(mean2, cov2, n2)
y2 = 2 * np.ones(n2, dtype=int)

mean3, cov3, n3 = [5, 8], [[0.5, 0], [0, 0.5]], 200  # 200 samples of class 3
x3 = np.random.multivariate_normal(mean3, cov3, n3)
y3 = 3 * np.ones(n3, dtype=int)

x = np.concatenate((x1, x2, x3), axis=0)  # concatenate the samples
y = np.concatenate((y1, y2, y3))

knn = mlpy.KNN(k=3)
knn.learn(x, y)

# Predict on a 0.1-spaced grid covering the data plus a margin of 1.
xmin, xmax = x[:, 0].min() - 1, x[:, 0].max() + 1
ymin, ymax = x[:, 1].min() - 1, x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(xmin, xmax, 0.1), np.arange(ymin, ymax, 0.1))
xnew = np.c_[xx.ravel(), yy.ravel()]
ynew = knn.pred(xnew).reshape(xx.shape)
ynew[ynew == 0] = 1  # set the samples with no unique classification to 1

fig = plt.figure(1)
# plt.set_cmap() returns None, so it cannot supply a colormap object; pass
# the colormap explicitly to both plotting calls instead.
cmap = plt.cm.Paired
plot1 = plt.pcolormesh(xx, yy, ynew, cmap=cmap)
plot2 = plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap)
plt.show()