import numpy as np
import mlpy
from sklearn import metrics   # assumed source of accuracy_score


# DELTA is a module-level default defined elsewhere in the original code.
def dlda(X_train, y_train, X_test, y_test, delta=DELTA):
    # Diagonal Linear Discriminant Analysis: fit on the training set and
    # report accuracy on both the training and the test set.
    clf = mlpy.DLDA(delta=delta)
    y_train = np.transpose(y_train)[0]          # flatten the (n, 1) label column
    clf.learn(X_train, y_train)
    y_pred_train = clf.pred(X_train)
    y_pred_test = clf.pred(X_test)
    coef = np.mean(clf._dprime, axis=0)         # per-feature discriminant weights
    training_acc = metrics.accuracy_score(y_train, y_pred_train)
    testing_acc = metrics.accuracy_score(y_test, y_pred_test)
    return training_acc, testing_acc, np.array(coef)
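# --- Hedged usage sketch (not part of the original snippet) ---
# Shows how dlda() above might be called on a tiny synthetic data set.
# Assumptions: mlpy and scikit-learn are installed, `metrics` is
# sklearn.metrics, and y_train is passed as an (n, 1) label column, as the
# np.transpose(y_train)[0] line implies. All data below is made up.
import numpy as np

X_tr = np.random.randn(20, 5)
y_tr = np.array([[1] * 10 + [2] * 10]).T        # (20, 1) label column
X_te = np.random.randn(10, 5)
y_te = np.array([1] * 5 + [2] * 5)

train_acc, test_acc, weights = dlda(X_tr, y_tr, X_te, y_te, delta=0.1)
print train_acc, test_acc                       # accuracies in [0, 1]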
def __init__(self):
    try:
        import mlpy
    except ImportError:
        grass.fatal(_("Cannot import mlpy (http://mlpy.sourceforge.net)"
                      " library."
                      " Please install it or ensure that it is on path"
                      " (use PYTHONPATH variable)."))
    # Pylint has a problem with this mlpy and v.class.mlpy.py,
    # thus warnings for objects from mlpy have to be disabled.
    self.mlclassifier = mlpy.DLDA(delta=0.01)  # pylint: disable=E1101
import itertools
from collections import defaultdict

import mlpy


def dlda_exh(train_data, test_data, Y_train, Y_test, d, genes, indices):
    # DLDA error estimation (exhaustive search): evaluate every size-d subset
    # of the candidate feature indices on the training set.
    err_set = defaultdict(list)
    dlda = mlpy.DLDA(delta=0.5)
    for subset in itertools.combinations(indices, d):
        mismatch = 0
        selX = select(train_data, list(subset))
        dlda.learn(selX, Y_train)
        Y_pred = dlda.pred(selX)
        for i in range(len(Y_pred)):
            if int(Y_pred[i]) != int(Y_train[i]):
                mismatch += 1
        err_set[float(mismatch) / float(len(Y_pred))].append(subset)

    best_err = min(err_set.keys())
    best_subset = err_set[best_err][0]
    print best_err, err_set[best_err]              # resubstitution error estimate
    print [genes[x] for x in best_subset]          # gene names of one best subset

    # Refit on the best training subset before scoring the test set
    # (otherwise the classifier still holds the last subset tried above).
    dlda.learn(select(train_data, list(best_subset)), Y_train)
    selX = select(test_data, list(best_subset))
    mismatch = 0
    Y_pred = dlda.pred(selX)
    for i in range(len(Y_pred)):
        if int(Y_pred[i]) != int(Y_test[i]):
            mismatch += 1
    print float(mismatch) / float(len(Y_test))     # test set error estimate
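# --- Hedged sketch of the select() helper (not shown in the original code) ---
# Both dlda_exh() above and dlda_fss() below call select(data, columns); an
# assumed implementation is plain column selection from a samples-by-genes
# matrix. The original helper may differ.
import numpy as np

def select(data, columns):
    # keep only the requested feature (gene) columns, preserving sample order
    return np.asarray(data)[:, list(columns)]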
import numpy as np
import mlpy
from collections import defaultdict


def dlda_fss(train_data, test_data, Y_train, Y_test, d, genes):
    # DLDA error estimation (forward sequential search): greedily add the
    # feature that gives the lowest training error until d features are chosen.
    selX = []
    taken = defaultdict(int)
    final_error = 0.0
    final_subset = []
    dlda = mlpy.DLDA(delta=0.5)
    for i in xrange(d):
        err_set = defaultdict(list)
        for j in xrange(train_data.shape[1]):
            if taken[j] != 0:
                continue
            mismatch = 0
            if np.array(selX).shape[0] == 0:
                selX = train_data[:, j:j + 1]                      # first candidate column
            else:
                selX = np.append(selX, train_data[:, j:j + 1], axis=1)
            dlda.learn(selX, Y_train)
            Y_pred = dlda.pred(selX)
            for k in range(len(Y_pred)):
                if int(Y_pred[k]) != int(Y_train[k]):
                    mismatch += 1
            err_set[float(mismatch) / float(len(Y_train))].append(j)
            selX = np.delete(selX, -1, 1)                          # drop the candidate again
        best = err_set[min(err_set.keys())][0]
        selX = np.append(selX, train_data[:, best:best + 1], axis=1)
        final_subset.append(best)
        final_error = min(err_set.keys())
        taken[best] = 1

    print final_error, [genes[x] for x in final_subset]            # resubstitution error estimate

    # Refit on the final subset before scoring the test set.
    dlda.learn(select(train_data, final_subset), Y_train)
    mismatch = 0
    selX = select(test_data, final_subset)
    Y_pred = dlda.pred(selX)
    for i in range(len(Y_pred)):
        if int(Y_pred[i]) != int(Y_test[i]):
            mismatch += 1
    print float(mismatch) / float(len(Y_test))                     # test set error estimate
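# --- Hedged usage sketch (illustrative only, not from the original code) ---
# Runs both search strategies on a small synthetic expression matrix; the
# variable names and sizes below are assumptions made for the example.
import numpy as np

gene_names = ['g%d' % n for n in range(6)]
expr_train = np.random.randn(16, 6)
expr_test = np.random.randn(8, 6)
labels_train = np.array([1] * 8 + [2] * 8)
labels_test = np.array([1] * 4 + [2] * 4)

dlda_exh(expr_train, expr_test, labels_train, labels_test, 2, gene_names, range(6))
dlda_fss(expr_train, expr_test, labels_train, labels_test, 2, gene_names)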
print "Error in", modelName directory = raw_input("What directory are the XML files located:\n") regexParse = raw_input("How would you like to parse the words, leave it blank if you would like to parse by whitespace:\n") if(regexParse == ""): regexParse = None [vocab,indexToWord,fullDataPoints] = parseDataPoints(directory,regexParse) [X,Y] = packageData(fullDataPoints,regexParse,vocab, indexToWord) testModel(mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000), X, Y, "Perceptron") testModel(mlpy.ElasticNetC(lmb=0.01, eps=0.001),X,Y, "ElasticNet") testModel(mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1), X, Y, "LibLinear") testModel(mlpy.DLDA(delta=0.1), X, Y, "DLDA") testModel(mlpy.Golub(), X, Y, "Golub") testModel(mlpy.Parzen(),X,Y,"Parzen") testModel(mlpy.KNN(2),X,Y,"KNN") testModel(mlpy.ClassTree(),X,Y,"Classification Tree") testModel(mlpy.MaximumLikelihoodC(),X,Y,"Maximum Likelihood Classifer")
perc = mlpy.Perceptron()
perc.learn(x, y)
test = perc.pred(xcontrol)  # test points
print 'Perceptron: %.1f percent predicted' % (
    100 * len(test[test == ycontrol]) / len(test))
dic['perc'].append(100 * len(test[test == ycontrol]) / len(test))

elnet = mlpy.ElasticNetC(lmb=0.01, eps=0.001)
elnet.learn(x, y)
test = elnet.pred(xcontrol)  # test points
print 'Elastic Net: %.1f percent predicted' % (
    100 * len(test[test == ycontrol]) / len(test))
dic['elnet'].append(100 * len(test[test == ycontrol]) / len(test))

da = mlpy.DLDA(delta=0.1)
da.learn(x, y)
test = da.pred(xcontrol)  # test points
print 'DLDA: %.1f percent predicted' % (
    100 * len(test[test == ycontrol]) / len(test))
dic['da'].append(100 * len(test[test == ycontrol]) / len(test))

golub = mlpy.Golub()
golub.learn(x, y)
test = golub.pred(xcontrol)  # test points
print 'Golub: %.1f percent predicted' % (
    100 * len(test[test == ycontrol]) / len(test))
dic['golub'].append(100 * len(test[test == ycontrol]) / len(test))

knn = mlpy.KNN(k=7)
knn.learn(x, y)
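# --- Hedged continuation (the original snippet breaks off after knn.learn) ---
# The KNN classifier presumably follows the same predict-and-record pattern as
# the models above; the dictionary key 'knn' is an assumption.
test = knn.pred(xcontrol)  # test points
print 'KNN: %.1f percent predicted' % (
    100 * len(test[test == ycontrol]) / len(test))
dic['knn'].append(100 * len(test[test == ycontrol]) / len(test))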