def perceptron_histo():
    """Classify images, viewed as colour histograms, with a Perceptron.

    Loads the data through ``utils.chargementHistogrammesImages``, holds out
    30% of it for testing, then tries every ``alpha`` in
    ``np.arange(0.01, 1.01, 0.1)`` with 1 to 4 training iterations. The
    configuration with the strictly highest test accuracy (first one wins
    ties) is printed as one table row, with the time its fit + score took.
    """
    alphas = np.arange(0.01, 1.01, 0.1)
    # best = [score, alpha, iterations, elapsed seconds]
    best = np.zeros(4)
    _, data, target, _ = utils.chargementHistogrammesImages(mer, ailleurs, 1, -1)
    # random.seed() returns None, so the former random_state=random.seed()
    # was an unseeded split/model in disguise; None says so directly.
    X_train, X_test, Y_train, Y_test = train_test_split(
        data, target, test_size=0.3, random_state=None)
    # Converted once: these arrays do not depend on the hyper-parameters, and
    # rebuilding them inside the timed section only inflated the timings.
    x1 = np.array(X_train)
    x2 = np.array(X_test)
    for iterations in range(1, 5):
        for a in alphas:
            start_time = time.time()
            # NOTE(review): scikit-learn renamed n_iter to max_iter (0.19) and
            # later removed n_iter; switch once the installed version needs it.
            p = Perceptron(alpha=a, n_iter=iterations, random_state=None, n_jobs=-1)
            p.fit(X=x1, y=Y_train)
            score = p.score(x2, Y_test)
            end_time = time.time()
            if score > best[0]:
                best[:] = (score, a, iterations, end_time - start_time)
    print("| Perceptron simple | V.Histo | alpha={:1.2f} iterations={:1.0f} | {:10.3f}ms | {:1.3f} |".format(best[1],best[2],best[3]*1000,best[0]))
def perceptron_vecteur():
    """Classify images, viewed as pixel vectors, with a Perceptron.

    For each image size ``npix`` in ``range(50, 200, 50)`` the data is loaded
    through ``utils.chargementVecteursImages`` and split 70/30. Every
    ``alpha`` in ``np.arange(0.01, 1.01, 0.1)`` is then tried with 1 to 4
    training iterations. The configuration with the strictly highest test
    accuracy is printed as one table row, with the time its fit + score took.
    """
    alphas = np.arange(0.01, 1.01, 0.1)
    # best = [score, alpha, iterations, elapsed seconds, npix]
    best = np.zeros(5)
    for npix in range(50, 200, 50):
        _, data, target, _ = utils.chargementVecteursImages(mer, ailleurs, 1, -1, npix)
        # random.seed() returns None, so the former random_state=random.seed()
        # was an unseeded split/model in disguise; None says so directly.
        X_train, X_test, Y_train, Y_test = train_test_split(
            data, target, test_size=0.3, random_state=None)
        # Per the original author's note, the loaded samples are 3-D by
        # default, e.g. (93, 1, 30000), and must be brought back to 2-D.
        # Done once per npix: it does not depend on alpha / iterations, and
        # repeating it inside the timed section only inflated the timings.
        x1 = np.array(X_train)
        x1 = np.reshape(x1, (x1.shape[0], x1.shape[2]))
        x2 = np.array(X_test)
        x2 = np.reshape(x2, (x2.shape[0], x2.shape[2]))
        for iterations in range(1, 5):
            for a in alphas:
                start_time = time.time()
                # NOTE(review): scikit-learn renamed n_iter to max_iter (0.19)
                # and later removed n_iter; switch once the installed version
                # needs it.
                p = Perceptron(alpha=a, n_iter=iterations, random_state=None, n_jobs=-1)
                p.fit(X=x1, y=Y_train)
                score = p.score(x2, Y_test)
                end_time = time.time()
                if score > best[0]:
                    best[:] = (score, a, iterations, end_time - start_time, npix)
    print("| Perceptron simple | V.Pix {:4.0f} | alpha={:1.2f} iterations={:1.0f} | {:10.3f}ms | {:1.3f} |".format(best[4],best[1],best[2],best[3]*1000,best[0]))
def t():
    """Compare Perceptron accuracy on raw versus standardised features.

    Reads the train/test CSV files (no header row; column 0 is the label),
    fits ``Perceptron(random_state=241)`` on the raw features, then refits the
    same estimator on ``StandardScaler``-transformed features. For each run
    the train/test accuracies are printed twice (via ``accuracy_score`` and
    via ``clf.score``). Finally the test-accuracy gain from scaling, rounded
    to 3 decimals, is handed to ``pf`` under the key ``'5'``.
    """
    from pandas import read_csv
    from sklearn.linear_model import Perceptron
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler

    train_frame = read_csv('w2/perceptron-train.csv', header=None)
    test_frame = read_csv('w2/perceptron-test.csv', header=None)
    train_y, train_x = train_frame[0], train_frame.drop([0], axis=1)
    test_y, test_x = test_frame[0], test_frame.drop([0], axis=1)

    clf = Perceptron(random_state=241)

    def fit_and_report(fit_x, eval_x):
        # Fit on the given training features, print both accuracy flavours
        # and return the accuracy_score-based test accuracy.
        clf.fit(fit_x, train_y)
        via_score = (clf.score(fit_x, train_y), clf.score(eval_x, test_y))
        via_metric = (
            accuracy_score(train_y, clf.predict(fit_x)),
            accuracy_score(test_y, clf.predict(eval_x)),
        )
        print(*via_metric)
        print(*via_score)
        return via_metric[1]

    raw_test_accuracy = fit_and_report(train_x, test_x)

    # The scaler is fitted on the training data only and reused for the test.
    scaler = StandardScaler()
    scaled_train_x = scaler.fit_transform(train_x)
    scaled_test_x = scaler.transform(test_x)
    scaled_test_accuracy = fit_and_report(scaled_train_x, scaled_test_x)

    pf('5', round(scaled_test_accuracy - raw_test_accuracy, 3))
def neural_net(train, test):
    """Fit a default Perceptron on ``train`` and predict the labels of ``test``.

    Both arguments are passed to ``loadData``, whose result is unpacked as
    ``(features, labels)``. The test error (1 - accuracy) is printed.

    Returns:
        The predictions of the fitted model for the test features.
    """
    xTrain, yTrain = loadData(train)
    xTest, yTest = loadData(test)
    nN = Perceptron()
    nN.fit(xTrain, yTrain)
    y = nN.predict(xTest)
    testError = 1 - nN.score(xTest, yTest)
    # One pre-built string in parentheses is valid on Python 2 and 3. The
    # doubled space reproduces what the old `print 'Test error: ' , x`
    # statement emitted.
    print('Test error:  ' + str(testError))
    return y
def neural_net(train, test):
    """Fit a default Perceptron on ``train`` and predict the labels of ``test``.

    Both arguments are passed to ``loadData``, whose result is unpacked here
    as ``(labels, features)``. The test error (1 - accuracy) is printed.

    Returns:
        The predictions of the fitted model for the test features.
    """
    trainY, trainX = loadData(train)
    testY, testX = loadData(test)
    neuralNet = Perceptron()
    neuralNet.fit(trainX, trainY)
    y = neuralNet.predict(testX)
    testError = 1 - neuralNet.score(testX, testY)
    # Parenthesised single argument: same output on Python 2 and Python 3.
    print('Test error: ' + str(testError))
    return y
def test_model(training_data, testing_data, word2vec_model):
    """Train a 5-iteration Perceptron and return its accuracy on the test data.

    One ``DictVectorizer`` is shared by both ``build_features`` calls; the
    training call passes the extra ``'train'`` argument, the testing call
    does not.
    """
    vectorizer = DictVectorizer()
    train_features, train_labels = build_features(
        training_data, word2vec_model, vectorizer, 'train')
    test_features, test_labels = build_features(
        testing_data, word2vec_model, vectorizer)

    # fit() returns the estimator itself, so fitting and scoring chain.
    perceptron = Perceptron(n_iter=5).fit(train_features, train_labels)
    return perceptron.score(test_features, test_labels)
def __Accuracy(dataDict, parameterDict):
    """Return the cross-validation accuracy of a Perceptron.

    Args:
        dataDict: mapping with the keys ``train_X``, ``train_Y``, ``cross_X``
            and ``cross_Y``.
        parameterDict: mapping with the keys ``penalty``, ``alpha``,
            ``fit_intercept``, ``n_iter``, ``shuffle`` and ``eta0``.

    Returns:
        ``clf.score(cross_X, cross_Y)``, i.e. the fraction of correctly
        classified cross-validation samples.

    Raises:
        KeyError: if one of the keys above is missing.
    """
    train_X, train_Y, cross_X, cross_Y = (
        dataDict[key] for key in ('train_X', 'train_Y', 'cross_X', 'cross_Y'))
    hyper_parameters = {
        name: parameterDict[name]
        for name in ('penalty', 'alpha', 'fit_intercept', 'n_iter', 'shuffle', 'eta0')
    }
    clf = Perceptron(random_state=1, warm_start=False, **hyper_parameters)
    clf.fit(train_X, train_Y)  # All features must be float.
    # Score = Accuracy = (TP+TN)/(TP+TN+FP+FN) = %Correct
    return clf.score(cross_X, cross_Y)
def main(argv):
    """Train a Perceptron on one CSV file and report its accuracy on another.

    Args:
        argv: command-line vector; ``argv[1]`` is the training CSV,
            ``argv[2]`` the testing CSV and ``argv[3]`` an output file name
            (required, but not used by this function). In both CSV files the
            last column is the label and all preceding columns are features.

    Prints a usage message and returns early when an argument is missing.
    """
    try:
        training_filename = argv[1]
        testing_filename = argv[2]
        output_filename = argv[3]
    except IndexError:
        print("Error, usage: \"python3 {} <training> <testing> <output>\"".format(argv[0]))
        return

    # .iloc replaces DataFrame.ix, which was removed in pandas 1.0; the slices
    # are positional, exactly as .ix treated them for non-integer column labels.
    Training_DataFrame = pd.read_csv(training_filename)
    X = Training_DataFrame.iloc[:, 0:-1]
    Y = Training_DataFrame.iloc[:, -1]

    Testing_DataFrame = pd.read_csv(testing_filename)
    testing_X = Testing_DataFrame.iloc[:, 0:-1]
    testing_Y = Testing_DataFrame.iloc[:, -1]

    # Perceptron
    from sklearn.linear_model import Perceptron

    # Hyper Parameters:
    alpha = 0.0001
    # NOTE(review): scikit-learn renamed n_iter to max_iter (0.19) and later
    # removed n_iter; switch once the installed version needs it.
    n_iter = 20

    # Fit Classifier
    print("{} Started training".format(str(datetime.now())))
    P_classifier = Perceptron(alpha=alpha, n_iter=n_iter)
    P_classifier.fit(X, Y)
    print("{} Stopped training".format(str(datetime.now())))

    # Report results
    P_score = P_classifier.score(testing_X, testing_Y)
    print("\nPerceptron Accuracy:", P_score)
def _labels_then_mask(rows, class_index):
    """Return 0/1 labels (class value > 1) and zero the class column in place."""
    labels = numpy.zeros(shape=(len(rows)))
    for i, row in enumerate(rows):
        labels[i] = row[class_index] > 1
        # Hide the label from the classifier: it lives among the features.
        row[class_index] = 0
    return labels


def train(a, sizel, intercept):
    """Fit a Perceptron on the first ``1/sizel`` of ``a`` and evaluate on the rest.

    The last column of every row holds the class value; a row is positive
    when that value is greater than 1. The column is zeroed in the working
    copy before fitting and predicting.

    Args:
        a: 2-D row collection supporting ``copy()``, ``len`` and slicing.
        sizel: divisor selecting the training share (``len(a) // sizel`` rows).
        intercept: forwarded to ``Perceptron(fit_intercept=...)``.

    Returns:
        ``(accuracy, area)``: accuracy on the evaluation rows and the area
        under the ROC curve of the predictions.
    """
    d = a.copy()
    pes = Perceptron(n_jobs=4, n_iter=500, fit_intercept=intercept)
    # Floor division keeps the slice bound an int on Python 3 as well.
    split = len(d) // sizel
    train_rows = d[:split]
    C = d[split:]
    class_index = len(d[0]) - 1
    # Each slice is labelled and masked over its own length. The previous
    # single loop ran over len(train) for both, so evaluation rows beyond that
    # length kept their class column (label leakage) and a 0 label.
    train_res = _labels_then_mask(train_rows, class_index)
    C_res = _labels_then_mask(C, class_index)
    pes.fit(train_rows, train_res)
    output = pes.predict(C)
    # pos_label by keyword: recent scikit-learn no longer accepts it positionally.
    falsepr, truepr, _ = roc_curve(C_res, output, pos_label=1)
    area = auc(falsepr, truepr)
    accuracy = pes.score(C, C_res)
    return (accuracy, area)
# Baseline scores: a constant 0 for every test sample.
ns_probs = [0] * len(y_test)

# The targets are 2-D (presumably one-hot rows - confirm); argmax over axis 1
# turns them into the integer class labels the classifier and metrics expect.
y_train_labels = y_train.argmax(axis=1)
y_test_labels = y_test.argmax(axis=1)

clf = Perceptron(eta0=0.1, random_state=0, max_iter=1000)
clf.fit(X_train, y_train_labels)

# Hard class predictions (Perceptron exposes no probabilities here).
y_score = clf.predict(X_test)
clf_predict = clf.predict(X_test)
clf_predict_on_train = clf.predict(X_train)

# Accuracy factors
print('acc for training data: {:.3f}'.format(clf.score(X_train, y_train_labels)))
print('acc for test data: {:.3f}'.format(clf.score(X_test, y_test_labels)))
print('MLP Classification report:\n\n',
      classification_report(y_test_labels, clf_predict))

# Confusion matrices for the test and the training predictions.
cm = confusion_matrix(y_test_labels, clf_predict)
cm_on_train = confusion_matrix(y_train_labels, clf_predict_on_train)
# Load the voting data set; every column but the last is a feature and the
# last column is the label.
mat = Arff("standardVoting.arff",label_count=1)
data = mat.data[:,0:-1]
labels = mat.data[:,-1]
# Hold out 30% of the rows for testing (unseeded, so the split varies per run).
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)

# In[21]:

ptron.fit(X_train,y_train)

# In[22]:

# Bare expression: only displayed when run as a notebook cell.
ptron.score(X_test,y_test)

# We see that our naive perceptron does fairly well compared to the sklearn version.
#
# 6. Iris Data Set

# In[23]:

from sklearn.datasets import load_iris

# In[24]:
def test_perceptron_accuracy():
    """Training accuracy must exceed 0.7 on both ``X`` and ``X_csr``."""
    for features in (X, X_csr):
        # 100 unshuffled epochs with the tolerance-based stop disabled.
        model = Perceptron(max_iter=100, tol=None, shuffle=False)
        model.fit(features, y)
        accuracy = model.score(features, y)
        assert accuracy > 0.7
# Randomly hold out 30% of the data set as a test set, to see how the model
# behaves on data it has never seen.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Scale the features to get the best out of the learning / optimisation
# algorithms.
from sklearn.preprocessing import StandardScaler

# fit() estimates the mean and standard deviation of every feature and
# returns the scaler itself.
sc = StandardScaler().fit(X_train)
sc.mean_   # per-feature mean, e.g. array([ 3.82857143, 1.22666667]) for the two Iris features used
sc.scale_  # per-feature standard deviation, e.g. array([ 1.79595918, 0.77769705])
X_train_std = sc.transform(X_train)
# Note: the test set is standardised with the very same parameters so that
# the training and test sets stay comparable.
X_test_std = sc.transform(X_test)

# Train the perceptron model.
from sklearn.linear_model import Perceptron

# n_iter: the number of passes over the training data
# eta0: the learning rate
# random_state: random seed, so the training data is shuffled in the same
#               order on every run
ppn = Perceptron(n_iter=40, eta0=0.2, random_state=0)
ppn.fit(X_train_std, y_train)

# Classify the test set; this returns an array of predicted labels.
y_pred = ppn.predict(X_test_std)

# Show the predictions, the learned parameters and the test-set accuracy
# (computed twice: via accuracy_score and via the estimator's score method).
print(y_pred)
print(ppn.coef_)
print(ppn.n_iter_)
print(ppn.intercept_)
print(accuracy_score(y_test, y_pred))
print(ppn.score(X_test_std, y_test))
def CheckingClassifer(ClassiferName):
    """Fit the named classifier on ``X``/``y`` and print its run time and accuracy.

    Recognised names: ``"perceptron"``, ``"RBFSVC"``, ``"LinerSVC"``,
    ``"TreeDescion"``, ``"KNN"`` and ``"LG"``. Any other name does nothing.
    The timed section covers model construction, fitting and prediction on
    ``Xtest``.
    """
    # name -> (banner, model factory, print predictions?, labelled accuracy?)
    # Factories are lazy so that construction happens inside the timed section.
    recipes = {
        # perceptron classifier
        "perceptron": (
            "-----------------------------Perceptron------------------------------------------------",
            lambda: Perceptron(penalty=None, alpha=0.0001, fit_intercept=True, max_iter=50, tol=None,
                               shuffle=True, verbose=0, eta0=0.01, n_jobs=None, random_state=0,
                               early_stopping=False, validation_fraction=0.1, n_iter_no_change=5,
                               class_weight=None, warm_start=False),
            False, False),
        # SVM classifier with RBF kernel
        "RBFSVC": (
            "------------------------NON linear SVC-------------------------------------",
            lambda: SVC(gamma='auto', C=15),
            True, False),
        # SVC classifier with linear kernel
        "LinerSVC": (
            "------------------------linear SVC-------------------------------------",
            lambda: SVC(gamma='auto', kernel='linear'),
            True, False),
        # decision tree classifier
        "TreeDescion": (
            "-----------------------------TreeDescion---------------------------------------",
            lambda: DecisionTreeClassifier(random_state=0, max_depth=15),
            True, False),
        # k-nearest neighbours
        "KNN": (
            "--------------------------KNN--------------------------------------------",
            lambda: KNeighborsClassifier(n_neighbors=1),
            False, True),
        # logistic regression classifier
        "LG": (
            "-------------------------LG---------------------------------------------",
            lambda: LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True,
                                       intercept_scaling=1, class_weight=None, random_state=None,
                                       max_iter=500, solver='liblinear'),
            False, True),
    }

    recipe = recipes.get(ClassiferName)
    if recipe is None:
        return
    banner, build_model, show_predictions, labelled_accuracy = recipe

    # Computing running time and accuracy.
    print(banner)
    start_time = time.time()
    model = build_model()
    model.fit(X, y)
    predictions = model.predict(Xtest)
    if show_predictions:
        print(predictions)
    print("--- %s seconds ---" % (time.time() - start_time))
    if labelled_accuracy:
        print("Accuracy:", metrics.accuracy_score(ytest, predictions))
    else:
        print(model.score(Xtest, ytest))
'examples_file', default=None, help= 'Exemples utilisés comme voisins pour la prédiction KNN (au format .examples)' ) parser.add_argument('test_file', default=None, help='Exemples de test (au format .examples)') parser.add_argument('--tfidf', '-i', action='store_true', help='Exemples de test (au format .examples)') args = parser.parse_args() #------------------------------------------------------------ if args.tfidf: vectorizer = TfidfVectorizer(token_pattern=r"\w+") else: vectorizer = CountVectorizer(token_pattern=r"\w+") # Chargement des exemples d'apprentissage du classifieur KNN Y_train, X_train = read_examples(args.examples_file, vectorizer) # Chargement des exemples de test Y_test, X_test = read_examples(args.test_file, vectorizer, False) #Creation des matrices perceptron = Perceptron().fit(X_train, Y_train) print(perceptron.score(X_test, Y_test))
# The CSV files have no header row: column 0 is the label, the remaining
# columns are the features.
data_train = read_csv('perceptron-train.csv', header=None)
data_test = read_csv('perceptron-test.csv', header=None)

y_train, X_train = data_train.iloc[:, 0], data_train.iloc[:, 1:]
y_test, X_test = data_test.iloc[:, 0], data_test.iloc[:, 1:]

# Baseline: perceptron on the raw features.
perceptron = Perceptron(random_state=241).fit(X_train, y_train)
accuracy = perceptron.score(X_test, y_test)

print()
print('Accuracy:', accuracy)

# Same model on standardised features; the scaler is fitted on the training
# data only and reused for the test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

perceptron2 = Perceptron(random_state=241).fit(X_train_scaled, y_train)
accuracy2 = perceptron2.score(X_test_scaled, y_test)

# Test-accuracy gain obtained by scaling.
accuracy_delta = accuracy2 - accuracy
def _train_accuracy(model):
    """Return ``model``'s accuracy on the training data, in percent (2 decimals)."""
    return round(model.score(x_train, y_train)*100, 2)


# NOTE: every accuracy below is measured on the training data, not on x_test.
y_pred = knn.predict(x_test)
acc_knn = _train_accuracy(knn)
print("KNN Acc: ", acc_knn)

# Gaussian Naive Bayes
gaussian = GaussianNB()
gaussian.fit(x_train, y_train)
y_pred = gaussian.predict(x_test)
acc_gaussian = _train_accuracy(gaussian)
print("Gaussian NB Acc: ", acc_gaussian)

# perceptron
perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_test)
acc_perceptron = _train_accuracy(perceptron)
print("Perceptron Acc: ", acc_perceptron)

# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_test)
acc_linear_svc = _train_accuracy(linear_svc)
print("Linear SVC Acc: ", acc_linear_svc)

# SGD
sgd = SGDClassifier()
sgd.fit(x_train, y_train)
y_pred = sgd.predict(x_test)
acc_sgd = _train_accuracy(sgd)
print("SGD Acc: ", acc_sgd)
# Predict the standardised test set and count the misclassified samples.
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

# In[10]:

from sklearn.metrics import accuracy_score

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

# In[11]:

# Same accuracy, this time through the estimator's own score method.
print('Accuracy: %.2f' % ppn.score(X_test_std, y_test))

# In[12]:

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # NOTE(review): only the beginning of this function is visible here; the
    # remainder of its body is presumably defined past this point - confirm.

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    # one colour per distinct label in y
    cmap = ListedColormap(colors[:len(np.unique(y))])
# Perceptron
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Perceptron

# load the diabetes dataset
dataset = datasets.load_diabetes()

# fit a Perceptron model to the data
# NOTE(review): Perceptron is a classifier while the diabetes target looks
# like a regression quantity, so every distinct target value is presumably
# treated as its own class - confirm this is intended.
model = Perceptron().fit(dataset.data, dataset.target)
print(model)

# make predictions on the training data itself
expected = dataset.target
predicted = model.predict(dataset.data)

# summarize the fit of the model: mean squared error, then accuracy
mse = np.mean(np.square(predicted - expected))
print(mse)
print(model.score(dataset.data, dataset.target))
train = pandas.read_csv('perceptron-train.csv')
test = pandas.read_csv('perceptron-test.csv')

# Labels are selected as 1-D Series. The former train[['class']] produced a
# one-column DataFrame, i.e. a column-vector y, which scikit-learn estimators
# only accept with a conversion warning.
y = train['class']
X = train[['p1', 'p2']]
y_test = test['class']
X_test = test[['p1', 'p2']]

# 2. Train a perceptron with the default parameters and random_state=241.
clf = Perceptron(random_state=241)
clf.fit(X, y)

# 3. Compute the quality (share of correctly classified objects, accuracy)
#    of the resulting classifier on the test set.
scores = clf.score(X_test, y_test)
print("score of simple data clf = %0.3f" % scores)
print("use metric acc = %0.3f " % accuracy_score(y_test, clf.predict(X_test)))

# 4. Normalise the training and the test set with StandardScaler (fitted on
#    the training data only).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

# 5. Train the perceptron on the new sets. Find the share of correct answers
#    on the test set.
clf2 = Perceptron(random_state=241)
clf2.fit(X_train_scaled, y)
# Three linear models are trained on the same features. Variable names and
# printed labels are historical: `classifierNB` is a LinearSVC and
# `classifier` (reported as "lreg") is a Perceptron.
classifierNB = LinearSVC(C=5.0)  # 0.75
classifier = Perceptron(penalty=None, alpha=0.0001, fit_intercept=True, n_iter=5, shuffle=True,
                        verbose=0, eta0=1.0, n_jobs=1, random_state=0, class_weight=None,
                        warm_start=False)
classifierSGDC = SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15,
                               fit_intercept=True, n_iter=5, shuffle=True, verbose=0, epsilon=0.1,
                               n_jobs=1, random_state=None, learning_rate='optimal', eta0=0.0,
                               power_t=0.5, class_weight=None, warm_start=False)  # 0.65

# fit the models on the training data
classifier.fit(transformed_train, pol_train)
classifierSGDC.fit(transformed_train, pol_train)
classifierNB.fit(transformed_train, pol_train)

# get the accuracy on the test data
# Each print takes one pre-formatted string, which is valid on Python 2 and 3.
# No space follows the tab because the old print statement emitted none after
# a string ending in '\t'.
print(' lregACCURACY:\t%s' % (classifier.score(transformed_test, pol_test),))  # 0.866095238095
logit_list = list(array(classifier.predict(transformed_test)))

print('sgdc ACCURACY:\t%s' % (classifierSGDC.score(transformed_test, pol_test),))
# Fixed: this list used to be built from `classifier` (the Perceptron), so it
# duplicated logit_list instead of holding the SGD predictions.
SGDC_list = list(array(classifierSGDC.predict(transformed_test)))

print(' nbACCURACY:\t%s' % (classifierNB.score(transformed_test, pol_test),))
# Toy, linearly separable data set with labels in {-1, +1}.
X = np.array([[1,1], [2,2], [4,4], [5,5]])
y = np.array([-1, -1, 1, 1])
# Initial weight vector handed to the hand-written perceptron.
w = np.array([0,99, 5])

# Hand-written Perceptron class (defined before this point), trained for 20 epochs.
Example2Perceptron = Perceptron(X, y, plot_data_lines=True, plot_errors=True)
w_ex2 = Example2Perceptron.train(w, epochs=20)
print(w_ex2)

# From here on the name Perceptron refers to scikit-learn's estimator.
from sklearn.linear_model import Perceptron

sk_perceptron = Perceptron(tol=1e-5, random_state=0).fit(X, y)
print(sk_perceptron.score(X, y))
print(sk_perceptron.get_params())
print([sk_perceptron.coef_, sk_perceptron.intercept_])
print(sk_perceptron.n_iter_)

# Same estimator on the larger train/test split defined elsewhere.
sk_bigdata = Perceptron(max_iter=1000, eta0=0.1, tol=1e-5, random_state=0).fit(X_train, y_train)
print([sk_bigdata.coef_, sk_bigdata.intercept_])
print('Accuracy Training= ', sk_bigdata.score(X_train, y_train)*100)
print('Accuracy Testing= ', sk_bigdata.score(X_test, y_test)*100)

# Covariance of X by hand (centre the columns, then divide by the number of
# rows) and through numpy with ddof=0.
ciplakAyak = X - X.mean(axis=0)
cov = ciplakAyak.T.dot(ciplakAyak) / len(X)
print(cov)
cov_numpy = np.cov(X, rowvar=False, ddof=0)
def test_perceptron_accuracy():
    """Training accuracy must exceed 0.7 on both ``X`` and ``X_csr``."""
    for features in (X, X_csr):
        # 100 unshuffled epochs with the tolerance-based stop disabled.
        model = Perceptron(max_iter=100, tol=None, shuffle=False)
        model.fit(features, y)
        assert_greater(model.score(features, y), 0.7)
print("Training...")

# NOTE(review): the loop extent is reconstructed from whitespace-collapsed
# source. It is assumed that shuffle + split + fit + score repeat 10 times and
# that the tree export happens once at the end - confirm against the original.
for _ in range(10):
    random.shuffle(rawData)

    # Every 10th row of the shuffled data is held out for testing.
    # Column 0 of a row is the class, the remaining columns are the features.
    heldOut = [row for i, row in enumerate(rawData) if i % 10 == 0]
    kept = [row for i, row in enumerate(rawData) if i % 10 != 0]

    trainClass = np.array([row[0] for row in kept])
    trainData = np.array([row[1:] for row in kept])
    testClass = np.array([row[0] for row in heldOut])
    testData = np.array([row[1:] for row in heldOut])

    model = Perceptron()
    model.fit(trainData, trainClass)
    model1 = tree.DecisionTreeClassifier(max_depth=3)
    model1.fit(trainData, trainClass)

    print(model.score(testData, testClass))

# Render the last fitted decision tree to tree.pdf.
dot_data = StringIO()
tree.export_graphviz(model1, out_file=dot_data)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("tree.pdf")
# Each section fits one model, predicts the test set and stores the test
# accuracy as a percentage rounded to 2 decimals. The discarded
# `Y_pred.tolist()` / `Y_test.tolist()` calls were dropped: their results were
# never used. The bare `acc_*` expressions are kept because they display the
# value when this runs as notebook cells.

# k-nearest neighbours (19 neighbours, distance-weighted)
knn = KNeighborsClassifier(n_neighbors=19, weights='distance')
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X_test, Y_test) * 100, 2)
acc_knn

#-------------------------------------------------------------------------------------------------------
# Perceptron
perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_test, Y_test) * 100, 2)
acc_perceptron

#-------------------------------------------------------------------------------------------------------
# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_test, Y_test) * 100, 2)
acc_linear_svc

#-------------------------------------------------------------------------------------------------------
def _split_features_labels(frame, convert_to_num):
    """Return (features, labels): all columns but the last, and the last column."""
    # .iloc replaces DataFrame.ix, which was removed in pandas 1.0; the slices
    # are positional, exactly as .ix treated them for non-integer column labels.
    features = frame.iloc[:, 0:-1]
    labels = frame.iloc[:, -1]
    if convert_to_num:
        features = features.applymap(str_to_num)
        labels = labels.map(str_to_num)
    return features, labels


def _fit_and_report(label, classifier, X, Y, testing_X, testing_Y):
    """Fit ``classifier`` on the training data and report its test accuracy."""
    classifier.fit(X, Y)
    printAccuracy(label, classifier.score(testing_X, testing_Y))


def main(argv):
    """Train several classifiers on classifier predictions and report accuracies.

    Args:
        argv: command-line vector; ``argv[1]`` is the input CSV and
            ``argv[2]`` an output name (required, but not used by this
            function). The last CSV column is the label, all preceding
            columns are features.

    The rows are split at random (about 80% training / 20% testing). Four
    linear and three non-linear classifiers are fitted and scored, and a
    per-row majority vote over the feature columns is scored last. Prints a
    usage message and returns early when an argument is missing.
    """
    try:
        input_csv = argv[1]
        output_model = argv[2]
    except IndexError:
        print("Error, usage: \"python3 {} <input_csv> <output_csv>\"".format(argv[0]))
        return

    df = pd.read_csv(input_csv)
    convert_to_num = True
    print_individual_classifier_accuracies(df)

    # Split data into test and train
    msk = np.random.rand(len(df)) < 0.8
    X, Y = _split_features_labels(df[msk].copy(), convert_to_num)
    testing_X, testing_Y = _split_features_labels(df[~msk].copy(), convert_to_num)

    print("\nTraining on Classifier Predictions:")

    # LINEAR CLASSIFIERS
    print("Linear Classifiers\n")

    from sklearn.linear_model import LogisticRegression
    # Hyper Parameters: tol was declared but never passed; it equals the
    # estimator's default, so passing it does not change the model.
    tol = 0.0001
    _fit_and_report("Logistic Regression", LogisticRegression(tol=tol),
                    X, Y, testing_X, testing_Y)

    from sklearn.linear_model import Perceptron
    _fit_and_report("Perceptron", Perceptron(), X, Y, testing_X, testing_Y)

    from sklearn.naive_bayes import GaussianNB
    _fit_and_report("Gaussian Naive Bayes", GaussianNB(), X, Y, testing_X, testing_Y)

    from sklearn.svm import LinearSVC
    _fit_and_report("Linear SVM", LinearSVC(), X, Y, testing_X, testing_Y)

    # NONLINEAR ALGOS
    print("\nNonlinear Classifiers\n")

    from sklearn.tree import DecisionTreeClassifier
    _fit_and_report("Decision Tree", DecisionTreeClassifier(), X, Y, testing_X, testing_Y)

    from sklearn.ensemble import RandomForestClassifier
    n_estimators = 22
    _fit_and_report("Random Forest", RandomForestClassifier(n_estimators=n_estimators),
                    X, Y, testing_X, testing_Y)

    from sklearn.neighbors import KNeighborsClassifier
    # The declared hyper-parameter is now actually used; before, the
    # classifier silently ran with its default neighbour count.
    n_neighbors = 20
    _fit_and_report("KNN", KNeighborsClassifier(n_neighbors=n_neighbors),
                    X, Y, testing_X, testing_Y)

    # VOTING: the most frequent value among a row's feature columns is the
    # prediction for that row.
    print("\nMajority Vote Classifier\n")
    V_correct = 0
    V_total = len(testing_X)
    for idx, row in testing_X.iterrows():
        prediction = Counter(row).most_common(1)[0][0]
        # idx is the row label produced by iterrows, which testing_Y shares.
        if testing_Y[idx] == prediction:
            V_correct += 1
    printAccuracy("Voting", V_correct / V_total)

    print("\n\nDone.")
def _train_accuracy_pct(model):
    """Return ``model``'s accuracy on the training data, in percent (2 decimals)."""
    return round(model.score(X_train, Y_train) * 100, 2)


# NOTE: every accuracy below is measured on the training data, not on X_test.

# k-nearest neighbours:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
acc_knn = _train_accuracy_pct(knn)

# Gaussian Naive Bayes:
gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = _train_accuracy_pct(gaussian)

# Perceptron:
perceptron = Perceptron(max_iter=5)
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = _train_accuracy_pct(perceptron)

# Linear Support Vector Machine:
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = _train_accuracy_pct(linear_svc)

# Decision Tree
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, Y_train)
Y_pred = decision_tree.predict(X_test)
acc_decision_tree = _train_accuracy_pct(decision_tree)

"""==================================== Which is the best Model ? ================================"""
# Step 1: Fit the CountVectorizer to the trainTweets
countVec.fit(trainTweets)
# Every print below takes one pre-formatted string in parentheses, which
# produces the same output on Python 2 and Python 3.
print("Vocab of countVec")
print(countVec.get_feature_names())

# Step 2: Implement getFeatures() to return a feature matrix for any
# list of tweets.

# Now get train features and fit the model.
trainX = getFeatures(trainTweets, countVec, dictVec, True, True)
perceptron.fit(trainX, trainY)

# Get features and labels for the development set. The file is opened in a
# `with` block so the handle is closed right after loading; it used to leak.
with open(devSetPath, 'rb') as devFile:
    devSet = p.load(devFile)
devTweets = [d[0] for d in devSet]
devX = getFeatures(devTweets, countVec, dictVec)
devY = [d[1] for d in devSet]
# Fixed label: this is the distribution of the development labels (devY); it
# used to be announced as the training distribution.
print("Dev label distribution %s" % (getLabelDist(devY),))

# The former bare `perceptron.predict(devX)` call was dropped: its result was
# discarded, and score() below predicts internally.

# Print out accuracy for trainSet
print("Train set accuracy: %s" % (perceptron.score(trainX, trainY),))

# Print out accuracy for devSet
print("Dev set accuracy: %s" % (perceptron.score(devX, devY),))
# Standardizing the features: the scaler learns mean / standard deviation
# from the training data only and is applied to both splits.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)


# ## Training a perceptron via scikit-learn

ppn = Perceptron(max_iter=40, eta0=0.1, random_state=1)
ppn.fit(X_train_std, y_train)

# Evaluate on the held-out test set; the accuracy is printed twice, once via
# accuracy_score and once via the estimator's own score method.
y_pred = ppn.predict(X_test_std)
misclassified = (y_test != y_pred).sum()
print('Misclassified examples: %d' % misclassified)
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))
print('Accuracy: %.3f' % ppn.score(X_test_std, y_test))


# Plotting the decision regions of the fitted perceptron over the combined
# (training rows first, then test rows) standardized data; test_idx marks the
# positions of the test rows in the combined arrays.
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=ppn,
    test_idx=range(105, 150),
)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
def main( argv ):
    """Cross-validate three classifiers on a CSV and write out their predictions.

    For each of ``split_count`` splits, ``train_split_<i>.csv`` and
    ``test_split_<i>.csv`` are read (they are expected to be produced by
    ``crossValidationGenerator.splitData`` -- confirm against that module).
    The last column of each file is used as the label and all other columns
    as features. A Random Forest, a Perceptron and a K-NN classifier are
    trained on the training file and scored on the test file, and the
    per-split accuracy is printed. Finally one CSV row per prediction is
    written: the three predicted labels followed by the matching entry of
    ``getY( input_csv_filename )``.

    :param argv: command-line arguments; ``argv[ 1 ]`` is the input CSV and
        ``argv[ 2 ]`` the output CSV. If either is missing a usage message is
        printed and the function returns without doing anything.
    :return: None
    """
    try:
        input_csv_filename = argv[ 1 ]
        output_csv_filename = argv[ 2 ]
    except IndexError:
        print( "Error, usage: \"python3 {} <CSV> <output_CSV>\"".format( argv[ 0 ] ) )
        return

    # Function-scope imports, matching the style already used by this script.
    import crossValidationGenerator as cvg
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.neighbors import KNeighborsClassifier

    # Cross validation parameters
    split_count = 3

    # Hyper parameters
    n_estimators = 60   # Random Forest
    alpha = 0.0001      # Perceptron
    n_iter = 20         # Perceptron: number of passes over the training data
    n_neighbors = 20    # K-NN

    cvg.splitData( input_csv_filename, split_count )
    Y_results = getY( input_csv_filename )

    RF_predictions = []
    P_predictions = []
    KNN_predictions = []

    for set_idx in range( split_count ):
        print( "\n{} Starting split {}:".format( str( datetime.now() ), set_idx + 1 ) )
        train_filename = "train_split_{}.csv".format( set_idx )
        test_filename = "test_split_{}.csv".format( set_idx )

        # Read training data (features = all but the last column).
        # .iloc replaces .ix, which has been removed from pandas.
        train_df = pd.read_csv( train_filename )
        X = train_df.iloc[ :, 0:-1 ]
        Y = train_df.iloc[ :, -1 ]

        # Read test data
        test_df = pd.read_csv( test_filename )
        test_X = test_df.iloc[ :, 0:-1 ]
        test_Y = test_df.iloc[ :, -1 ]

        # ( label in the "Training ..." message, label in the "... completed"
        #   message, estimator, list collecting its predictions )
        # max_iter with tol=None runs exactly n_iter epochs, which is what
        # the removed n_iter argument did.
        models = [
            ( "Random Forest", "Random forest",
              RandomForestClassifier( n_estimators = n_estimators ),
              RF_predictions ),
            ( "Perceptron", "Perceptron",
              Perceptron( alpha = alpha, max_iter = n_iter, tol = None ),
              P_predictions ),
            ( "KNN", "K-NN",
              KNeighborsClassifier( n_neighbors = n_neighbors ),
              KNN_predictions ),
        ]

        for train_label, done_label, classifier, predictions in models:
            print( "{} | Training {}".format( str( datetime.now() ), train_label ) )
            classifier.fit( X, Y )
            predictions.extend( classifier.predict( test_X ) )
            print( "{} > {} completed for split {} with accuracy {}%\n".format(
                str( datetime.now() ), done_label, set_idx + 1,
                100 * classifier.score( test_X, test_Y ) ) )

    # Write header and all rows through a single file handle instead of
    # reopening the file in append mode for every row. Values are passed
    # through str() so that non-string labels do not break ','.join().
    with open( output_csv_filename, 'w' ) as output_stream:
        output_stream.write( "Random_Forest,Perceptron,KNN,Label\n" )
        for idx, rf_label in enumerate( RF_predictions ):
            row = [ rf_label, P_predictions[ idx ], KNN_predictions[ idx ], Y_results[ idx ] ]
            output_stream.write( ','.join( str( value ) for value in row ) )
            output_stream.write( '\n' )

    print( "\n\nComplete at {}\n\n".format( str( datetime.now() ) ) )
[0.7, 0.8] ]) Y = np.array([1, 1, 1, 0]) h = 0.02 # create a mesh to plot in x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) fig = plt.figure() for e in range(1, 7): print '\nStarting epoch', e clf = Perceptron(n_iter=e, verbose=5).fit(X, Y) print clf.intercept_, clf.coef_ Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) # fig.add_subplot(1, 5, e) plt.contourf(xx, yy, Z, cmap=plt.cm.Paired) # ax.contourf(xx, yy, Z, cmap=plt.cm.Paired) # Plot also the training points plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired) plt.title('Epoch %s' % e) if clf.score(X, Y) == 1: print 'converged in epoch', e break plt.show()
# Compare Perceptron test accuracy before and after feature scaling.
# X_train and scaler are defined before this fragment.
X_test = pd.read_csv('perceptron-test.csv', header=None)

# The first column is the target; the remaining columns are the features.
y = X_train[X_train.columns[0]]
X_train = X_train.drop(X_train.columns[0], axis=1, inplace=False)
print X_train

clf = Perceptron(random_state=42)
clf.fit(X_train, y)
print clf.predict(X_train)

# 0.34  (presumably the accuracy observed on unscaled data -- TODO confirm)
y1 = X_test[X_test.columns[0]]
X_test = X_test.drop(X_test.columns[0], axis=1, inplace=False)
score = clf.score(X_test, y1)
print score

# Fit the scaler on the training features only, then reuse it on the test set.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
clf = Perceptron(random_state=42)
clf.fit(X_train_scaled, y)

# 0.89  (presumably the accuracy observed on scaled data -- TODO confirm)
score_scaled = clf.score(X_test_scaled, y1)
print score_scaled

# Accuracy gained by scaling.
print (score_scaled - score)
#!/usr/bin/env python
"""Measure how much feature standardization improves Perceptron accuracy.

Trains a Perceptron on the raw features and again on standardized features,
prints both test accuracies and passes their difference to ``answer``.
"""
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
import numpy as np

from titanic import answer

if __name__ == '__main__':
    # Column 0 of each CSV is the target, the remaining columns are features.
    train_data = np.genfromtxt('perceptron-train.csv', delimiter=',')
    test_data = np.genfromtxt('perceptron-test.csv', delimiter=',')
    X_train_data = train_data[:, 1:]
    Y_train_data = train_data[:, 0]
    X_test_data = test_data[:, 1:]
    Y_test_data = test_data[:, 0]

    scaler = StandardScaler()
    clf = Perceptron(random_state=241)

    # Baseline: raw features. score() already returns a single accuracy
    # value, so no .mean() is needed (and a plain float has no .mean()).
    clf.fit(X_train_data, Y_train_data)
    scores = clf.score(X_test_data, Y_test_data)
    print(scores)

    # Standardized features: the scaler is fitted on the training set only
    # and reused unchanged on the test set.
    X_train_data_scaled = scaler.fit_transform(X_train_data)
    X_test_data_scaled = scaler.transform(X_test_data)
    clf.fit(X_train_data_scaled, Y_train_data)
    scaled_scores = clf.score(X_test_data_scaled, Y_test_data)
    print(scores, scaled_scores)

    answer(scaled_scores - scores, 'feature_normalization.txt')
# Getting more accuray without Normalization ppn = Perceptron(max_iter=100, eta0=0.01, random_state=0) ppn.fit(X_train_std, y_train) #This is training the model y_pred = ppn.predict(X_test_std) #Test/Validating the model #Printing of results and plot print(confusion_matrix(y_test, y_pred)) print(classification_report(y_test, y_pred)) print('Misclassified samples: %d' % (y_test != y_pred).sum()) print('Accuracy for Perceptron: %.2f' % accuracy_score(y_test, y_pred)) print('Test Accuracy for Perceptron: %.2f' % ppn.score(X_test_std, y_test)) print('Train Accuracy for Tree: %.2f' % ppn.score(X_train_std, y_train)) # plot data def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): # setup marker generator and color map markers = ('s', 'x', 'o', '^', 'v') colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') cmap = ListedColormap(colors[:len(np.unique(y))]) # plot the decision surface x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
['Nominated Best Picture', 'Won Best Picture', 'Num of Awards'], [ 'genres', 'plot_keywords', 'movie_imdb_link', 'director_name', 'actor_3_facebook_likes', 'actor_2_name', 'actor_1_facebook_likes', 'actor_1_name', 'movie_title', 'cast_total_facebook_likes', 'actor_3_name', 'facenumber_in_poster', 'language', 'country', 'content_rating', 'budget', 'actor_2_facebook_likes', 'aspect_ratio' ], 'movies_original.csv') preprocessor.preprocess() preprocessor.numerify() # Create the test set: preprocessor.create_test_set(0.3, 0, True) # Perform cross-validation: clf = Perceptron() clf = clf.fit(preprocessor.features_numerical, preprocessor.labels_numerical[0]) """ scores = cross_validation.cross_val_score(clf, preprocessor.features_numerical, preprocessor.labels_numerical[2], cv=10) print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) """ score = clf.score(preprocessor.test_features, preprocessor.test_labels) print("Accuracy after testing (no CV): %3.2f%%") % (score * 100)
plt.show() # %% # incarcarea datelor de antrenare X = np.loadtxt('./datalab14/3d-points/x_train.txt') y = np.loadtxt('./datalab14/3d-points/y_train.txt', 'int') plot3d_data(X, y) # incarcarea datelor de testare X_test = np.loadtxt('./datalab14/3d-points/x_test.txt') y_test = np.loadtxt('./datalab14/3d-points/y_test.txt', 'int') # %% perceptron_model.fit(X, y) print(perceptron_model.score(X, y)) print(perceptron_model.score(X_test, y_test)) W = perceptron_model.coef_ b = perceptron_model.intercept_ epochs = perceptron_model.n_iter_ print(W) print(b) plot3d_data_and_decision_function(X_test, y_test, W[0], b) # %% X = np.loadtxt('./datalab14/MNIST/train_images.txt') y = np.loadtxt('./datalab14/MNIST/train_labels.txt', 'int') # incarcarea datelor de testare X_test = np.loadtxt('./datalab14/MNIST/test_images.txt') y_test = np.loadtxt('./datalab14/MNIST/test_labels.txt', 'int')
# Linear SVC (svc is created before this fragment; presumably already
# fitted -- TODO confirm).
# NOTE(review): every *_score below is computed on (x_train, y_train), i.e.
# it is training accuracy; the y_pred_* test predictions are not used for
# scoring in this fragment.
y_pred_svc = svc.predict(x_test)
svc_score = svc.score(x_train, y_train)
print("Linear SVC Score:", svc_score, round(svc_score * 100, 2))

#KNN
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)
y_pred_knn = knn.predict(x_test)
knn_score = knn.score(x_train, y_train)
print("KNN Score:", knn_score, round(knn_score * 100, 2))

#Perceptron
pt = Perceptron(max_iter=5)
pt.fit(x_train, y_train)
y_pred_pt = pt.predict(x_test)
pt_score = pt.score(x_train, y_train)
print("Perceptron Score:", pt_score, round(pt_score * 100, 2))

#Decision Tree Classifier
tree = DecisionTreeClassifier()
tree.fit(x_train, y_train)
y_pred_tree = tree.predict(x_test)
tree_score = tree.score(x_train, y_train)
print("Decision Tree Classifier Score:", tree_score, round(tree_score * 100, 2))

#GaussianNB
nb = GaussianNB()
nb.fit(x_train, y_train)
y_pred_nb = nb.predict(x_test)
nb_score = nb.score(x_train, y_train)
EPOCHS = 200 # Initialize perceptrons my_p = my_perceptron.MyPerceptron(LR, EPOCHS) sk_p = Perceptron(max_iter=EPOCHS, tol=1e-3) # MY DATA train = [[5, 5, 1], [6, 5, 0], [5, 5, 3], [1, 2, 1], [2, 2, 3], [0, 1, 2]] labels = [1, 1, 1, -1, -1, -1] my_p.fit(train, labels) sk_p.fit(train, labels) my_score = my_p.test(train, labels) sk_score = sk_p.score(train, labels) print("\nMy dataset:") print("My perceptron score: " + str(my_score)) print("Sklearn perceptron score: " + str(sk_score)) # IRIS DATA STUFF # Download iris dataset, define labels iris = load_iris() data = iris['data'] SETOSA = 0 VERSICOLOR = 1 VIRGINICA = 2
y_pred = clf.predict(X_test) acc = accuracy_score(y_test, y_pred) iteration_values.append(acc) print(i, acc) # Plot plt.plot(range(1, 30), iteration_values) plt.xlabel('max_iter') plt.ylabel('Accuracy') # In[88]: per_clf = Perceptron(max_iter=4, tol=None) per_clf.fit(X_train, y_train) y_pred = per_clf.predict(X_test) print('Score: %.2f%%' % (round(per_clf.score(X_test, y_test) * 100, 4))) print('Accuracy: %.2f' % (accuracy_score(y_test, y_pred))) # ### 11. Stochastic Gradient Decent (SGD) # In[89]: sgd_clf = SGDClassifier(max_iter=8, tol=None) sgd_clf.fit(X_train, y_train) y_pred = sgd_clf.predict(X_test) print('Score: %.2f%%' % (round(sgd_clf.score(X_test, y_test) * 100, 4))) print('Accuracy: %.2f' % (accuracy_score(y_test, y_pred))) # ### 12. Bagging # In[90]:
""" ... needs import np statement For example, when dealing with boolean features, x_i^n = x_i for all n and is therefore useless; but x_i x_j represents the conjunction of two booleans. This way, we can solve the XOR problem with a linear classifier: """ from sklearn.linear_model import Perceptron from sklearn.preprocessing import PolynomialFeatures import numpy as np X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) y = X[:, 0] ^ X[:, 1] X = PolynomialFeatures(interaction_only=True).fit_transform(X) print X # array([[1, 0, 0, 0], # [1, 0, 1, 0], # [1, 1, 0, 0], # [1, 1, 1, 1]]) clf = Perceptron(fit_intercept=False, n_iter=10, shuffle=False).fit(X, y) print clf.score(X, y)
return max_count_tp def predict(self, x_array): pred_y = np.ones((x_array.shape[0], 1)) for i in range(x_array.shape[0]): pred_y[i] = self.predict_point(x_array[i]) return pred_y.flatten() def score(self, x_te, y_te): pred_y = self.predict(x_te) return sum(pred_y == y_te) / len(y_te) clf = M_KNN(n_neighbor = 3, p = 2) clf.fit(x_tr, y_tr) clf.predict_point(np.array([6, 3])) clf.score(x_te, y_te) # draw it sns.lmplot(x='sepal length', y='sepal width', hue='label', data=df.loc[:100, :], fit_reg=False) plt.scatter(6, 3 , color='red', edgecolors='grey') plt.show() ### 3.2.1 iris K近邻 sklearn实例 from sklearn.neighbors import KNeighborsClassifier clf_sk = KNeighborsClassifier(p=2) clf_sk.fit(x_tr, y_tr) clf_sk.score(x_te, y_te)
def test_perceptron_accuracy():
    """Check the Perceptron reaches at least 0.7 training accuracy.

    The check is repeated for both module-level fixtures ``X`` and ``X_csr``.
    """
    for samples in (X, X_csr):
        model = Perceptron(n_iter=30, shuffle=False, seed=0)
        model.fit(samples, y)
        # Accuracy is measured on the same data the model was fitted on.
        assert_true(model.score(samples, y) >= 0.7)
from sklearn.linear_model import Perceptron
import numpy as np

# Three 2-D training points: two labelled +1 and one labelled -1.
X_train = np.array([[3, 3], [4, 3], [1, 1]])
y = np.array([1, 1, -1])

# fit() returns the estimator itself, so construction and training chain.
perceptron = Perceptron().fit(X_train, y)

# Learned weights, bias and the iteration count reported by the model.
print("w:", perceptron.coef_, "\n",
      "b:", perceptron.intercept_, "\n",
      "n_iter:", perceptron.n_iter_)

# Fraction of training points classified correctly, shown as a percentage.
res = perceptron.score(X_train, y)
print("correct rate:{:.0%}".format(res))

# Disabled alternative: the same experiment written with SGDClassifier.
# from sklearn.linear_model import Perceptron
# from sklearn.linear_model import SGDClassifier
# import numpy as np
#
# X_train = np.array([[3, 3], [4, 3], [1, 1]])
# y = np.array([1, 1, -1])
# #perceptron=Perceptron(penalty="l2",alpha=0.01,eta0=1,max_iter=50,tol=1e-3)
# #perceptron=Perceptron()
# perceptron=SGDClassifier(loss="perceptron",eta0=1, learning_rate="constant", penalty=None)
# perceptron.fit(X_train,y)
# print(perceptron.coef_)
# print(perceptron.intercept_)
# print(perceptron.n_iter_)
# X=np.array([[2,2]])
# y=perceptron.predict(X)
save_sparse_vectors('/home/jack/NLP/csr.npz', vectors, labels) save_sparse_vectors('/home/jack/NLP/dev_csr.npz', dev_vectors, dev_labels) return (vectors, labels, dev_vectors, dev_labels) if LOAD_FROM_FILE: try: vectors, labels = load_sparse_vectors(TRAINING_VECTORS_PATH) dev_vectors, dev_labels = load_sparse_vectors(DEV_VECTORS_PATH) except Exception as e: print 'Failed to load from File calculating feature vectors' vectors, labels, dev_vectors, dev_labels = calculate_vectors(TRAINING_PATH, DEV_PATH) else: 'Computing feature vectors' vectors, labels, dev_vectors, dev_labels = calculate_vectors(TRAINING_PATH, DEV_PATH) #random_state gives the seeed, none seams to always give the same result perceptron = Perceptron(shuffle=True, n_iter=5, random_state=1000) perceptron = perceptron.fit(vectors, labels) predictions = perceptron.predict(dev_vectors) score = perceptron.score(dev_vectors, dev_labels) print score print confusion_matrix(loaded_dev_labels, predictions, labels=['entailment', 'contradiction', 'neutral']) print classification_report(loaded_dev_labels, predictions,labels=['entailment', 'contradiction', 'neutral'])
# Load the training and test sets. Column 0 is the target and the columns
# from 1 onward are the features; each set is kept in a Bunch with .data and
# .target attributes.
data = pandas.read_csv('perceptron-train.csv', header=None)
train, test = Bunch(), Bunch()
train.data, train.target = data.loc[:, 1:], data.loc[:, 0]
data = pandas.read_csv('perceptron-test.csv', header=None)
test.data, test.target = data.loc[:, 1:], data.loc[:, 0]

# 2. Train a perceptron with default parameters and random_state=241
perc = Perceptron(random_state=241)
perc.fit(train.data, train.target) # learning

# 3. Compute the quality (fraction of correctly classified objects,
# accuracy) of the resulting classifier on the test set.
accuracy = perc.score(test.data, test.target) # predicting
print accuracy

# 4. Normalize the training and test sets using the
# StandardScaler class.
scaler = StandardScaler() # scaling
train_scaled, test_scaled = Bunch(), Bunch()
# The scaler is fitted on the training features and reused for the test set.
train_scaled.data = scaler.fit_transform(train.data)
test_scaled.data = scaler.transform(test.data)
train_scaled.target, test_scaled.target = train.target, test.target

# 5. Train the perceptron on the new sets. Find the fraction of correct
# answers on the test set.
perc.fit(train_scaled.data, train_scaled.target)
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Compare Perceptron test accuracy on raw versus standardized features.
# NOTE(review): header=0 makes pandas treat the first row of each file as
# column names. If these CSVs have no header row, the first sample of each
# file is silently dropped -- confirm against the data files.
data_test = pd.read_csv('C:/temp/machine learning/courseraYa/perceptron-test.csv', header=0)
data_train = pd.read_csv('C:/temp/machine learning/courseraYa/perceptron-train.csv', header=0)

y_train = data_train.iloc[:,0] # classes / target values
X_train = data_train.iloc[:,1:] # features
y_test = data_test.iloc[:,0] # classes / target values
X_test = data_test.iloc[:,1:] # features

# Baseline model on the raw features.
clf = Perceptron(random_state=241, shuffle = True)
clf.fit(X_train, y_train)
#predictions = clf.predict(X_test)
acur = clf.score(X_test,y_test)
print(acur)

# Fit the scaler on the training features only and reuse it for the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same model configuration, trained on the standardized features.
clf_scaled = Perceptron(random_state=241, shuffle = True)
clf_scaled.fit(X_train_scaled, y_train)
#predictions = clf.predict(X_test)
acur_scaled = clf_scaled.score(X_test_scaled,y_test)
print(acur_scaled)
from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron
import numpy as np

# Load the digits dataset as a (features, targets) pair.
X, Y = load_digits(return_X_y=True)

# fit() returns the estimator, so the model is built and trained in one step.
clf = Perceptron(tol=1e-3, random_state=0).fit(X, Y)

# Accuracy on the data the model was trained on.
print(clf.score(X, Y))

# Shape and number of dimensions of the feature array.
print(X.shape)
print(X.ndim)
# Build the dataset, train a Perceptron and score it, timing each phase.
# In every phase end_time is reused to hold the elapsed seconds, not a
# timestamp.
# NOTE(review): the messages call this the "Training Dataset" although the
# traversal is driven by testing_directory -- confirm the wording.
print "Reading files to make Training Dataset "
start_time = time.time()
traverse_over_files(str(testing_directory))
end_time = time.time() - start_time
print "It took "+ str(end_time) + " to make the Training Dataset"
print "Training Dataset completed"

print '\nTraining data'
start_time = time.time()
perceptron_classifier = Perceptron()
perceptron_classifier.fit(final_training_dataset_keys, final_training_dataset_values)
end_time = time.time() - start_time
print "It took "+ str(end_time) + " to train the classifiers"
print 'Training Completed'

print '\nTesting data '
start_time = time.time()
# Calculating Accuracy
perceptron_classifier_accuracy = perceptron_classifier.score(final_testing_dataset_keys, final_testing_dataset_values)
end_time = time.time() - start_time
print "It took "+ str(end_time) + " to test the data "
print 'Testing Completed'

# print '\nprinting Accuracy'
print "\nCase "+str(testing_directory)+": Testing folder is part"+str(testing_directory)
print "-------------------------------------------------"
print "Perceptron accuracy : "+ str(perceptron_classifier_accuracy)
# print 'Training Size:'+str(len(final_training_dataset_keys))+' and Testing size = '+str(len(final_testing_dataset_keys))
print(RF_train_score) end = time.process_time() print("total time taken Random Forest Search: {} min".format( (end - start) / 60)) # Perceptron print("=== Perceptron===") start = time.process_time() per_clf = Perceptron(penalty='l1', verbose=1) per_clf.fit(X_train, y_train) print(per_clf.predict(X_test[[332]])) print(y_test[332]) per_test_score = per_clf.score(X_test, y_test) print(per_test_score) per_train_score = per_clf.score(X_train, y_train) print(per_train_score) end = time.process_time() print("total time taken for Perceptron: {} min".format((end - start) / 60)) # visualization import matplotlib.pyplot as plt N = 6 train_acc = [ RF_train_score, svm_linear_train_score, per_train_score, NB_train_score,
# Fit several classifiers on the same training split and record an accuracy
# figure (percentage, two decimals) for each.
# NOTE(review): every acc_* value is computed with score(X_train, Y_train),
# i.e. on the training data; Y_pred is reassigned by each model in turn.

# K-nearest neighbours with k=3
knn=KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train,Y_train)
Y_pred=knn.predict(X_test)
acc_knn=round(knn.score(X_train,Y_train)*100,2)
#print(acc_knn)

# Gaussian naive Bayes
gaussian=GaussianNB()
gaussian.fit(X_train,Y_train)
Y_pred=gaussian.predict(X_test)
acc_gaussian=round(gaussian.score(X_train,Y_train)*100,2)
#print(acc_gaussian)

# Perceptron with default parameters
perceptron=Perceptron()
perceptron.fit(X_train,Y_train)
Y_pred=perceptron.predict(X_test)
acc_perceptron=round(perceptron.score(X_train,Y_train)*100,2)
#print(acc_perceptron)

# Linear support vector classifier
linear_svc=LinearSVC()
linear_svc.fit(X_train,Y_train)
Y_pred=linear_svc.predict(X_test)
acc_linear_svc=round(linear_svc.score(X_train,Y_train)*100,2)
#print(acc_linear_svc)

# Linear model trained with stochastic gradient descent
sgd=SGDClassifier()
sgd.fit(X_train,Y_train)
Y_pred=sgd.predict(X_test)
acc_sgd=round(sgd.score(X_train,Y_train)*100,2)
#print(acc_sgd)

# Decision tree (only constructed within this fragment)
decision_tree=DecisionTreeClassifier()
from sklearn.metrics import accuracy_score

# Test accuracies collected over 100 repeated fits: x_std for the model
# trained on standardized features, x for the model trained on raw features.
x_std = []
x = []
for _ in range(100):
    # Classify test samples (model trained on standardized features).
    # random_state=np.random draws from NumPy's global random state, so every
    # repetition gets a different shuffle.
    ppn = Perceptron(max_iter=1000, eta0=0.001, random_state=np.random)
    ppn.fit(X_train_std, y_train)
    y_pred = ppn.predict(X_test_std)

    # Training without normalization
    ppn_z_out = Perceptron(max_iter=1000, eta0=0.001, random_state=np.random)
    ppn_z_out.fit(X_train, y_train)
    y_pred_z_out = ppn_z_out.predict(X_test)

    x_std.append(ppn.score(X_test_std, y_test))
    # The raw-feature accuracy must come from the raw-feature model;
    # previously this scored the standardized model `ppn` on unscaled data.
    x.append(ppn_z_out.score(X_test, y_test))

# Keep the converted arrays (the conversion results used to be discarded).
x_std = np.array(x_std)
x = np.array(x)

print('Normalizado Media: ')

# Measuring the accuracy in 3 different ways (values from the last repetition)
print('\nClassificador normalizado')
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy: %.2f' % ppn.score(X_test_std, y_test))

print('\nClassificador sem z-score')
print('Misclassified samples: %d' % (y_test != y_pred_z_out).sum())
perc.fit(pca_x_train, y_train) y_pred7 = perc.predict(pca_x_test) m7_acc = accuracy_score(y_test, y_pred6) m7_acc # Random forest with PCA reduction rf.fit(pca_x_train, y_train) y_pred6 = rf.predict(pca_x_test) m8_acc = accuracy_score(y_test, y_pred6) m8_acc """Tuning hyperparameters of some models.""" from sklearn.linear_model import Perceptron perceptron = Perceptron(max_iter=1000, eta0=0.01) perceptron.fit(x_train, y_train) perceptron.score(x_test, y_test) from sklearn.metrics import classification_report y_pred = perceptron.predict(x_test) print(classification_report(y_test, y_pred)) from sklearn.svm import SVC svc = SVC(C=0.8, gamma='auto') svc.fit(x_train, y_train) svc.score(x_test, y_test) y_pred = svc.predict(x_test) print(classification_report(y_test, y_pred)) """Group Members: \ Chirag (B19CSE026) Gautam Kumar (B19EE031)
# Scatter rows 50..99 of X_train (labelled 'versicolor') using the first two
# feature columns, then show the figure.
plt.scatter(X_train[50:100, 0], X_train[50:100, 1], color='blue', marker='x', label='versicolor')
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc='upper left')
plt.show()

# Train a perceptron and report its performance on the training data itself.
pn = Perceptron(max_iter=10, eta0=0.1, random_state=0)
pn.fit(X_train, y_train)
# Number of misclassified training samples (printed twice: bare, then labelled).
print((y_train != pn.predict(X_train)).sum())
print("Error :", (y_train != pn.predict(X_train)).sum())
print("SCORE :", pn.score(X_train, y_train))

# The string literal below is disabled plotting code (it reads pn.errors);
# it is an expression statement with no effect.
''' plt.plot(range(1, len(pn.errors) + 1), pn.errors, marker='o') plt.xlabel('Epochs') plt.ylabel('Number of misclassifications') plt.show() '''

# Draw the decision regions learned by the perceptron.
plot_decision_regions(X_train, y_train, classifier=pn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import confusion_matrix
from sklearn.linear_model import Perceptron
import matplotlib.pyplot as plt
import numpy as np
from itertools import product

# Truth table of a two-input gate: only (0, 0) maps to 0.
data = [[0, 0], [0, 1], [1, 0], [1, 1]]
labels = [0, 1, 1, 1]

# Show the four points coloured by label, then clear the figure.
plt.scatter([first for first, _ in data],
            [second for _, second in data],
            c=labels)
plt.show()
plt.clf()

# Train on the four points and report training accuracy plus the signed
# decision values at three probe points.
classifier = Perceptron(max_iter=40).fit(data, labels)
print(classifier.score(data, labels))
print(classifier.decision_function([[0, 0], [1, 1], [0.5, 0.5]]))

# Build a 100 x 100 grid over the unit square.
x_values = np.linspace(0, 1, 100)
print(len(x_values))
y_values = np.linspace(0, 1, 100)
print(len(y_values))
point_grid = list(product(x_values, y_values))
print(len(point_grid))

# Absolute decision value of every grid point, reshaped back into a
# 100 x 100 matrix.
distances = classifier.decision_function(point_grid)
print(len(distances))
abs_distances = list(map(abs, distances))
print(len(abs_distances))
distances_matrix = np.reshape(abs_distances, (100, 100))
from sklearn.linear_model import Perceptron # In[13]: dataset_1 = np.loadtxt('sampleQuadData2.txt') (numSamples_1, numFeatures_1) = dataset_1.shape feat_1 = dataset_1[:,range(numFeatures_1-1)].reshape((numSamples_1, numFeatures_1-1)) output_1 = dataset_1[:, numFeatures_1-1].reshape((numSamples_1,)) (numSamples_1, numFeatures_1) = feat_1.shape perceptron_1 = Perceptron(fit_intercept=False) perceptron_1.fit(feat_1,output_1) perceptron_1.score(feat_1,output_1) # In[14]: dataset_2 = np.loadtxt('sampleQuadData2Transformed.txt') (numSamples_2, numFeatures_2) = dataset_2.shape feat_2 = dataset_2[:,range(numFeatures_2-1)].reshape((numSamples_2, numFeatures_2-1)) output_2 = dataset_2[:, numFeatures_2-1].reshape((numSamples_2,)) perceptron_2 = Perceptron(fit_intercept=False) perceptron_2.fit(feat_2,output_2) perceptron_2.score(feat_2,output_2)
def do_prob1_a(data_X, data_y, N_in, N_tol=10**6, N_out=10**5, exp_num=1000):
    """
    Finishes the problem1 part(a).

    Computes a theoretical generalization bound for a range of tolerance
    values, repeatedly trains a Perceptron on one K-fold chunk of the data,
    records the in-sample and out-of-sample errors, and plots the bound next
    to the empirical quantiles of |E_out - E_in|. The figure is saved to
    'problem1_a_Nin<N_in>.png' and then shown.

    :param data_X: features of data
    :param data_y: real values of data
    :param N_in: total number of in-sample data points
    :param N_tol: number of total data points
    :param N_out: number of out-of-sample data points
    :param exp_num: number of experiments
    :return: None
    """
    # Part(i): compute theoretical generalization bound values as a function of delta.
    tolerance_seq = np.arange(0.01, 0.501, 0.01)
    vc_num = 4
    t_gb_values = sqrt(8 * (log(4) + vc_num * log(2 * N_in) - log(tolerance_seq)) / N_in)
    print("t_gb_values", t_gb_values)

    # Part(ii): extract D_out with N_out samples and the first d (d_vc-1) features.
    d_num = vc_num - 1
    # NOTE(review): in_X and in_y are never read below; the in-sample data is
    # drawn from the full data_X / data_y instead, so it may overlap with the
    # out-of-sample set -- confirm this is intended.
    in_X, out_X, in_y, out_y = train_test_split(data_X, data_y, test_size=N_out, random_state=660)
    out_X = out_X[:, 0:d_num]

    # Part(iii) and Part(iv):
    E_in_results = np.zeros(exp_num, dtype='float')
    E_out_results = np.zeros(exp_num, dtype='float')
    # Number of folds, chosen from N_tol / N_in.
    fold_num = int(N_tol / N_in)
    for exp_i in range(exp_num):
        # The experiment index seeds both the fold shuffling and NumPy's
        # global random state.
        stfied_k_fold = KFold(n_splits=fold_num, shuffle=True, random_state=exp_i)
        np.random.seed(exp_i)
        # NOTE(review): target_index is drawn but never read; the loop below
        # always takes the fold at position 1. Possibly `index == target_index`
        # was intended -- confirm before changing, as it alters the results.
        target_index, index = np.random.randint(fold_num), 0
        for _, test_index in stfied_k_fold.split(X=data_X, y=data_y):
            if index == 1:
                in_bag_X = data_X[test_index]
                in_bag_y = data_y[test_index]
                break
            index += 1
        in_bag_X = in_bag_X[:, 0:d_num]
        perceptron_model = Perceptron(random_state=660)
        perceptron_model.fit(X=in_bag_X, y=in_bag_y)
        # Error = 1 - accuracy, on the in-sample chunk and on D_out.
        E_in_results[exp_i] = 1 - perceptron_model.score(X=in_bag_X, y=in_bag_y)
        E_out_results[exp_i] = 1 - perceptron_model.score(X=out_X, y=out_y)

    # Part(v):
    # Sort the absolute gaps ascending; for each tolerance pick the value at
    # the (1 - tolerance) position of the sorted experiments.
    diff_results = np.absolute(E_out_results - E_in_results)
    diff_results.sort()
    max_values = np.zeros(t_gb_values.shape[0], dtype='float')
    for tolerance_i in range(tolerance_seq.shape[0]):
        tolerance_value = tolerance_seq[tolerance_i]
        position = int((1 - tolerance_value) * exp_num)
        max_values[tolerance_i] = diff_results[position - 1]

    # Top panel: theoretical bound; bottom panel: empirical values.
    plt.figure(0)
    plt.subplot(211)  # rows, columns, th_plot
    plt.plot(tolerance_seq, t_gb_values, 'b')
    plt.ylabel('Generalization bounds')
    plt.xlabel('Tolerance values')
    plt.title(r'The plot for the prob1_a_i when $N_i$$_n$=' + str(N_in))
    plt.subplot(212)
    plt.plot(tolerance_seq, max_values, 'b')
    plt.ylabel('Max values')
    plt.xlabel('Tolerance values')
    plt.title(r'The plot for the prob1_a_v when $N_i$$_n$=' + str(N_in))
    plt.tight_layout()
    plt.savefig('problem1_a_Nin' + str(N_in) + '.png')
    plt.show()
# output: # [0 0 1 0 1 1 1 1 0 0 0 1 1 1 0 1 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 1 0 0 1 1 # 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0] print("predictions_val shape: {}".format(predictions_val.shape)) # (57,) print("predictions_val unique values: {}".format( np.unique(predictions_val))) # [0 1] # Check our accuracy: Add up the number of classifications we got wrong # If classification was correct, then give score of 1; else 0 scores_val = np.where(predictions_val == y_val, 1, 0) mean_accuracy_val = np.mean(scores_val) # This will be different every time depending on how data is split in random permutation print("mean accuracy on validation set: {}".format(mean_accuracy_val)) # We can also use scikit-learn's built-in function; it does the same thing! mean_accuracy_val = model.score(x_val, y_val) # Typically, we'd then use the results of validaton to tweak hyperparameters and repeat """ Evaluate on testing set """ # Predict the class/labels predictions_test = model.predict(x_test) # Check accuracy scores_test = np.where(predictions_test == y_test, 1, 0) mean_accuracy_test = np.mean(scores_test) # or simply: mean_accuracy_test = model.score(x_test, y_test) print("mean accuracy on test set: {}".format(mean_accuracy_test))
# Standardize the features using statistics from the training split only.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# NOTE(review): alpha is the regularization multiplier, not the learning
# rate (that is eta0) -- confirm alpha=0.1 is what was intended here.
ppn = Perceptron(max_iter=40, alpha=0.1, random_state=1)
ppn.fit(X_train_std, y_train)
y_pred = ppn.predict(X_test_std)

# Derive the error rate from the actual predictions instead of the
# hard-coded 5 / 45, which was only correct for one particular run.
n_misclassified = (y_test != y_pred).sum()
error_rate = n_misclassified / len(y_test)
print('Misclassified samples: %d' % n_misclassified)
print('Misclassification Error: ', error_rate, ' Accuracy is: ', (1 - error_rate))
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy score: %.2f' % ppn.score(X_test_std, y_test))

# Disabled helper for plotting decision regions, kept for reference.
# def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
#     markers = ('s', 'x', 'o', '^', 'v')
#     colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
#     cmap = ListedColormap(colors[:len(np.unique(y))])
#     x1_min, x1_max = X[:, 0].min() - 1, X[:,0].max() + 1
#     x2_min, x2_max = X[:, 1].min() - 1, X[:,1].max() + 1
#     xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
#                            np.arange(x2_min, x2_max, resolution))
#     z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
#     z = z.reshape(xx1.shape)
#     plt.contourf(xx1,xx2, z, alpha=0.3, cmap=cmap)
#     plt.xlim(xx1.min(), xx1.max())
def classify(title, train, test, train_labels, test_labels):
    """Train a default Perceptron and print its accuracy on the test split.

    :param title: text printed in front of the accuracy value
    :param train: training samples passed to ``fit``
    :param test: test samples passed to ``score``
    :param train_labels: targets for ``train``
    :param test_labels: targets for ``test``
    :return: None; the result is only printed
    """
    # fit() returns the estimator, so the model is built and trained at once.
    model = Perceptron().fit(train, train_labels)
    accuracy = model.score(test, test_labels)
    print("{} {}".format(title, accuracy))