class ClassificationPLA(ClassficationBase.ClassificationBase):
    """Perceptron (PLA) classifier built on the shared ClassificationBase.

    Preprocesses the data on construction and holds a sklearn Perceptron
    in ``self.clf``.
    """

    def __init__(self, isTrain, isOutlierRemoval=0):
        super(ClassificationPLA, self).__init__(isTrain, isOutlierRemoval)
        # data preprocessing
        self.dataPreprocessing()
        # PLA object
        self.clf = Perceptron()

    def dataPreprocessing(self):
        """Rebalance the classes; standardization is deliberately disabled."""
        # deal with unbalanced data
        self.dealingUnbalancedData()
        # Standardization
        #self.Standardization()

    def training(self):
        """Fit the Perceptron on the training split."""
        # FIX: the original comment said "K Nearest Neighbors" — this is a Perceptron.
        self.clf.fit(self.X_train, self.y_train.ravel())

    def predict(self):
        """Predict the test split and print the error rate."""
        self.y_pred = self.clf.predict(self.X_test)
        # reshape to a column vector so the element-wise comparison below works
        self.y_pred = self.y_pred.reshape((self.y_pred.shape[0], 1))
        err = 1 - np.sum(self.y_test == self.y_pred) * 1.0 / self.y_pred.shape[0]
        # FIX: parenthesized single-argument print works on both Python 2 and 3;
        # the original Py2-only print statement broke under Python 3.
        print("Error rate: {}".format(err))
def PERCEPTRON(data_train, data_train_vectors, data_test_vectors, **kwargs):
    """Fit a Perceptron on the training vectors and return predictions
    for the test vectors.  Extra keyword arguments are accepted but unused."""
    model = Perceptron()
    model.fit(data_train_vectors, data_train.target)
    return model.predict(data_test_vectors)
def run(self):
    """Run the task: load train/test CSVs, optionally standardize the
    features, fit a Perceptron(random_state=241), and write the test
    accuracy to the task output."""
    train_data = pd.read_csv(self.param.get('train'))
    test_data = pd.read_csv(self.param.get('test'))
    X_train, y_train = train_data[['1', '2']], train_data['0']
    X_test, y_test = test_data[['1', '2']], test_data['0']
    if self.param.get('scale') is True:
        # Scaler statistics come from the training split only.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    model = Perceptron(random_state=241)
    model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, model.predict(X_test))
    with self.output().open('w') as output:
        output.write(str(accuracy))
def Perceptron_1(train_predictors, test_predictors, train_target, test_target):
    """Fit a default Perceptron and evaluate it on the test split.

    Returns:
        (accuracy, predicted): test accuracy and the predicted labels.
    """
    clf = Perceptron()
    clf.fit(train_predictors, train_target)
    predicted = clf.predict(test_predictors)
    accuracy = accuracy_score(test_target, predicted)
    # FIX: parenthesized single-argument print is valid and identical on both
    # Python 2 and Python 3; the original bare print statement was Py2-only.
    print("Accuracy for Linear Model Perceptron: " + str(accuracy))
    return accuracy, predicted
def percep(X_tr, y_tr, X_te):
    """Fit a Perceptron (with an explicit bias column added via
    add_dummy_feature) on the training data and predict the test data."""
    # NOTE(review): `n_iter` was removed in scikit-learn 0.21 (use `max_iter`);
    # kept as-is to preserve behavior on the version this code was written for.
    model = Perceptron(n_iter = 1000)
    train_aug = add_dummy_feature(X_tr)
    test_aug = add_dummy_feature(X_te)
    model.fit(train_aug, y_tr)
    return model.predict(test_aug)
def t():
    """Week-2 perceptron exercise: compare accuracy on raw vs standardized
    features and report the improvement."""
    # 1: load train/test CSVs; column 0 is the label, the rest are features
    from pandas import read_csv
    df = read_csv('w2/perceptron-train.csv', header=None)
    dt = read_csv('w2/perceptron-test.csv', header=None)
    yf, xf = df[0], df.drop([0], axis=1)
    yt, xt = dt[0], dt.drop([0], axis=1)
    # 2: fit on the raw features
    from sklearn.linear_model import Perceptron
    clf = Perceptron(random_state=241)
    clf.fit(xf, yf)
    af1 = clf.score(xf, yf)
    at1 = clf.score(xt, yt)
    rf = clf.predict(xf)
    rt = clf.predict(xt)
    # 3: accuracies via accuracy_score (should agree with .score above)
    from sklearn.metrics import accuracy_score
    af = accuracy_score(yf, rf)
    at = accuracy_score(yt, rt)
    print(af, at)
    print(af1, at1)
    # 4: refit after standardization (scaler fitted on train only)
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    xfs = scaler.fit_transform(xf)
    xts = scaler.transform(xt)
    clf.fit(xfs, yf)
    afs1 = clf.score(xfs, yf)
    ats1 = clf.score(xts, yt)
    pfs = clf.predict(xfs)
    pts = clf.predict(xts)
    afs = accuracy_score(yf, pfs)
    ats = accuracy_score(yt, pts)
    print(afs, ats)
    print(afs1, ats1)
    # report the gain in test accuracy from scaling
    # NOTE(review): `pf` is presumably a module-level report helper — confirm.
    pf('5', round(ats - at, 3))
def main():
    """Train a Perceptron to recognize Iris setosa from petal length/width
    and print the prediction for one sample point."""
    iris = load_iris()
    X = iris.data[:, (2, 3)]  # petal length, petal width
    y = (iris.target == 0.).astype(np.int32)  # 1 iff setosa
    clf = Perceptron(random_state=42)
    clf.fit(X, y)
    prediction = clf.predict([[2, 0.5]])
    print(prediction)
def classify_perceptron(): print "perceptron" (X_train, y_train), (X_test, y_test) = util.load_all_feat() print "original X_train shape", X_train.shape clf = Perceptron() clf.fit(X_train, y_train) pred = clf.predict(X_test) print "accuracy score:", accuracy_score(y_test, pred)
class PerceptronModel(BaseModel):
    """BaseModel wrapper around an L2-penalized sklearn Perceptron."""

    def __init__(self, cached_features):
        BaseModel.__init__(self, cached_features)
        # Fixed seed keeps runs reproducible.
        self.model = Perceptron(penalty="l2", random_state=1)

    def _predict_internal(self, X_test):
        # Delegate straight to the fitted estimator.
        return self.model.predict(X_test)
def solve(train_set_x, train_set_y, test_set_x, test_set_y):
    """Fit Perceptron(random_state=241) on the training split and return
    the accuracy on the test split."""
    model = Perceptron(random_state=241)
    model.fit(X=train_set_x, y=train_set_y)
    return accuracy_score(test_set_y, model.predict(test_set_x))
def t1():
    """Smoke test: fit a Perceptron on a three-point toy set and print the
    predictions for the same points."""
    from sklearn.linear_model import Perceptron
    features = np.array([[1, 2], [3, 4], [5, 6]])
    labels = np.array([0, 1, 0])
    model = Perceptron()
    model.fit(features, labels)
    print(model.predict(features))
def perceptron_classifier(data_train, data_test):
    """Compare Perceptron test accuracy with and without standardization.

    The first column of each frame is the label; the rest are features.
    Prints both accuracies and returns their difference
    (normalized - non-normalized).
    """
    X_train = data_train.iloc[:, 1:].values
    y_train = data_train.iloc[:, :1].values.ravel()
    X_test = data_test.iloc[:, 1:].values
    y_test = data_test.iloc[:, :1].values.ravel()

    clf = Perceptron(random_state=241)

    # --- raw (non-normalized) features ---
    clf.fit(X_train, y_train)
    accuracy_notnorm = metrics.accuracy_score(y_test, clf.predict(X_test))

    # --- standardized features ---
    # The scaler must be fitted on the training set ONLY; the test set is
    # transformed with the statistics learned there (never re-fitted).
    scaler = preprocessing.StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    clf.fit(X_train_scaled, y_train)
    accuracy_norm = metrics.accuracy_score(y_test, clf.predict(X_test_scaled))

    print('Accuracy (non-normalized):', accuracy_notnorm)
    print('Accuracy (normalized):', accuracy_norm)
    diff = accuracy_norm - accuracy_notnorm
    print('Diff:', diff)
    return diff
def get_accuracy(_data_train_features, _data_train_labels, _data_test_features, _data_test_labels):
    """Train a Perceptron with default parameters and random_state=241,
    then return the accuracy (fraction of correctly classified objects)
    on the test sample."""
    clf = Perceptron(random_state=241, shuffle=True)
    clf.fit(_data_train_features, numpy.ravel(_data_train_labels))
    predictions = clf.predict(_data_test_features)
    return accuracy_score(_data_test_labels, predictions)
def neural_net(train, test): y = [] xTrain, yTrain = loadData(train) xTest, yTest = loadData(test) nN = Perceptron() nN.fit(xTrain, yTrain) y = nN.predict(xTest) testError = 1 - nN.score(xTest, yTest) print 'Test error: ' , testError return y
def test():
    """Toy check: fit and evaluate a Perceptron on the same three points."""
    features = np.array([[1, 2], [3, 4], [5, 6]])
    labels = np.array([0, 1, 0])
    model = Perceptron()
    model.fit(features, labels)
    predictions = model.predict(features)
    print("Predictions: %s" % predictions)
    print("Accuracy: %s" % accuracy_score(labels, predictions))
def neural_net(): Xtrain,ytrain,Xtest,ytest = getSplitData() Xtrain, Xtest = getScaledData(Xtrain, Xtest) ntest = Xtest.shape[0] #Your code here clf = Perceptron() clf.fit(Xtrain, ytrain) yPredict = clf.predict(Xtest) #print "parameter: n_neighbors = ",n print "neural_net classification accuracy: ", accuracy_score(ytest,yPredict)
def neural_net(train, test): y = [] trainY, trainX = loadData(train) testY, testX = loadData(test) neuralNet = Perceptron() neuralNet.fit(trainX, trainY) y = neuralNet.predict(testX) testError = 1 - neuralNet.score(testX, testY) print 'Test error: ' + str(testError) return y
def main():
    # End-to-end Perceptron run over the question-status dataset (Python 2):
    # read train data, extract features, fit, predict on test data, and write
    # a submission file.  Mutates module-level globals `fea`, `y`, `probs`.
    start = time.time()
    print "Reading train data and its features from: " + train_file
    data = cu.get_dataframe(train_file)
    global fea
    fea = features.extract_features(feature_names,data)
    # NOTE(review): `seed=`, `n_iter=` and class_weight="auto" are from an old
    # scikit-learn API — confirm against the pinned sklearn version.
    percep = Perceptron(penalty=None, alpha=0.0001, fit_intercept=False, n_iter=5, shuffle=False, verbose=1, eta0=1.0, n_jobs=-1, seed=0, class_weight="auto", warm_start=False)
    # NOTE(review): X is built but never used — fitting uses `fea` instead.
    X = []
    for i in data["OwnerUndeletedAnswerCountAtPostTime"]:
        X.append([i])
    # Must be array type object. Strings must be converted to
    # to integer values, otherwise fit method raises ValueError
    global y
    y = []
    print "Collecting statuses"
    # Encode each status string as its index in ques_status.
    for element in data["OpenStatus"]:
        for index, status in enumerate(ques_status):
            if element == status:
                y.append(index)
    print "Fitting"
    percep.fit(fea, y)
    '''Make sure you have the up to date version of sklearn; v0.12 has the predict_proba method; http://scikit-learn.org/0.11/install.html '''
    print "Reading test data and features"
    test_data = cu.get_dataframe(test_file)
    test_fea = features.extract_features(feature_names,test_data)
    print "Making predictions"
    global probs
    #probs = percep.predict_proba(test_fea) # only available for binary classification
    probs = percep.predict(test_fea)
    # shape of probs is [n_samples]
    # convert probs to shape [n_samples,n_classes]
    # (integer division here is Py2 semantics; would need // on Py3)
    probs = np.resize(probs, (len(probs) / 5, 5))
    #if is_full_train_set == 0:
    #	print("Calculating priors and updating posteriors")
    #	new_priors = cu.get_priors(full_train_file)
    #	old_priors = cu.get_priors(train_file)
    #	probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)
    print "writing submission to " + submission_file
    cu.write_submission(submission_file, probs)
    finish = time.time()
    print "completed in %0.4f seconds" % (finish-start)
def __test_perceptron(self, normalized):
    """Fit a Perceptron on the held train/test frames (first column is the
    label) and return the test accuracy; optionally standardize first."""
    model = Perceptron()
    X_train, y_train = self.train_data.iloc[:, 1:], self.train_data.iloc[:, 0]
    X_test, y_test = self.test_data.iloc[:, 1:], self.test_data.iloc[:, 0]
    if normalized:
        # Scaler statistics come from the training split only.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))
class learn_by_perceptron:
    # Thin wrapper around sklearn's Perceptron with joblib persistence.
    def __init__(self, X=None, Y=None, path=r"..\..\per_dump.pkl", penalty='l1', alpha=0.00001, fit=True):
        # With no data, load a previously pickled model from `path`;
        # otherwise build a fresh Perceptron and (optionally) fit and dump it.
        if X is None or Y is None:
            self.clf = joblib.load(path)
        else:
            self.clf = Perceptron(penalty=penalty, alpha=alpha, n_jobs=6, class_weight='auto', shuffle=True)
            if fit:
                self.clf.fit(X, Y)
                self.dump(path)

    def predict(self, X):
        # Predict labels with the wrapped estimator.
        return self.clf.predict(X)

    def cross_val(self, X, Y, n, cpus=6):
        # n-fold cross-validated F1 scores for the wrapped estimator.
        return cross_validation.cross_val_score(self.clf, X, Y, cv=n, n_jobs=cpus, scoring='f1')

    def dump(self, path=r"..\..\svm_dump.pkl"):
        # Persist the estimator to disk.
        # NOTE(review): default path says "svm_dump" while __init__ defaults to
        # "per_dump" — looks like a copy-paste leftover; confirm intended default.
        joblib.dump(self.clf, path)
def perecptronClassification(): from sklearn.datasets import fetch_20newsgroups from sklearn.metrics.metrics import f1_score, classification_report from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.linear_model import Perceptron categories = ['rec.sport.hockey','rec.sport.baseball','rec.autos'] newsgroups_train = fetch_20newsgroups(subset='train',categories=categories,remove=('headers','footers','quotes')) newsgroups_test = fetch_20newsgroups(subset='test',categories=categories,remove=('headers','footers','quotes')) vectorizer = TfidfVectorizer() X_train = vectorizer.fit_transform(newsgroups_train.data) X_test = vectorizer.transform(newsgroups_test.data) classifier = Perceptron(n_iter=100,eta0=0.1) classifier.fit(X_train,newsgroups_train.target) predictions = classifier.predict(X_test) print classification_report(newsgroups_test.target,predictions)
def main():
    # Standard iris walkthrough: split, standardize, fit a Perceptron,
    # evaluate, persist the model, and plot decision regions.
    #import data
    iris = datasets.load_iris()
    X = iris.data[:,[2,3]]  # petal length, petal width
    y = iris.target
    #cross_validation
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
    #print (X_train)
    #standardize the feature (scaler fitted on the training split only)
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    #trainning model
    ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)
    # persist the fitted model
    joblib.dump(ppn, 'ppn.pkl')
    #predict
    y_pred = ppn.predict(X_test_std);
    print("Misclassified samples: %d" %(y_test != y_pred).sum())
    #Accuracy
    print(y_test)
    print(y_pred)
    print("Accuracy: %.2f" % accuracy_score(y_test, y_pred))
    #
    # visualize train+test together; test samples highlighted via test_idx
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105,150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()
def thePerceptron( irisData ):
    """Fit a Perceptron to detect Iris setosa from petal length/width and
    print the prediction for one sample point."""
    print("\n####################")
    print("thePerceptron():\n")
    X = irisData.data[:,(2,3)]  # petal length, petal width
    y = (irisData.target == 0).astype(np.int)  # 1 iff setosa
    model = Perceptron(random_state=1234567)
    model.fit(X, y)
    y_predicted = model.predict([[2,0.5]])
    print( "y_predicted = " + str(y_predicted) )
    print("\nexiting: thePerceptron()")
    print("####################")
    return( None )
def train(a, sizel, intercept):
    """Split `a` into a 1/sizel training slice and the remaining evaluation
    slice, binarize the last column as the label (value > 1), zero that
    column out of the features, fit a Perceptron, and evaluate.

    Returns:
        (accuracy, roc_auc) on the evaluation slice.
    """
    d = a.copy()
    pes = Perceptron(n_jobs=4, n_iter=500, fit_intercept=intercept)
    # FIX: use integer (floor) division so the slice index stays an int on
    # Python 3 as well ("/" would produce a float and raise TypeError).
    split = len(d) // sizel
    train_part = d[:split]
    C = d[split:]
    train_res = numpy.zeros(shape=(len(train_part)))
    C_res = numpy.zeros(shape=(len(C)))
    class_index = len(d[0]) - 1
    # FIX: the original iterated range(len(train)) for BOTH slices, so whenever
    # the evaluation slice had a different length its tail either kept the raw
    # class value inside the features (label leakage, labels left at 0) or the
    # loop crashed with IndexError.  Each slice is now processed fully.
    for i in range(len(train_part)):
        train_res[i] = (train_part[i][class_index] > 1)
        train_part[i][class_index] = 0
    for i in range(len(C)):
        C_res[i] = (C[i][class_index] > 1)
        C[i][class_index] = 0
    pes.fit(train_part, train_res)
    output = pes.predict(C)
    (falsepr, truepr, thr) = roc_curve(C_res, output, 1)
    area = auc(falsepr, truepr)
    # final `output` is the mean accuracy on the evaluation slice
    output = pes.score(C, C_res)
    return (output, area)
def run():
    """Standardized-iris Perceptron baseline: print the misclassification
    count, the accuracy, and the raw test predictions."""
    iris = datasets.load_iris()
    X, y = iris.data[:, [2, 3]], iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
    # Standardize using training-split statistics only.
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    model = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    model.fit(X_train_std, y_train)
    y_pred = model.predict(X_test_std)
    print ('Missclassified samples = %d' % (y_test != y_pred).sum())
    print('Accuracy = %.2f' % accuracy_score(y_test, y_pred))
    print(y_pred)
def predict_with_perceptron():
    """Train a Perceptron on the module-level standardized split, report the
    misclassification count and accuracy, and plot the decision regions."""
    model = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    model.fit(X_train_std, y_train)
    y_pred = model.predict(X_test_std)
    print('Misclassified samples: %d' % (y_test != y_pred).sum())
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
    # Visualize train+test together; test samples highlighted by test_idx.
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined, classifier=model,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()
def Perceptron_Parejas(pareja1, pareja2):
    # Train a Perceptron on one pair of iris feature columns, plot the
    # training points and each class's decision line, then print accuracy,
    # recall, and per-class precision on the test split (Python 2 script).
    datos,etiquetas,tamanio=CargarBaseDatos("iris")
    X_train,y_train,X_test,y_test=particionarDatos(tamanio,0.7, datos, etiquetas)
    # Restrict both splits to the chosen pair of feature columns.
    X= X_train[:,[pareja1,pareja2]]
    Y= X_test[:,[pareja1,pareja2]]
    tamTrain=len(X_train)
    prc = Perceptron().fit(X,y_train)
    coef = prc.coef_
    intercept = prc.intercept_
    # class 0/1/2 -> colors 'r'/'g'/'b'
    color = "rgb"
    # Scatter each training point colored by its class label.
    for i in range(0,tamTrain,1):
        if(y_train[i]==0):
            pl.scatter(X_train[i,pareja1], X_train[i,pareja2], color=color[y_train[i]])
        elif(y_train[i]==1):
            pl.scatter(X_train[i,pareja1], X_train[i,pareja2], color=color[y_train[i]])
        elif(y_train[i]==2):
            pl.scatter(X_train[i,pareja1], X_train[i,pareja2], color=color[y_train[i]])
    pl.axis('tight')
    xmin, xmax = pl.xlim()
    ymin, ymax = pl.ylim()
    # Draw each class's decision boundary: w0*x + w1*y + b = 0 solved for y.
    for i in range(0,3,1):
        pl.plot([xmin, xmax], [((-(xmin * coef[i, 0]) - intercept[i]) / coef[i, 1]), ((-(xmax * coef[i, 0]) - intercept[i]) / coef[i, 1])],ls="--", color=color[i])
    pl.show()
    y_ = prc.predict(Y)
    accuracy = accuracy_score(y_test, y_)
    recall = recall_score(y_test, y_, average=None)
    precision = precision_score(y_test, y_, average=None)
    print "accuracy: "+str(accuracy)
    print "recall: "+str(recall)
    print "precision por clase: "+str(precision)
def classify_data(X, Y, args, holdout=0.5):
    """Split X/Y, fit the model named by ``args.initial`` on the training
    half, predict the held-out half, and group the held-out instances by
    predicted label.

    Args:
        X, Y: samples (each sample is flattened before fitting) and labels.
        args: namespace with ``initial`` ("perceptron"/"svm"/"GMM") and ``K``.
        holdout: fraction of the data held out for prediction.

    Returns:
        list of clusters; cluster i holds Instance(x, true_label) objects for
        every held-out sample predicted as the i-th distinct label.

    Raises:
        ValueError: if ``args.initial`` names an unsupported model.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=holdout, random_state=np.random.RandomState())
    # FIX: wrap map() in list() — on Python 3 np.array(map(...)) would build a
    # useless 0-d object array instead of a 2-D sample matrix.
    train_data = np.array(list(map(lambda x: x.flatten(), X_train)))
    test_data = np.array(list(map(lambda x: x.flatten(), X_test)))
    if args.initial == "perceptron":
        model = Perceptron()
    elif args.initial == "svm":
        model = SVC(kernel="poly")
    elif args.initial == "GMM":
        model = GMM(n_components=args.K)
    else:
        # FIX: `raise("...")` raised a TypeError (a str is not an exception);
        # raise a proper exception type with the same message.
        raise ValueError("Model Not Supported.")
    model.fit(train_data, Y_train)
    Y_pred = model.predict(test_data)
    labels = list(set(Y_pred))
    k = len(labels)
    clusters = [[] for _ in range(k)]
    # Bucket each held-out sample (with its true label) by predicted label.
    for x, y, t in zip(X_test, Y_pred, Y_test):
        clusters[labels.index(y)].append(Instance(x, t))
    return clusters
def generate(count):
    """Generate `count` random student records.

    Each record is [math, physics, russian, disabled] with grades in 1..5 and
    disabled in {0, 1}.  The label is 1 when the admission rule passes
    (relaxed thresholds for disabled applicants), else 0.

    Returns:
        (X, y): numpy arrays of shape (count, 4) and (count,).
    """
    x = []  # FIX: original read `x = []s` — the stray `s` was a SyntaxError
    y = []
    for ir in range(0, count):
        math = np.random.randint(1, 6)
        physics = np.random.randint(1, 6)
        russian = np.random.randint(1, 6)
        disabled = np.random.randint(0, 2)
        x.append([math, physics, russian, disabled])
        y.append(1 if (disabled == 1 and math >= 3 and physics >= 3 and russian >= 3)
                 or (math >= 4 and physics >= 4 and math + physics + russian >= 11)
                 else 0)
    return np.array(x), np.array(y)


if __name__ == '__main__':
    # Imported lazily so the module stays importable without scikit-learn.
    from sklearn.metrics import accuracy_score
    np.random.seed(42)
    X, y = generate(400)
    X_test, y_test = generate(50)
    perceptron = Perceptron(tol=1e-7)
    perceptron.fit(X, y)
    predict = perceptron.predict(X_test)
    print(accuracy_score(predict, y_test))
    print(perceptron.predict([[3, 3, 3, 1]]))  # prints 1
    print(perceptron.predict([[5, 5, 5, 0]]))  # prints 1
    print(perceptron.predict([[4, 4, 3, 0]]))  # prints 1
    print(perceptron.predict([[3, 4, 3, 0]]))  # prints 0
    print(perceptron.predict([[3, 4, 2, 1]]))  # prints 0
# Fit the remaining classifiers on the training set (the other clf_* objects
# are created and fitted earlier in the file).
clf_percept = clf_percept.fit(X, Y)
clf_KNN = clf_KNN.fit(X, Y)

# Evaluate every model on the SAME training set (training accuracy).
# FIX: these prints were broken 2to3 artifacts of the form
#   print("label:"), value, "%"
# which printed only the label and silently discarded the value in a throwaway
# tuple; the values are now passed to print() itself.

# Decision Trees
clf_tree_prediction = clf_tree.predict(X)
acc_tree = accuracy_score(Y, clf_tree_prediction) * 100
print("Accuracy using Decision Trees:", acc_tree, "%")

# SVM
clf_svm_prediction = clf_svm.predict(X)
acc_svm = accuracy_score(Y, clf_svm_prediction) * 100
print("Labels for training set using SVM:'", acc_svm, "%")

# Perceptron
clf_percept_prediction = clf_percept.predict(X)
acc_per = accuracy_score(Y, clf_percept_prediction) * 100
print("Labels for training set using Perceptron:", acc_per, "%")

# KNN: label each point by its single nearest neighbor's label
distances, indices = clf_KNN.kneighbors(X)
new_label = indices[:, 0]
clf_KNN_prediction = [Y[i][:] for i in new_label]
acc_knn = accuracy_score(Y, clf_KNN_prediction) * 100
print("Labels for training set using K-nearst neighbour:", acc_knn, "%")

# All accuracies
acc_all = [acc_tree, acc_svm, acc_per, acc_knn]
# Choosing the best among all
score_bestmethod = np.max(acc_all)
#testX =pd.read_csv("data/perceptron-test.csv") Xtrain = pd.read_csv('data/perceptron-train.csv', header=None, usecols=np.arange(1, 3)) ytrain = pd.read_csv('data/perceptron-train.csv', header=None, usecols=[0]) Xtest = pd.read_csv('data/perceptron-test.csv', header=None, usecols=np.arange(1, 3)) ytest = pd.read_csv('data/perceptron-test.csv', header=None, usecols=[0]) clf = Perceptron(random_state=241) clf.fit(Xtrain, ytrain.values.ravel()) predictions = clf.predict(Xtest) accuracy = accuracy_score(ytest, predictions) print(accuracy) print(classification_report(clf.predict(Xtest), ytest)) """ scaler = StandardScaler() X_train_scaled = scaler.fit_transform(Xtrain) X_test_scaled = scaler.transform(Xtest) clf.fit(X_train_scaled, ytrain.values.ravel()) predictions_scaled = clf.predict(X_test_scaled) accuracy_scaled = accuracy_score(ytest,predictions_scaled) print(accuracy_scaled)
# Instantiate the remaining classifiers (clf_tree is created earlier in the file).
clf_svm = SVC()
clf_perceptron = Perceptron()
clf_KNN = KNeighborsClassifier()

# Training the models
clf_tree.fit(X, Y)
clf_svm.fit(X, Y)
clf_perceptron.fit(X, Y)
clf_KNN.fit(X, Y)

# Testing using the same data (training accuracy — optimistic by construction)
pred_tree = clf_tree.predict(X)
acc_tree = accuracy_score(Y, pred_tree) * 100
print('Accuracy for DecisionTree: {}'.format(acc_tree))

pred_svm = clf_svm.predict(X)
acc_svm = accuracy_score(Y, pred_svm) * 100
print('Accuracy for SVM: {}'.format(acc_svm))

pred_per = clf_perceptron.predict(X)
acc_per = accuracy_score(Y, pred_per) * 100
print('Accuracy for perceptron: {}'.format(acc_per))

pred_KNN = clf_KNN.predict(X)
acc_KNN = accuracy_score(Y, pred_KNN) * 100
print('Accuracy for KNN: {}'.format(acc_KNN))

# The best classifier from svm, per, KNN (and tree); argmax index maps into
# the classifiers dict below.
index = np.argmax([acc_svm, acc_per, acc_KNN, acc_tree])
classifiers = {0: 'SVM', 1: 'Perceptron', 2: 'KNN', 3: 'DecisionTree'}
print('Best gender classifier is {}'.format(classifiers[index]))
#Training Classifiers svm.fit(features_train, train_labels) mnb.fit(features_train, train_labels) bnb.fit(features_train, train_labels) logit.fit(features_train, train_labels) percept.fit(features_train, train_labels) sgd.fit(features_train, train_labels) #Predicting output on test data svm_predict = svm.predict(features_test) mnb_predict = mnb.predict(features_test) bnb_predict = bnb.predict(features_test) logit_predict = logit.predict(features_test) percept_predict = percept.predict(features_test) sgd_predict = sgd.predict(features_test) classifier_names = [ "SVM", "Multinomial NB", "Bernoulli NB", "Logistic Regression", "Perceptron", "SGD" ] classifiers_predictions = [ svm_predict, mnb_predict, bnb_predict, logit_predict, percept_predict, sgd_predict ] for predict, name in zip(classifiers_predictions, classifier_names): #Performance Metrics of Classifiers
#Separamos las clase de las etiquetas # Clase X = datosFull.iloc[:, 0:-1] # Etiquetas Y = datosFull.iloc[:, -1] # Divide matrices en sub conjuntos de pruebas y trenes aleatorio X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0) # Mandamos a llamar al perceptron para trabajar con él perceptron = Perceptron() # Se usa la función fit para entrenar al perceptron con los datos ya dados perceptron.fit(X_train, y_train) # Eficiencia del algoritmo print(perceptron.score(X_test, y_test)) # Imprimimos si el valor es -1 0 1 dependiendo la salida print("La predicción es: ", int(perceptron.predict(imT))) # Para que el usuario entienda mas imprimimos la respuesta si es o no humano if ((int(perceptron.predict(imT))) == 1): print("Es un humano =D") else: print("No es humano")
# logistic regression logreg = LogisticRegression() logreg.fit(X_train, y_train.ravel()) y_pred = logreg.predict(X_test) print() acc, spe, sen = score(y_pred, y_test) print( 'Accuracy, specifity, sensitivity of logistic regression classifier on test set: ', round(acc, 4), round(spe[0], 4), round(sen[0], 4)) #Perceptron for i in range(len(y_train)): y_train[i] = y_train[i][0] clf = Perceptron(tol=1e-3, random_state=0) clf.fit(X_train, y_train.ravel()) y_pred = clf.predict(X_test) acc, spe, sen = score(y_pred, y_test) print('Accuracy, specifity, sensitivity of Perceptron on test set: ', round(acc, 4), round(spe[0], 4), round(sen[0], 4)) #SVM for i in range(len(y_train)): y_train[i] = y_train[i][0] clf = SVC(kernel='linear') clf.fit(X_train, y_train.ravel()) y_pred = clf.predict(X_test) acc, spe, sen = score(y_pred, y_test) print( 'Accuracy, specifity, sensitivity of Support Vector Classification on test set: ', round(acc, 4), round(spe[0], 4), round(sen[0], 4))
# MODEL-4) LinearSVC: fit on the training split, score on the validation split.
linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_val)
acc_linear_svc = round(accuracy_score(y_pred, y_val) * 100, 2)
print("MODEL-4: Accuracy of LinearSVC : ", acc_linear_svc)
#OUTPUT:-
#MODEL-4: Accuracy of LinearSVC :  78.68

#MODEL-5) Perceptron
#------------------------------------------
from sklearn.linear_model import Perceptron
perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_val)
acc_perceptron = round(accuracy_score(y_pred, y_val) * 100, 2)
print("MODEL-5: Accuracy of Perceptron : ", acc_perceptron)
#OUTPUT:-
#MODEL-5: Accuracy of Perceptron :  79.19

#MODEL-6) Decision Tree Classifier
#------------------------------------------
from sklearn.tree import DecisionTreeClassifier
decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_val)
acc_decisiontree = round(accuracy_score(y_pred, y_val) * 100, 2)
print("MODEL-6: Accuracy of DecisionTreeClassifier : ", acc_decisiontree)
def perceptron(train_instances, train_labels, test_instances):
    """Fit a Perceptron(tol=1e-3, random_state=0) on the training data and
    return its predictions for the test instances."""
    model = Perceptron(tol=1e-3, random_state=0)
    model.fit(train_instances, train_labels)
    return model.predict(test_instances)
# Metrics for the earlier model's predictions (lt_predictions / lr_classifier
# are created above this excerpt).
print("Accuracy:", metrics.accuracy_score(y_test, lt_predictions))
print("Precision:", metrics.precision_score(y_test, lt_predictions))
print("Recall:", metrics.recall_score(y_test, lt_predictions))
# ROC curve from the positive-class probabilities.
y_pred_proba = lr_classifier.predict_proba(X_test)[::, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()

# Perceptron
perceptron_classifier = Perceptron(random_state=11)
perceptron_classifier.fit(X_train, y_train)
perc_predictions = perceptron_classifier.predict(X_test)
score = accuracy_score(y_test, perc_predictions)
f_score = f1_score(y_test, perc_predictions, average='micro')
print("The accuracy score (Perceptron) is:", score)
print("The F score-Micro (Perceptron) is:", f_score)

# Support Vector Machine
svm_classifier = svm.SVC(gamma='scale')
svm_classifier.fit(X_train, y_train)
svm_predictions = svm_classifier.predict(X_test)
score = accuracy_score(y_test, svm_predictions)
f_score = f1_score(y_test, svm_predictions, average='micro')
print("The accuracy score (SVM) is:", score)
print("The F score-Micro (SVM) is:", f_score)

#print('Number of spam messages: %s' % df[df[0] == 1][0].count())
# In[73]: # Gaussian Naive Bayes gaussian = GaussianNB() gaussian.fit(train_x, train_y) y_pred = gaussian.predict(test_x) acc_gaussian = round(gaussian.score(train_x, train_y) * 100, 2) print(acc_gaussian) # In[74]: # Perceptron perceptron = Perceptron() perceptron.fit(train_x, train_y) y_pred = perceptron.predict(test_x) acc_perceptron = round(perceptron.score(train_x, train_y) * 100, 2) print(acc_perceptron) # In[75]: # Linear SVC linear_svc = LinearSVC() linear_svc.fit(train_x, train_y) y_pred = linear_svc.predict(test_x) acc_linear_svc = round(linear_svc.score(train_x, train_y) * 100, 2) print(acc_linear_svc) # In[76]: # Stochastic Gradient Descent
print("tvec's shape : ", tvec.shape, "tvec : ", tvec)
# Shuffle samples and targets with the same permutation.
shuffle_index = np.random.permutation(number_of_data)
xdata, tvec = xdata[shuffle_index], tvec[shuffle_index]

# Train the classifier
classifier = Perceptron(tol=1e-3, random_state=0)
print(
    "\n\n******************************************* Paramesters ****************************************************"
)
# fit() returns the estimator itself, so this prints its parameters.
print("Perceptron Parameters : ", classifier.fit(xdata, tvec))
print("Classiifer Coefficient : ", classifier.coef_)
print("Classiifer Intercept : ", classifier.intercept_)
print("Classiifer Iteration : ", classifier.n_iter_)
print("Classiifer Correctness : ", np.equal(classifier.predict(xdata), tvec))
print(
    "\n\n********************************************* Accuracy *****************************************************"
)
# Accuracy Score: mean accuracy on the given test data and labels
print("Accuracy : ", classifier.score(testData, testLabel))
# n-fold cross validation : Accuracy
print(
    "Average of accuracies found from 10-fold cross-validation : ",
    np.average(
        cross_val_score(classifier, xdata, tvec, cv=10, scoring="accuracy")))
# NOTE(review): the final print statement continues beyond this excerpt.
print(
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) #Standardize the dataset from sklearn.preprocessing import StandardScaler sc = StandardScaler() sc.fit(X_train) X_train_std = sc.transform(X_train) X_test_std = sc.transform(X_test) #Perceptron One-Vs-Rest from sklearn.linear_model import Perceptron ppn = Perceptron(n_iter = 40, eta0 = 0.1, random_state = 0) ppn.fit(X_train_std, y_train) # Test model y_pred = ppn.predict(X_test_std) print('Misclassified samples: %d'% (y_test != y_pred).sum()) #Accuracy from sklearn.metrics import accuracy_score print('Accuracy: %.2f' % accuracy_score(y_test, y_pred)) from matplotlib.colors import ListedColormap import matplotlib.pyplot as plt def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): markers = ('s', 'x', 'o', '^', 'v') colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') cmap = ListedColormap(colors[:len(np.unique(y))])
plt.grid(True)
plt.title('31240232 / Sunwung Lee')
plt.gca().legend(('Training Set', 'Test Set'))
#plt.savefig('2_percentage_correct')
#plt.clf()
#plt.scatter(Y_train[:,0], Y_train[:,1], s=3, c='r')
#plt.plot(yy,xx, c='k')
# ---------------------------------------------
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

model = Perceptron()
# SGD-style fit; parameters: training data, target values; returns self.
model.fit(Y_train, f_train)
# predict: takes samples, returns the predicted class label per sample.
fh_train = model.predict(Y_test)
# accuracy_score(ground truth, predicted): with normalize=True returns the
# fraction of correctly classified samples (float), otherwise the count (int).
print(accuracy_score(f_test, fh_train))
#
# (translated from Korean) What this does:
# build a Perceptron called `model` and train it on Y_train / f_train;
# model.predict then maps the Y_test samples to predicted class labels
# (fh_train), and accuracy_score measures how closely fh_train matches f_test.
#
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
# NOTE(review): score is computed on the TRAINING set, not on X_test/Y_pred —
# the reported numbers are training accuracies.
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian

# The perceptron is an algorithm for supervised learning of binary classifiers:
# a linear classifier whose predictions combine a weight vector with the
# feature vector, learned online one training element at a time.
# Reference: https://en.wikipedia.org/wiki/Perceptron

# In[ ]:

# Perceptron
perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron

# In[ ]:

# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)
acc_linear_svc
# NOTE(review): this excerpt starts inside a hyperparameter search — the outer
# loop binding the learning rate `a` (and the initialization of
# highestAccuracy) lies above this excerpt.
for b in r:  # iterates over r (candidate random_state values)
    # Create the perceptron classifier
    # eta0 = learning rate, random_state = seed used to shuffle the training data
    clf = Perceptron(eta0=a, random_state=b, max_iter=1000)

    # Fit perceptron to the training data
    clf.fit(X_training, y_training)

    # Predict each test sample and accumulate its accuracy; zip() walks the
    # samples and their labels in lockstep.
    counter = 0
    for (x_testSample, y_testSample) in zip(X_test, y_test):
        prediction = clf.predict([x_testSample])
        if prediction == y_testSample:
            counter += 1
    accuracy = counter/len(y_test)

    # Track the best accuracy seen so far and report it with its
    # hyperparameters as soon as it improves.
    if accuracy > highestAccuracy:
        highestAccuracy = accuracy
        highestAccuracyLR = a
        highestAccuracyRS = b
        print("Highest Perceptron accuracy so far: " + str(highestAccuracy) + ", Parameters: learning rate=" + str(a) + ", random_state=" + str(b))

# Final summary after the search finishes.
print("\nHighest Perceptron accuracy: " + str(highestAccuracy) + ", Parameters: learning rate=" + str(highestAccuracyLR) + ", random_state=" + str(highestAccuracyRS))
class NaiveNLP:
    """Bundle of scikit-learn classifiers trained/evaluated on one data split.

    train_set / valid_set are indexed as set[0] (features) and set[1] (labels)
    throughout — presumably (X, y) 2-tuples; TODO confirm against the caller.
    Each method_* fits one pre-configured model on the training set, predicts
    the validation set, and prints its accuracy (plus a penalized accuracy when
    multi_classification is set).
    """

    def __init__(self, train_set, valid_set, multi_classification=False):
        self.train_set = train_set
        self.valid_set = valid_set
        # When True, the method_* helpers also report penalized_accuracy().
        self.multi_classification = multi_classification
        # NOTE(review): sklearn.linear_model.logistic is a private module path
        # that was removed in modern scikit-learn — this line may need to
        # become sklearn.linear_model.LogisticRegression(); confirm the
        # pinned sklearn version before changing.
        self.my_LR = sklearn.linear_model.logistic.LogisticRegression()
        self.my_RF = RandomForestClassifier(criterion='entropy',
                                            max_depth=50,
                                            min_samples_leaf=1,
                                            min_samples_split=3,
                                            n_estimators=50)
        self.my_P = Perceptron(max_iter=10000, tol=0.1)
        self.my_SVM_rbf = SVC(kernel='rbf', gamma=0.03, C=30, max_iter=10000)
        self.my_SVM_linear = SVC(kernel='linear', gamma=0.03, C=30, max_iter=10000)
        self.my_DT = DecisionTreeClassifier()
        self.my_NB = GaussianNB()
        self.my_KNN = KNeighborsClassifier(n_neighbors=3)

    def penalized_accuracy(self, predict, target):
        """Return a weighted accuracy as a string.

        25% weight: agreement on the 'unrelated'-vs-rest split;
        75% weight: accuracy on the samples predicted as something other
        than 'unrelated'.  Expects array-like predictions/targets of strings.
        """
        index_un_p = predict == 'unrelated'
        index_un = target == 'unrelated'
        # Positions predicted as any concrete (non-'unrelated') class.
        index_re = np.where(predict != 'unrelated')
        acc1 = np.mean(index_un_p == index_un)
        acc2 = np.mean(predict[index_re] == np.array(target)[index_re])
        return (str(0.25 * acc1 + 0.75 * acc2))

    def method_KNeighborsClassifier(self):
        """Fit KNN, predict the validation set, print accuracy."""
        # (commented-out GridSearchCV experiment kept for reference)
        # pipeline = Pipeline([('clf', KNeighborsClassifier())])
        # parameters = {'clf__n_neighbors': (5, 10, 3, 50)}
        # grid_search = GridSearchCV(pipeline,
        #                            parameters,
        #                            verbose=1,
        #                            scoring='accuracy')
        # grid_search.fit(self.train_set[0], self.train_set[1])
        # print('Best score: %0.3f' % grid_search.best_score_)
        # print('Best parameters; ')
        # best_parameters = grid_search.best_estimator_.get_params()
        # for param_name in sorted(best_parameters.keys()):
        #     print('\t%s: %r' % (param_name, best_parameters[param_name]))
        self.my_KNN.fit(self.train_set[0], self.train_set[1])
        self.my_KNN_pred = self.my_KNN.predict(self.valid_set[0])
        self.my_KNN_acc = accuracy_score(self.my_KNN_pred, self.valid_set[1])
        print('KNeighborsClassifier accuracy is: ' + str(self.my_KNN_acc))
        if self.multi_classification:
            print('KNeighborsClassifier penalized accuracy is: ' + self.penalized_accuracy(self.my_KNN_pred, self.valid_set[1]))

    def method_GaussianNB(self):
        """Fit Gaussian Naive Bayes, predict the validation set, print accuracy."""
        self.my_NB.fit(self.train_set[0], self.train_set[1])
        self.my_NB_pred = self.my_NB.predict(self.valid_set[0])
        self.my_NB_acc = accuracy_score(self.my_NB_pred, self.valid_set[1])
        print('GaussianNB accuracy is: ' + str(self.my_NB_acc))
        if self.multi_classification:
            print('GaussianNB penalized accuracy is: ' + self.penalized_accuracy(self.my_NB_pred, self.valid_set[1]))

    def method_LogisticRegression(self):
        """Fit logistic regression, predict the validation set, print accuracy."""
        self.my_LR.fit(self.train_set[0], self.train_set[1])
        self.my_LR_pred = self.my_LR.predict(self.valid_set[0])
        self.my_LR_acc = accuracy_score(self.my_LR_pred, self.valid_set[1])
        print('LogisticRegression accuracy is: ' + str(self.my_LR_acc))

    def method_DecisionTreeClassifier(self):
        """Fit a decision tree, predict the validation set, print accuracy."""
        self.my_DT.fit(self.train_set[0], self.train_set[1])
        self.my_DT_pred = self.my_DT.predict(self.valid_set[0])
        self.my_DT_acc = accuracy_score(self.my_DT_pred, self.valid_set[1])
        print('DecisionTreeClassifier accuracy is: ' + str(self.my_DT_acc))
        if self.multi_classification:
            print('DecisionTreeClassifier penalized accuracy is: ' + self.penalized_accuracy(self.my_DT_pred, self.valid_set[1]))

    def method_RandomForestClassifier(self):
        """Fit the random forest, predict the validation set, print accuracy."""
        # (commented-out GridSearchCV experiment kept for reference)
        # pipeline = Pipeline([('clf', RandomForestClassifier(criterion='entropy'))])
        # parameters = {'clf__n_estimators': (5, 10, 20, 50),
        #               'clf__max_depth': (50, 150, 250),
        #               'clf__min_samples_split': (1.0, 2, 3),
        #               'clf__min_samples_leaf': (1, 2, 3)}
        # grid_search = GridSearchCV(pipeline,
        #                            parameters,
        #                            #n_jobs=-1,
        #                            verbose=1,
        #                            scoring='accuracy')
        # grid_search.fit(self.train_set[0], self.train_set[1])
        # print('Best score: %0.3f' % grid_search.best_score_)
        # print('Best parameters; ')
        # best_parameters = grid_search.best_estimator_.get_params()
        # for param_name in sorted(best_parameters.keys()):
        #     print('\t%s: %r' % (param_name, best_parameters[param_name]))
        # self.my_RF_score = self.scores(grid_search,
        #                                self.valid_set[0],
        #                                self.valid_set[1],
        #                                cv=5)
        self.my_RF.fit(self.train_set[0], self.train_set[1])
        #self.my_RF_score =
        # NOTE(review): the next line computes the score but discards the
        # result (the assignment above it was commented out).
        self.my_RF.score(self.valid_set[0],self.valid_set[1])
        self.my_RF_pred = self.my_RF.predict(self.valid_set[0])
        self.my_RF_acc = accuracy_score(self.my_RF_pred, self.valid_set[1])
        print('RandomForestClassifier accuracy is: ' + str(self.my_RF_acc))
        if self.multi_classification:
            print('RandomForestClassifier penalized accuracy is: ' + self.penalized_accuracy(self.my_RF_pred, self.valid_set[1]))

    def method_Perception(self):
        # NOTE(review): "Perception" (also in the printed messages) is likely a
        # typo for "Perceptron", but the method name is public API — renaming
        # would break callers, so it is left as-is.
        self.my_P.fit(self.train_set[0], self.train_set[1])
        self.my_P_pred = self.my_P.predict(self.valid_set[0])
        self.my_P_acc = accuracy_score(self.my_P_pred, self.valid_set[1])
        print('Perception accuracy is: ' + str(self.my_P_acc))
        if self.multi_classification:
            print('Perception penalized accuracy is: ' + self.penalized_accuracy(self.my_P_pred, self.valid_set[1]))

    def method_SVM_rbf(self):
        """Fit the RBF-kernel SVM, predict the validation set, print accuracy."""
        # (commented-out GridSearchCV experiment kept for reference)
        # pipeline = Pipeline([('clf', sklearn.svm.SVC(kernel='rbf', gamma=0.01, C=100))])
        # parameters = {'clf__gamma': (0.01, 0.03, 0.1, 0.3, 1),
        #               'clf__C': (0.1, 0.3, 1, 3, 10, 30), }
        # parameters = {'clf__gamma': (0.03),
        #               'clf__C': (30), }
        # grid_search = GridSearchCV(pipeline,
        #                            parameters,
        #                            verbose=1,
        #                            scoring='accuracy')
        # grid_search.fit(self.train_set[0], self.train_set[1])
        # print('Best score:%0.3f' % grid_search.best_score_)
        # print('Best paragram:')
        # best_parameters = grid_search.best_estimator_.get_params()
        # for param_name in sorted(parameters.keys()):
        #     print('\t%s: %r' % (param_name, best_parameters[param_name]))
        self.my_SVM_rbf.fit(self.train_set[0], self.train_set[1])
        self.my_SVM_rbf_pred = self.my_SVM_rbf.predict(self.valid_set[0])
        self.my_SVM_rbf_acc = accuracy_score(self.my_SVM_rbf_pred, self.valid_set[1])
        print('SVM_rbf accuracy is: ' + str(self.my_SVM_rbf_acc))
        if self.multi_classification:
            print('SVM_rbf penalized accuracy is: ' + self.penalized_accuracy(
                self.my_SVM_rbf_pred, self.valid_set[1]))

    def method_SVM_linear(self):
        """Fit the linear-kernel SVM, predict the validation set, print accuracy."""
        # (commented-out GridSearchCV experiment kept for reference)
        # pipeline = Pipeline([('clf', SVC(kernel='linear', gamma=0.01, C=100))])
        # parameters = {'clf__gamma': (0.01, 0.03, 0.1, 0.3, 1),
        #               'clf__C': (0.1, 0.3, 1, 3, 10, 30), }
        # grid_search = GridSearchCV(pipeline,
        #                            parameters,
        #                            verbose=1,
        #                            scoring='accuracy')
        # grid_search.fit(self.train_set[0], self.train_set[1])
        # print('Best score:%0.3f' % grid_search.best_score_)
        # print('Best paragram:')
        # best_parameters = grid_search.best_estimator_.get_params()
        # for param_name in sorted(parameters.keys()):
        #     print('\t%s: %r' % (param_name, best_parameters[param_name]))
        # #self.my_SVM_rbf.fit(self.train_set[0], self.train_set[1])
        # self.my_SVM_rbf_score = grid_search.score(self.valid_set[0],self.valid_set[1])
        # print('SVM_rbf score is: ' + str(self.my_SVM_rbf_score))
        self.my_SVM_linear.fit(self.train_set[0], self.train_set[1])
        self.my_SVM_linear_pred = self.my_SVM_linear.predict(self.valid_set[0])
        self.my_SVM_linear_acc = accuracy_score(self.my_SVM_linear_pred, self.valid_set[1])
        print('SVM_linear accuracy is: ' + str(self.my_SVM_linear_acc))
        if self.multi_classification:
            print('SVM_linear penalized accuracy is: ' + self.penalized_accuracy(self.my_SVM_linear_pred, self.valid_set[1]))
# Treinando os modelos com dados X e Y clf_tree.fit(X, Y) clf_svm.fit(X, Y) clf_perceptron.fit(X, Y) clf_KNN.fit(X, Y) # Testando pred_tree = clf_tree.predict(X) acc_tree = accuracy_score(Y, pred_tree) * 100 print('Acurácia do método DecisionTree: {}'.format(acc_tree)) pred_svm = clf_svm.predict(X) acc_svm = accuracy_score(Y, pred_svm) * 100 print('Acurácia do método SVM: {}'.format(acc_svm)) pred_per = clf_perceptron.predict(X) acc_per = accuracy_score(Y, pred_per) * 100 print('Acurácia do método perceptron: {}'.format(acc_per)) pred_KNN = clf_KNN.predict(X) acc_KNN = accuracy_score(Y, pred_KNN) * 100 print('Acurácia do método KNN: {}'.format(acc_KNN)) # The best classifier from svm, per, KNN index = np.argmax([acc_svm, acc_per, acc_KNN]) classifiers = {0: 'SVM', 1: 'Perceptron', 2: 'KNN'} print('\nO melhor método foi: {}\n'.format(classifiers[index])) test = [168, 60, 39] prediction = clf_tree.predict([test])
#classifiers dtc = tree.DecisionTreeClassifier() svmc = SVC() perC = Perceptron() KNNc = KNeighborsClassifier() #model training dtc = dtc.fit(X, Y) svmc = svmc.fit(X, Y) perC = perC.fit(X, Y) KNNc = KNNc.fit(X, Y) # prediction prediction1 = dtc.predict(X) prediction2 = svmc.predict(X) prediction3 = perC.predict(X) prediction4 = KNNc.predict(X) #accuracy acc_dtc = accuracy_score(Y, prediction1) print("dtc", acc_dtc) acc_svmc = accuracy_score(Y, prediction2) print("svmc", acc_svmc) acc_perC = accuracy_score(Y, prediction3) print("perC", acc_perC) acc_knnc = accuracy_score(Y, prediction4) print("knnc", acc_knnc) # print best result mval = max(acc_dtc, acc_svmc, acc_perC, acc_knnc) acc = {
# Train a perceptron on (X, y) and plot the per-epoch misclassification count.
# NOTE(review): sklearn's Perceptron exposes no `errors` attribute — `pn` is
# presumably an instance of a hand-rolled Perceptron class (defined elsewhere
# in this project) that records errors per epoch; confirm before editing.
pn = Perceptron()
pn.fit(X, y)
plt.plot(range(1, len(pn.errors) + 1), pn.errors, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
plt.show()

# metrics calculation: 70/30 split, retrain, predict the held-out set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
ppn = Perceptron()
ppn.fit(X_train, y_train)
y_pred = ppn.predict(X_test)

# printing the results
print('Metrics for perceptron classifier\n\nMisclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Confusion matrix: \n%s' % confusion_matrix(y_test, y_pred))

# metrics calculation — reload the Iris data for the next experiment
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None)
y = df.iloc[0:100, 4].values  # species column of the first 100 rows
y = np.where(y == 'Iris-setosa', 0, 1)  # binary labels: setosa -> 0, else 1
X = df.iloc[0:100, [0, 1, 2]].values  # first three measurement columns
# Fit three classifiers on (X, Y) and print, for each: its predictions on two
# hard-coded samples and its accuracy on the training data.
print()

# Gaussian Naive Bayes
gaussian = GaussianNB()
gaussian.fit(X, Y)
prediction = gaussian.predict([[190, 70, 43], [186, 65, 39]])
acc_gaussian = round(gaussian.score(X, Y) * 100, 2)  # accuracy on training data
print("Naive Bayes: ")
print(prediction)
print(acc_gaussian)
print()

# Perceptron
perceptron = Perceptron()
perceptron.fit(X, Y)
prediction = perceptron.predict([[190, 70, 43], [186, 65, 39]])
acc_perceptron = round(perceptron.score(X, Y) * 100, 2)
print("Perceptron: ")
print(prediction)
print(acc_perceptron)
print()

# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(X, Y)
Y_pred = linear_svc.predict([[190, 70, 43], [186, 65, 39]])
acc_linear_svc = round(linear_svc.score(X, Y) * 100, 2)
print("LinearSVC: ")
# BUG FIX: this section previously printed the stale `prediction` left over
# from the Perceptron block; it must print the LinearSVC result in Y_pred.
print(Y_pred)
print(acc_linear_svc)
print()
# 80/20 random train/test split via a boolean mask over the rows.
msk = np.random.rand(len(dataSet)) < 0.8
trainData = dataSet[msk]
testData = dataSet[~msk]
X_train, Y_train = getXandY(trainData)
X_test, Y_test = getXandY(testData)

# warm_start=True makes each fit() continue from the previous weights, so the
# loop below performs 700 incremental training rounds.  random_state=np.random
# hands sklearn the global numpy RandomState singleton (non-reproducible runs).
pla = Perceptron(max_iter=1000, random_state=np.random, warm_start=True)
print(pla.get_params())
for i in range(0, 700):
    pla = pla.fit(X_train, Y_train)
score = pla.score(X_test, Y_test)
Y_pred = pla.predict(X_test)

# F1 Measure / confusion-matrix diagnostics
Y_test = pd.Series(Y_test)
series = Y_test.value_counts()
# NOTE(review): series[0]/series[1] index value_counts by the LABEL values 0
# and 1 — this assumes binary {0, 1} labels; verify against getXandY().
null_accuracy = (series[0] / (series[0] + series[1]))
print('Null Acuuracy: ', str(null_accuracy))
cm = confusion_matrix(Y_test, Y_pred)
print(cm)
print('Confusion matrix\n\n', cm)
# NOTE(review): the TP/TN/FP labels below assume label 0 is the "positive"
# class (cm rows/cols are ordered by sorted label) — confirm the intent.
print('\nTrue Positives(TP) = ', cm[0, 0])
print('\nTrue Negatives(TN) = ', cm[1, 1])
print('\nFalse Positives(FP) = ', cm[0, 1])
# Standardize features with a scaler fitted on the training data only, then
# train and evaluate a perceptron on the scaled data.
sc.fit(X_train)
X_train_std = sc.transform(X_train)  # scale the training data
X_test_std = sc.transform(X_test)    # apply the SAME transformation to the test data

### 004 train a perceptron model
from sklearn.linear_model import Perceptron

# FIX: the legacy `n_iter` constructor argument was deprecated in sklearn 0.19
# and removed in 0.21, so Perceptron(n_iter=40, ...) now raises TypeError.
# max_iter=40 with tol=None reproduces the old fixed 40-epoch behaviour.
ppn = Perceptron(max_iter=40, tol=None, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

### 005 make prediction
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

### 006 calculate the classification accuracy
from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

### 007 plot decision regions
from plot_decision_regions import plot_decision_regions
import matplotlib.pyplot as plt
# 90/10 train/validation split (shuffled, fixed seed for reproducibility).
X_train, X_valid, y_train, y_valid = train_test_split(X, y, shuffle=True, random_state=10, test_size=0.1)
print(X_train.shape, X_valid.shape, len(y_train), len(y_valid))

# 4.2 train a linear model using Perceptron
from sklearn.linear_model import Perceptron
from sklearn.metrics import f1_score, precision_recall_fscore_support, classification_report, confusion_matrix

model_pc = Perceptron(tol=1e-4, random_state=42, penalty='l2')  # initialize perceptron model (L2-regularized)
model_pc.fit(X_train, y_train)  # train model (learning)
y_pred_pc = model_pc.predict(X_valid)

# 4.3 evaluate performance by printing f1 score, confusion matrix and classification report
# NOTE(review): the next two expression statements compute but discard their
# results — only useful in a notebook where the last value is auto-displayed.
confusion_matrix(y_valid, y_pred_pc)
f1_score(y_valid, y_pred_pc)
print(classification_report(y_valid, y_pred_pc))

# 5. RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
model_rf = RandomForestClassifier(n_estimators=500, criterion='entropy', random_state=10, n_jobs=-1, max_depth=10, verbose=1)
from sklearn.linear_model import Perceptron
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Compare perceptron accuracy on raw vs standardized features.
scaler = StandardScaler()
clf = Perceptron()

# FIX: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0; with
# header=None the columns are a 0-based integer range, so .iloc selects the
# same data.  Each CSV is also read once instead of twice.
train_df = pd.read_csv('perceptron-train.csv', header=None)
test_df = pd.read_csv('perceptron-test.csv', header=None)
X_train = train_df.iloc[:, 1:]  # features: every column after the first
X_test = test_df.iloc[:, 1:]
y_train = train_df.iloc[:, 0]   # target: first column
y_test = test_df.iloc[:, 0]

# Baseline: unscaled features.
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, predictions))

# Repeat with standardized features (scaler fitted on the training data only).
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
clf.fit(X_train_scaled, y_train)
predict_scaled_data = clf.predict(X_test_scaled)
# FIX: corrected the "scalibng" typo in the output message.
print('Accuracy after scaling: ', accuracy_score(y_test, predict_scaled_data))
print(
    'Difference: ',
    abs(
        accuracy_score(y_test, predictions) -
        accuracy_score(y_test, predict_scaled_data)).round(3))
X_train_std = sc.transform(X_train) # apply to the training data X_test_std = sc.transform(X_test) # and SAME transformation of test data!!! # perceptron linear # epoch is one forward and backward pass of all training samples (also an iteration) # eta0 is rate of convergence # max_iter, tol, if it is too low it is never achieved # and continues to iterate to max_iter when above tol # fit_intercept, fit the intercept or assume it is 0 # slowing it down is very effective, eta is the learning rate ppn = Perceptron(max_iter=10, tol=1e-3, eta0=0.001, fit_intercept=True, random_state=100, verbose=True) ppn.fit(X_train_std, y_train) # do the training print('Number in test ',len(y_test)) y_pred = ppn.predict(X_test_std) # now try with the test data # Note that this only counts the samples where the predicted value was wrong print('Misclassified samples: %d' % (y_test != y_pred).sum()) # how'd we do? print('Accuracy: %.2f' % accuracy_score(y_test, y_pred)) # vstack puts first array above the second in a vertical stack # hstack puts first array to left of the second in a horizontal stack # NOTE the double parens! X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((y_train, y_test)) print('Number in combined ',len(y_combined)) # we did the stack so we can see how the combination of test and train data did y_combined_pred = ppn.predict(X_combined_std) print('Misclassified combined samples: %d' % (y_combined != y_combined_pred).sum())
# produce diagram with the parameters and score showDiagram(parameters_array, score_array, title='F1 score - Random Forest - Validation set', parameters="n_estimators, max_depth", color='brown') clear_arrays() # Perceptron for max_it in (100, 500, 2000): parameters_array.append(max_it) Per_classifier = Perceptron(max_iter=max_it) Per_classifier = Per_classifier.fit(X_train, Y_train) Per_prediction = Per_classifier.predict(X_validation) f1 = f1_score(Y_validation, Per_prediction, average='micro') score_array.append(f1) # produce diagram with the parameters and score showDiagram(parameters_array, score_array, title='F1 score - Perceptron - Validation set', parameters="max_iter", color='brown') clear_arrays() # GaussianNB for var_sm in (1e-9, 1e-10, 1e-11):
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

# Train a perceptron to recognise Iris setosa from petal measurements.
iris = load_iris()
X = iris.data[:, (2, 3)]  # petal length and width
# FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
# `int` is the documented replacement and yields the same 0/1 labels.
y = (iris.target == 0).astype(int)  # is it Iris setosa?

# Equivalent to SGDClassifier(loss="perceptron", learning_rate="constant",
# eta0=1, penalty=None).  Unlike logistic regression it provides no
# probabilities; predictions come from a fixed decision threshold.
per_clf = Perceptron()
per_clf.fit(X, y)

y_pred = per_clf.predict([[2, 0.5]])
print(y_pred)
#est = CalibratedClassifierCV (rf, method='isotonic', cv=5)
# FIX: Perceptron's legacy `n_iter` argument was deprecated in sklearn 0.19
# and removed in 0.21 (TypeError on current versions); max_iter=100 with
# tol=None reproduces the old fixed 100-epoch behaviour.
est = Perceptron(fit_intercept=False, max_iter=100, tol=None, shuffle=False)
est.fit(data_x, data_y)

# Cross-validated score of the same configuration.
scores = cross_val_score(est, data_x, data_y)
print("score mean = %f" % scores.mean())

#Z = hierarchy.linkage(data_x, 'single')
#plt.figure()
#dn = hierarchy.dendrogram(Z)
#plt.show()
#print (est.score (data_x,data_y))

# Predict the evaluation set and emit one integer label per line.
xxx = est.predict(data_xt)
y2 = xxx.tolist()

#scores = cross_val_score(est, data_x, data_y, cv=5, scoring='accuracy')
#print("Accuracy: %0.2f (+/- %0.2f) " % (scores.mean(), scores.std()))
#print ("AUC-ROC (oob) = %0.2f" % est.oob_score_)

for i in y2:
    print(int(round(i, 0)))
actual_targets = capture_targets( 'test_with_label_2.csv') # pass test set with targets """ Run GNB model """ fitted_gnb = GaussianNB().fit( train_features, train_targets) # fit model with training set values predicted_targets = list(fitted_gnb.predict( test_features)) # get predictions from model and record them export_results(actual_targets, predicted_targets, 'GNB-DS2.csv') """ Run PER model """ fitted_per = Perceptron().fit( train_features, train_targets) # fit model with training set values predicted_targets = list(fitted_per.predict( test_features)) # get predictions from model and record them export_results(actual_targets, predicted_targets, 'PER-DS2.csv') """ Run BaseDT model """ fitted_baseDT = DecisionTreeClassifier(criterion='entropy').fit( train_features, train_targets) # fit model with training set values predicted_targets = list(fitted_baseDT.predict( test_features)) # get predictions from model and record them export_results(actual_targets, predicted_targets, 'Base-DT-DS2.csv') """ Find best hyperparameters for the BestDT model Parameter options to tune: • splitting criterion: gini and entropy • maximum depth of the tree: 10 and no maximum
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Minimal Perceptron demo: fit three 2-D integer points and predict them back.
scaler = StandardScaler()  # instantiated but never applied below

# Same data as before, built arithmetically: [[1, 2], [3, 4], [5, 6]].
X = np.arange(1, 7).reshape(3, 2)
y = np.array([0, 1, 0])

clf = Perceptron()
clf.fit(X, y)
predictions = clf.predict(X)
# VisualizeResult(X_test, y_test, clf2,'SVM(Testing set)' ) #Perceptron ######################################################################### skf = StratifiedKFold(shuffle=True) table = [] for train_index, val_index in skf.split(X, y): X_train, X_val = X[train_index], X[val_index] y_train, y_val = y[train_index], y[val_index] clf3 = Perceptron() clf3.fit(X_train, y_train) val_acc = clf3.score(X_val, y_val) table.append(val_acc) y_pred3 = clf3.predict(X_test) acc3 = accuracy_score(y_test, y_pred3) cm3 = confusion_matrix(y_test, y_pred3) print("Perceptron:", round(100 * acc3, 2), "%") print("cross_val_acc mean:", round(np.mean(table), 3)) print("cross_val_acc std:", round(np.std(table), 3)) print(cm3, "\n") # VisualizeResult(X_test, y_test, clf3,'Perceptron(Testing set)' ) #OVR ######################################################################### skf = StratifiedKFold(shuffle=True) table = [] for train_index, val_index in skf.split(X, y): X_train, X_val = X[train_index], X[val_index] y_train, y_val = y[train_index], y[val_index]