class PerceptronClassifier(AbstractSKLearnClassifier):
    """Perceptron exposed through the AbstractSKLearnClassifier interface."""

    def __init__(self):
        AbstractSKLearnClassifier.__init__(self)
        # Placeholder until train_classifier() stores a fitted estimator.
        self.model = False

    def set_label_encoder(self, labels):
        """Delegate label-encoder setup to the base class."""
        AbstractSKLearnClassifier.set_label_encoder(self, labels)

    def return_label_encoding(self, labels):
        """Return the base class's encoding of ``labels``."""
        return AbstractSKLearnClassifier.return_label_encoding(self, labels)

    def train_classifier(self, trainvectors, labels, alpha='1.0', iterations=10, jobs=1, v=2):
        """Fit a Perceptron and keep it in ``self.model``.

        ``alpha`` is either the string ``'search'`` (grid-search alpha over
        ``numpy.linspace(0, 2, 20)[1:]``) or a value convertible with
        ``float()``. ``iterations`` and ``jobs`` are cast with ``int()``.
        ``v`` is accepted but not used by this method.
        """
        # NOTE(review): `n_iter` is only accepted by older scikit-learn
        # releases -- confirm against the version this project pins.
        n_epochs = int(iterations)
        n_workers = int(jobs)
        if alpha == 'search':
            candidate_alphas = numpy.linspace(0, 2, 20)[1:]
            search = GridSearchCV(
                estimator=Perceptron(),
                param_grid={'alpha': candidate_alphas, 'n_iter': [n_epochs]},
                n_jobs=n_workers,
            )
            search.fit(trainvectors, labels)
            chosen_alpha = search.best_estimator_.alpha
        else:
            chosen_alpha = float(alpha)
        self.model = Perceptron(alpha=chosen_alpha, n_iter=n_epochs, n_jobs=n_workers)
        self.model.fit(trainvectors, labels)

    def return_classifier(self):
        """Return the stored model (``False`` before training)."""
        return self.model

    def return_model_insights(self, vocab=False):
        """Return an empty list; no insights are produced for this model."""
        return []
class ClassificationPLA(ClassficationBase.ClassificationBase):
    """Perceptron Learning Algorithm classifier on top of ClassificationBase."""

    def __init__(self, isTrain, isOutlierRemoval=0):
        super(ClassificationPLA, self).__init__(isTrain, isOutlierRemoval)
        # data preprocessing
        self.dataPreprocessing()
        # PLA object
        self.clf = Perceptron()

    def dataPreprocessing(self):
        """Run the preprocessing steps applied before training."""
        # deal with unbalanced data
        self.dealingUnbalancedData()
        # Standardization is currently disabled:
        #self.Standardization()

    def training(self):
        """Fit the perceptron on ``self.X_train`` / ``self.y_train``."""
        self.clf.fit(self.X_train, self.y_train.ravel())

    def predict(self):
        """Predict ``self.X_test``, store the result in ``self.y_pred`` and print the error rate."""
        self.y_pred = self.clf.predict(self.X_test)
        # Reshape to a column so it can be compared element-wise with
        # self.y_test (assumes y_test is an (n, 1) column -- TODO confirm).
        self.y_pred = self.y_pred.reshape((self.y_pred.shape[0], 1))
        err = 1 - np.sum(self.y_test == self.y_pred) * 1.0 / self.y_pred.shape[0]
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3 (the bare print statement is Python-2-only).
        print("Error rate: {}".format(err))
def PERCEPTRON(data_train, data_train_vectors, data_test_vectors, **kwargs):
    """Fit a default Perceptron and return its predictions for the test vectors.

    Labels are read from ``data_train.target``. Extra keyword arguments are
    accepted but ignored.
    """
    classifier = Perceptron()
    classifier.fit(data_train_vectors, data_train.target)
    return classifier.predict(data_test_vectors)
def run(self):
    """Run the task: train a Perceptron on the CSV data and write its test accuracy.

    File paths come from ``self.param['train']`` / ``self.param['test']``;
    column ``'0'`` is the target and columns ``'1'``, ``'2'`` are the
    features. Features are standardised only when ``self.param['scale']``
    is exactly ``True``.
    """
    feature_columns = ['1', '2']
    target_column = '0'

    train_frame = pd.read_csv(self.param.get('train'))
    test_frame = pd.read_csv(self.param.get('test'))

    X_train, y_train = train_frame[feature_columns], train_frame[target_column]
    X_test, y_test = test_frame[feature_columns], test_frame[target_column]

    if self.param.get('scale') is True:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    model = Perceptron(random_state=241)
    model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, model.predict(X_test))

    with self.output().open('w') as sink:
        sink.write(str(accuracy))
def percep(X_tr, y_tr, X_te):
    """Train a 1000-iteration Perceptron on dummy-augmented features and predict ``X_te``."""
    model = Perceptron(n_iter=1000)
    # Both matrices go through add_dummy_feature before being used.
    train_aug = add_dummy_feature(X_tr)
    test_aug = add_dummy_feature(X_te)
    model.fit(train_aug, y_tr)
    return model.predict(test_aug)
def perceptron_histo():
    """Treat images as colour histograms and classify them with the Perceptron.

    Tries every (iterations, alpha) pair for iterations 1..4 and alpha in
    ``np.arange(0.01, 1.01, 0.1)``, then prints one summary row for the
    best-scoring combination.
    """
    best_score = best_alpha = best_iterations = best_duration = 0.0
    candidate_alphas = np.arange(0.01, 1.01, 0.1)

    _, data, target, _ = utils.chargementHistogrammesImages(mer, ailleurs, 1, -1)
    X_train, X_test, Y_train, Y_test = train_test_split(
        data, target, test_size=0.3, random_state=random.seed())

    for iterations in range(1, 5):
        for a in candidate_alphas:
            started = time.time()
            model = Perceptron(alpha=a, n_iter=iterations,
                               random_state=random.seed(), n_jobs=-1)
            train_matrix = np.array(X_train)
            test_matrix = np.array(X_test)
            model.fit(X=train_matrix, y=Y_train)
            score = model.score(test_matrix, Y_test)
            finished = time.time()
            if score > best_score:
                best_score = score
                best_alpha = a
                best_iterations = iterations
                best_duration = finished - started

    print("| Perceptron simple | V.Histo | alpha={:1.2f} iterations={:1.0f} | {:10.3f}ms | {:1.3f} |".format(best_alpha, best_iterations, best_duration * 1000, best_score))
def Perceptron_1(train_predictors,test_predictors,train_target,test_target):
    """Fit a default Perceptron, print its test accuracy and return ``(accuracy, predicted)``."""
    clf = Perceptron()
    clf.fit(train_predictors, train_target)
    predicted = clf.predict(test_predictors)
    accuracy = accuracy_score(test_target, predicted)
    # Parenthesised single-argument print emits the same text on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print("Accuracy for Linear Model Perceptron: "+str(accuracy))
    return accuracy, predicted
def perceptron_vecteur():
    """Treat images as pixel vectors and classify them with the Perceptron.

    For npix in 50, 100, 150 the images are reloaded and every
    (iterations, alpha) pair is tried; one summary row is printed for the
    best-scoring combination overall.
    """
    best_score = best_alpha = best_iterations = best_duration = best_npix = 0.0
    candidate_alphas = np.arange(0.01, 1.01, 0.1)

    for npix in range(50, 200, 50):
        _, data, target, _ = utils.chargementVecteursImages(mer, ailleurs, 1, -1, npix)
        X_train, X_test, Y_train, Y_test = train_test_split(
            data, target, test_size=0.3, random_state=random.seed())
        for iterations in range(1, 5):
            for a in candidate_alphas:
                started = time.time()
                model = Perceptron(alpha=a, n_iter=iterations,
                                   random_state=random.seed(), n_jobs=-1)
                # X_train etc. are 3-dimensional arrays by default, e.g.
                # (93, 1, 30000), and must be brought back to 2 dimensions.
                train_matrix = np.array(X_train)
                train_matrix = train_matrix.reshape(
                    (train_matrix.shape[0], train_matrix.shape[2]))
                test_matrix = np.array(X_test)
                test_matrix = test_matrix.reshape(
                    (test_matrix.shape[0], test_matrix.shape[2]))
                model.fit(X=train_matrix, y=Y_train)
                score = model.score(test_matrix, Y_test)
                finished = time.time()
                if score > best_score:
                    best_score = score
                    best_alpha = a
                    best_iterations = iterations
                    best_duration = finished - started
                    best_npix = npix

    print("| Perceptron simple | V.Pix {:4.0f} | alpha={:1.2f} iterations={:1.0f} | {:10.3f}ms | {:1.3f} |".format(best_npix, best_alpha, best_iterations, best_duration * 1000, best_score))
def t1():
    """Fit a Perceptron on three toy samples and print its predictions for them."""
    from sklearn.linear_model import Perceptron

    samples = np.array([[1, 2], [3, 4], [5, 6]])
    targets = np.array([0, 1, 0])
    model = Perceptron()
    model.fit(samples, targets)
    print(model.predict(samples))
def perceptron(kf,data,label,k):
    """Return the mean cross-validation score of an L2-penalised Perceptron.

    The score comes from ``cross_validation.cross_val_score(..., cv=k)``.

    ``kf`` is kept for backward compatibility but is no longer iterated:
    the previous per-fold fit/predict produced values that were never used,
    and because the estimator was only created inside that loop an empty
    ``kf`` led to a NameError before scoring.
    """
    estimator = Perceptron(penalty="l2", alpha=0.003)
    scores = cross_validation.cross_val_score(estimator, data, label, cv=k)
    return scores.mean()
def main():
    """Train a Perceptron to detect iris class 0 and print the prediction for one sample."""
    iris = load_iris()
    # Columns 2 and 3: petal length, petal width.
    petal_features = iris.data[:, (2, 3)]
    # Binary target: 1 where the class index is 0, otherwise 0.
    is_class_zero = (iris.target == 0.).astype(np.int32)

    perceptron_classifier = Perceptron(random_state=42)
    perceptron_classifier.fit(petal_features, is_class_zero)
    print(perceptron_classifier.predict([[2, 0.5]]))
def classify_perceptron():
    """Train a default Perceptron on ``util.load_all_feat()`` data and print its test accuracy."""
    # Each print passes a single pre-formatted string so the output is the
    # same on Python 2 and Python 3 (the print statement is Python-2-only).
    print("perceptron")
    (X_train, y_train), (X_test, y_test) = util.load_all_feat()
    print("{} {}".format("original X_train shape", X_train.shape))
    clf = Perceptron()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    print("{} {}".format("accuracy score:", accuracy_score(y_test, pred)))
def train_data_perceptron( tup, penalty ):
    """Fit and return a Perceptron for the ``(df, features, label)`` tuple.

    The learning rate and iteration count come from the module-level
    ``ETA`` and ``CYCLES`` values.
    """
    frame, feature_columns, label_column = tup
    settings = {
        'penalty': penalty,
        'fit_intercept': True,
        'eta0': ETA,
        'n_iter': CYCLES,
        'n_jobs': 2,
    }
    model = Perceptron(**settings)
    model.fit(frame[feature_columns], frame[label_column])
    return model
def solve(train_set_x, train_set_y, test_set_x, test_set_y):
    """Return the test accuracy of a Perceptron(random_state=241) fitted on the training set."""
    model = Perceptron(random_state=241)
    model.fit(X=train_set_x, y=train_set_y)
    return accuracy_score(test_set_y, model.predict(test_set_x))
def get_accuracy(_data_train_features, _data_train_labels, _data_test_features, _data_test_labels):
    """Return the test-set accuracy of a shuffled Perceptron with random_state=241."""
    # Train a perceptron with default parameters and random_state=241.
    flat_labels = numpy.ravel(_data_train_labels)
    model = Perceptron(random_state=241, shuffle=True)
    model.fit(_data_train_features, flat_labels)
    # Compute the quality (share of correctly classified objects, i.e.
    # accuracy) of the resulting classifier on the test sample.
    return accuracy_score(_data_test_labels, model.predict(_data_test_features))
def neural_net(train, test):
    """Train a Perceptron on ``train``, print the test error and return predictions for ``test``.

    ``loadData`` is expected to return ``(features, labels)`` in that order.
    """
    xTrain, yTrain = loadData(train)
    xTest, yTest = loadData(test)
    nN = Perceptron()
    nN.fit(xTrain, yTrain)
    y = nN.predict(xTest)
    testError = 1 - nN.score(xTest, yTest)
    # Single pre-formatted argument: identical output on Python 2 and 3
    # (the original print statement is a SyntaxError on Python 3).
    print('{} {}'.format('Test error: ', testError))
    return y
def test():
    """Fit a Perceptron on three toy samples and print its predictions and training accuracy."""
    samples = np.array([[1, 2], [3, 4], [5, 6]])
    targets = np.array([0, 1, 0])
    model = Perceptron()
    model.fit(samples, targets)
    predicted = model.predict(samples)
    print("Predictions: %s" % predicted)
    print("Accuracy: %s" % accuracy_score(targets, predicted))
def perceptron(trainingData,trainingLabels):
    """Fit a linear Perceptron on the training data and return the fitted model.

    Prints the training-set size once the model has been fitted.
    """
    from sklearn.linear_model import Perceptron
    clf = Perceptron()
    clf.fit(trainingData, trainingLabels)
    # Single pre-formatted argument: identical output on Python 2 and 3
    # (the original print statement is a SyntaxError on Python 3).
    print("{} {}".format("Perceptron has been generated with a training set size of", len(trainingLabels)))
    return clf
def test_perceptron_correctness():
    """Check that MyPerceptron learns the same weights as scikit-learn's Perceptron.

    Uses the module-level ``X`` and ``y``; every label other than 1 is
    mapped to -1 before fitting.
    """
    signed_labels = y.copy()
    signed_labels[y != 1] = -1

    reference_free = MyPerceptron(n_iter=2)
    reference_free.fit(X, signed_labels)

    sklearn_model = Perceptron(n_iter=2)
    sklearn_model.fit(X, signed_labels)

    assert_array_almost_equal(reference_free.w, sklearn_model.coef_.ravel())
def linear_train(features_train, target_train):
    """Fit and return a Perceptron from two header-less, ';'-separated CSV files.

    The first column of each file is skipped; the remaining feature columns
    are passed through ``scale`` and column 1 of the target file is the label.
    """
    feature_frame = pandas.read_csv(features_train, header=None, sep=';')
    scaled_features = scale(feature_frame.iloc[:, 1:])

    target_frame = pandas.read_csv(target_train, header=None, sep=';')
    target_column = target_frame.iloc[:, 1]

    model = Perceptron(random_state=242)
    model.fit(scaled_features, target_column)
    return model
def neural_net():
    """Train a default Perceptron on the scaled split data and print its test accuracy."""
    Xtrain, ytrain, Xtest, ytest = getSplitData()
    Xtrain, Xtest = getScaledData(Xtrain, Xtest)
    clf = Perceptron()
    clf.fit(Xtrain, ytrain)
    yPredict = clf.predict(Xtest)
    # Single pre-formatted argument: identical output on Python 2 and 3
    # (the original print statement is a SyntaxError on Python 3).
    print("{} {}".format("neural_net classification accuracy: ", accuracy_score(ytest, yPredict)))
def train(im_features, image_classes):
    """Fit a Perceptron on the image features, run 10-fold evaluation, and return the model.

    The value returned by ``evaluate_cross_validation`` is not used here.
    """
    labels = np.array(image_classes)
    # Train the Perceptron
    model = Perceptron(n_iter=100, eta0=0.1)
    model.fit(im_features, labels)
    fold_count = 10
    evaluate_cross_validation(model, im_features, labels, fold_count)
    return model
def neural_net(train, test):
    """Train a Perceptron on ``train``, print the test error and return predictions for ``test``.

    ``loadData`` is expected to return ``(labels, features)`` in that order.
    """
    trainY, trainX = loadData(train)
    testY, testX = loadData(test)
    neuralNet = Perceptron()
    neuralNet.fit(trainX, trainY)
    y = neuralNet.predict(testX)
    testError = 1 - neuralNet.score(testX, testY)
    # Parenthesised single-argument print emits the same text on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print('Test error: ' + str(testError))
    return y
class DrunkLearningOnline(DrunkLearningBatch):
    """Online-learning variant of drunk_learning backed by a Perceptron."""

    def __init__(self):
        super(DrunkLearningOnline, self).__init__()
        self.clf = Perceptron()
        self.filename = 'modelPerceptron.pkl'

    def partial_fit(self, X, y):
        """Update the model with one sample and persist it to ``self.filename``.

        ``X`` is wrapped in a list to form a one-row batch; the known
        classes passed to the estimator are ``[0, 1]``.
        """
        sample_batch = np.array([X])
        targets = np.array(y)
        self.clf.partial_fit(sample_batch, targets, [0, 1])
        joblib.dump(self.clf, self.filename, compress=9)
def main():
    """Train a Perceptron on the extracted features and write a submission file.

    Reads the training data from ``train_file``, maps each ``OpenStatus``
    value to its index in ``ques_status``, fits the model, predicts on
    ``test_file`` and writes the reshaped predictions to
    ``submission_file``. The module-level names ``fea``, ``y`` and ``probs``
    are rebound as a side effect.
    """
    global fea, y, probs

    start = time.time()

    # All prints use the parenthesised single-argument form, which emits
    # the same text on Python 2 and Python 3.
    print("Reading train data and its features from: " + train_file)
    data = cu.get_dataframe(train_file)
    fea = features.extract_features(feature_names, data)

    percep = Perceptron(penalty=None, alpha=0.0001, fit_intercept=False, n_iter=5, shuffle=False,
                        verbose=1, eta0=1.0, n_jobs=-1, seed=0, class_weight="auto", warm_start=False)

    # Labels must be an array-like of integers: string statuses have to be
    # converted, otherwise the fit method raises ValueError.
    y = []
    print("Collecting statuses")
    for element in data["OpenStatus"]:
        for index, status in enumerate(ques_status):
            if element == status:
                y.append(index)

    print("Fitting")
    percep.fit(fea, y)

    # Make sure you have the up to date version of sklearn; v0.12 has the
    # predict_proba method; http://scikit-learn.org/0.11/install.html

    print("Reading test data and features")
    test_data = cu.get_dataframe(test_file)
    test_fea = features.extract_features(feature_names, test_data)

    print("Making predictions")
    # predict_proba is only available for binary classification, so the
    # hard predictions are used instead; their shape is [n_samples].
    probs = percep.predict(test_fea)
    # Convert to shape [n_samples / 5, 5]. Floor division keeps the row
    # count an integer on Python 3 (same value as `/` on Python 2 ints).
    probs = np.resize(probs, (len(probs) // 5, 5))

    print("writing submission to " + submission_file)
    cu.write_submission(submission_file, probs)
    finish = time.time()
    print("completed in %0.4f seconds" % (finish - start))
def test_model(training_data, testing_data, word2vec_model):
    """Return the test accuracy of a 5-iteration Perceptron on the built features.

    One ``DictVectorizer`` is shared by both ``build_features`` calls; the
    training call additionally receives the ``'train'`` flag.
    """
    vectorizer = DictVectorizer()
    train_features, train_labels = build_features(training_data, word2vec_model, vectorizer, 'train')
    test_features, test_labels = build_features(testing_data, word2vec_model, vectorizer)

    classifier = Perceptron(n_iter=5)
    classifier.fit(train_features, train_labels)
    return classifier.score(test_features, test_labels)
def __test_perceptron(self, normalized):
    """Return the test accuracy of a default Perceptron.

    Column 0 of ``self.train_data`` / ``self.test_data`` is the target and
    the remaining columns are features. When ``normalized`` is truthy the
    features are standardised with a scaler fitted on the training set.
    """
    model = Perceptron()

    train_inputs = self.train_data.iloc[:, 1:]
    train_targets = self.train_data.iloc[:, 0]
    test_inputs = self.test_data.iloc[:, 1:]
    test_targets = self.test_data.iloc[:, 0]

    if normalized:
        scaler = StandardScaler()
        train_inputs = scaler.fit_transform(train_inputs)
        test_inputs = scaler.transform(test_inputs)

    model.fit(train_inputs, train_targets)
    return accuracy_score(test_targets, model.predict(test_inputs))
def __init__(self, zmq_sub_string, channel):
    """Set up the vectorizer, classifier and counters, then initialise the base class."""
    self.classes = ["pos", "neg"]
    # Matches the two emoticons ":)" and ":(".
    self.re_emoticons = re.compile(r":\)|:\(")
    self.vec = HashingVectorizer(n_features=2 ** 20, non_negative=True)
    self.clf = Perceptron()
    # Per-phase, per-class sample counters, all starting at zero.
    self.count = {
        phase: {label: 0 for label in ("pos", "neg")}
        for phase in ("train", "test")
    }
    self.train = 1
    # Per-class true-positive / false-positive / false-negative tallies.
    self.eval_count = {
        label: {"tp": 0, "fp": 0, "fn": 0}
        for label in ("pos", "neg")
    }
    super(StreamingLearner, self).__init__(zmq_sub_string, channel)
def runTrial(numberOfTestPoints, iterationLimit, showChart = False):
    """Run one trial and return ``(iterations, w, errorProb)``.

    Random points are generated, a scikit-learn Perceptron and a linear SVC
    are fitted on them, and the project's own ``train`` routine is run.
    With ``showChart`` the points, the target line (black dashed) and the
    learned hypothesis (red) are plotted.
    """
    x1, y1, x2, y2, points = generatePoints(numberOfTestPoints)

    sk_perceptron = Perceptron()
    svm = SVC(C = 1000, kernel = 'linear')
    sample = np.array(points)
    # Columns 1 and 2 are the inputs, column 3 is the label.
    inputs = np.c_[sample[:,1], sample[:,2]]
    labels = sample[:,3]
    sk_perceptron.fit(inputs, labels)
    svm.fit(inputs, labels)

    iterations, w = train(points, iterationLimit)
    errorProb = findErrorProbability(x1, y1, x2, y2, w, 50000, svm, sk_perceptron)

    if not showChart:
        return iterations, w, errorProb

    if iterations == iterationLimit:
        print( "No solution found in " + str(iterations) + " iterations!")
    print( "Iterations: " + str(iterations) + ' | Weights: ' + str(w))

    # plot points above (green) and below (blue) the target function.
    above = [p for p in points if p[3] == 1]
    below = [p for p in points if not (p[3] == 1)]
    pylab.plot([p[1] for p in above], [p[2] for p in above], 'go')
    pylab.plot([p[1] for p in below], [p[2] for p in below], 'bo')

    # plot target function (black) and hypothesis function (red) lines
    xs = np.array( [-1,1] )
    slope = (y2-y1)/(x2-x1)
    intercept = y2 - slope * x2
    pylab.plot(xs, slope*xs + intercept, 'k--')
    # this will throw an error if w[2] == 0
    pylab.plot(xs, -w[1]/w[2] * xs - w[0] / w[2], 'r')
    pylab.ylim([-1,1])
    pylab.xlim([-1,1])
    pylab.show()

    return iterations, w, errorProb
def __init__(self, isTrain, isOutlierRemoval=0): super(ClassificationPLA, self).__init__(isTrain, isOutlierRemoval) # data preprocessing self.dataPreprocessing() # PLA object self.clf = Perceptron()
# Grid for k-nearest-neighbours: k = 1 .. 10.
tuned_param_knn = [{'n_neighbors': list(range(1, 11))}]

# Registry of candidate models: display name, estimator instance and the
# parameter grid to search for each one.
models = {
    'dt': {
        'name': 'Decision Tree       ',
        'estimator': DecisionTreeClassifier(),
        'param': tuned_param_dt,
    },
    'nb': {
        'name': 'Gaussian Naive Bayes',
        'estimator': GaussianNB(),
        'param': tuned_param_nb,
    },
    'lp': {
        'name': 'Linear Perceptron   ',
        'estimator': Perceptron(),
        'param': tuned_param_lp,
    },
    'svc': {
        'name': 'Support Vector      ',
        'estimator': SVC(),
        'param': tuned_param_svc,
    },
    'knn': {
        'name': 'K Nearest Neighbor  ',
        'estimator': KNeighborsClassifier(),
        'param': tuned_param_knn,
    },
}

# Scoring metrics to evaluate.
scores = ['precision', 'recall']
# Gaussian Naive Bayes gaussian = GaussianNB() scores = cross_val_score(gaussian, X_train, Y_train, cv=10) acc_gaussian = round(scores.mean() * 100, 2) acc_gaussian # In[ ]: # Perceptron (a single layer neural net) perceptron = Perceptron() scores = cross_val_score(perceptron, X_train, Y_train, cv=10) acc_perceptron = round(scores.mean() * 100, 2) acc_perceptron # In[ ]: # Neural Network (a multi layer neural net) neural_net = MLPClassifier() scores = cross_val_score(neural_net, X_train, Y_train, cv=10) acc_neural_net = round(scores.mean() * 100, 2) acc_neural_net
def IncreasingFIT():
    """Incrementally train several online classifiers and record their statistics.

    Each of the ``TrainDataSize`` mini-batches in ``xtrain`` / ``ytrain`` is
    vectorised and passed to ``partial_fit`` of every classifier named in
    ``partial_fit_classifiers``. After the last mini-batch the test accuracy
    and timing figures are stored in ``cls_stats`` and, when ``T`` is
    non-zero, also accumulated into ``AccuracyAverage`` /
    ``RecordOneAccuracy``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-mangled source -- confirm against the original file.
    """
    global total_vect_time
    classifiers = {
        'SGD': SGDClassifier(),
        'Perceptron': Perceptron(),
        'NB Multinomial': MultinomialNB(alpha=0.01),
        'Passive-Aggressive': PassiveAggressiveClassifier(),
    }
    # Load the saved vocabulary, if one exists, and keep only the 'name'
    # field of each entry.
    Vocubularysave = []
    if os.path.exists("VocubularySave.v"):
        Vocubularysave = joblib.load("VocubularySave.v")
    VocubularyList = []
    for numV in Vocubularysave:
        VocubularyList.append(numV['name'])
    vectorizer = CountVectorizer(stop_words=None, vocabulary=VocubularyList)
    transformer = TfidfTransformer()
    count = vectorizer.fit_transform(xtest)
    X_test = transformer.fit_transform(count)
    for i in range(TrainDataSize):
        tick = time.time()
        # X_train = vectorizer.transform(xtrain[i])
        count = vectorizer.fit_transform(xtrain[i])
        X_train = transformer.fit_transform(count)
        total_vect_time += time.time() - tick
        for cls_name, cls_useless in partial_fit_classifiers.items():
            cls = classifiers[cls_name]
            tick = time.time()
            # update estimator with examples in the current mini-batch
            # print(X_train)
            cls.partial_fit(X_train, ytrain[i], classes=all_classes)
            # if i % printjumpsize == 0:
            if i == (TrainDataSize - 1):
                # accumulate test accuracy stats
                cls_stats[cls_name]['total_fit_time'] += time.time() - tick
                cls_stats[cls_name]['n_train'] += X_train.shape[0]
                cls_stats[cls_name]['n_train_pos'] += sum(ytrain[i])
                tick = time.time()
                # score on the test set
                cls_stats[cls_name]['accuracy'] = cls.score(X_test, ytest)
                cls_stats[cls_name]['prediction_time'] = time.time() - tick
                acc_history = (cls_stats[cls_name]['accuracy'],
                               cls_stats[cls_name]['n_train'])
                cls_stats[cls_name]['accuracy_history'].append(acc_history)
                run_history = (cls_stats[cls_name]['accuracy'],
                               total_vect_time + cls_stats[cls_name]['total_fit_time'])
                cls_stats[cls_name]['runtime_history'].append(run_history)
                # accumulate test accuracy stats
                if T == 0:
                    print(progress(cls_name, cls_stats[cls_name]))
                if T != 0:
                    AccuracyAverage[cls_name]['total_fit_time'] += time.time(
                    ) - tick
                    AccuracyAverage[cls_name]['n_train'] += X_train.shape[0]
                    AccuracyAverage[cls_name]['n_train_pos'] += sum(ytrain[i])
                    tick = time.time()
                    # score on the test set
                    AccuracyAverage[cls_name]['accuracy'] += cls.score(
                        X_test, ytest)
                    RecordOneAccuracy[cls_name]['accuracy'] += cls.score(
                        X_test, ytest)
                    acc_history = (AccuracyAverage[cls_name]['accuracy'],
                                   AccuracyAverage[cls_name]['n_train'])
                    AccuracyAverage[cls_name]['accuracy_history'].append(
                        acc_history)
                    # Extends the tuple built above with two more entries.
                    run_history += (
                        AccuracyAverage[cls_name]['accuracy'],
                        total_vect_time + AccuracyAverage[cls_name]['total_fit_time'])
                    AccuracyAverage[cls_name]['runtime_history'].append(
                        run_history)
                    recordAccuracy.append(RecordOneAccuracy)
                    print(progress2(cls_name, AccuracyAverage[cls_name], T))
import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn.linear_model import SGDClassifier, Perceptron from sklearn.linear_model import PassiveAggressiveClassifier from sklearn.linear_model import LogisticRegression heldout = [0.95, 0.90, 0.75, 0.50, 0.01] rounds = 20 digits = datasets.load_digits() X, y = digits.data, digits.target classifiers = [("SGD", SGDClassifier(max_iter=100, tol=1e-3)), ("ASGD", SGDClassifier(average=True, max_iter=100, tol=1e-3)), ("Perceptron", Perceptron(tol=1e-3)), ("Passive-Aggressive I", PassiveAggressiveClassifier(loss='hinge', C=1.0, tol=1e-4)), ("Passive-Aggressive II", PassiveAggressiveClassifier(loss='squared_hinge', C=1.0, tol=1e-4)), ("SAG", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))] xx = 1. - np.array(heldout) for name, clf in classifiers: print("training %s" % name) rng = np.random.RandomState(42)
def main():
    """Fit several scikit-learn classifiers and print their train/test accuracy.

    The models are evaluated twice: first on the aligned, cleaned feature
    frames and then on a 2-component PCA projection of those features.
    """
    # Number of units in each of the MLP's two hidden layers.
    n = 10

    # Instance of Read class to read in data
    trainData, testData, train, test = Read().read()

    # Ensure that both frames have the same columns; any column added by the
    # alignment is filled with 0.
    trainData, testData = trainData.align(testData, join='outer', fill_value=0, axis=1)

    model = SVC()
    model_GNB = GaussianNB()
    model_Bern = BernoulliNB()
    model_MNB = MultinomialNB()
    model_MLP = MLPClassifier(hidden_layer_sizes=(n, n), activation='logistic', max_iter=2000)

    targetLabels = train['Survived'].values
    testLabels = test['Survived'].values

    trainData, testData = dropData(trainData, testData)
    trainFeatures = trainData
    testFeatures = testData
    print(trainFeatures.shape)
    print(trainFeatures)

    model_Perceptron = Perceptron(n_iter_no_change=10)

    def report(heading, estimator, closing):
        # Score an already-fitted estimator on whatever trainFeatures /
        # testFeatures currently hold and print both as percentages.
        trainScore = estimator.score(trainFeatures, targetLabels) * 100
        testScore = estimator.score(testFeatures, testLabels) * 100
        print(heading, "Train: ", trainScore, "%", "Test: ", testScore, closing)

    for estimator in (model, model_GNB, model_Bern, model_MNB, model_MLP, model_Perceptron):
        estimator.fit(trainFeatures, targetLabels)

    report("Support vector results:\n", model, "%\n\n")
    report("Gaussian NB results:\n", model_GNB, "%\n\n")
    # Bernoulli and Multinomial Naive Bayes are fitted but not reported.
    report("Multi-Layer Perceptron results:\n", model_MLP, "%\n\n")
    report("Perceptron Learning Algorithm results:\n", model_Perceptron, "%\n\n\n\n")

    # Everything below this point uses PCA for dimensionality reduction.
    numComp = 2
    pca = PCA(n_components=numComp)
    pca.fit(trainFeatures)
    trainFeatures = pca.transform(trainFeatures)
    testFeatures = pca.transform(testFeatures)
    print(trainFeatures.shape)

    for estimator in (model, model_GNB, model_Bern):
        estimator.fit(trainFeatures, targetLabels)

    report("(PCA) Support vector results:\n", model, "%\n\n")
    report("(PCA) Gaussian NB results:\n", model_GNB, "%\n\n")

    model_MLP.fit(trainFeatures, targetLabels)
    report("(PCA) Multi-Layer Perceptron results:\n", model_MLP, "%\n\n")

    model_Perceptron.fit(trainFeatures, targetLabels)
    report("(PCA) Perceptron Learning Algorithm results:\n", model_Perceptron, "%\n\n\n")

    plot(trainFeatures, targetLabels, "PCA", "MLP", numComp)
print(np.shape(class_labels)) #print(class_labels) # In[251]: def calcul_accuracy(real_label, pre_label): cnt = 0 for i in range(len(real_label)): if real_label[i] == pre_label[i]: cnt += 1 return cnt / len(real_label) #perceptron on the first two features perceptron = Perceptron(max_iter=1000, tol=0.0001, random_state=None) perceptron.fit( feature_2, class_labels ) #fit(X, y[, coef_init, intercept_init, ...]) Fit linear model with Stochastic Gradient Descent. print("feature_2's final Weight:") #print(perceptron.intercept_) #不懂 final_wei1 = perceptron.coef_ print(final_wei1) label_train_pred1 = perceptron.predict( feature_2) #Predict class labels for samples in X. #print(label_train_pred1) mean_ar = perceptron.score(feature_2, class_labels) #print(mean_ar) mean_accuracy1 = calcul_accuracy( class_labels, label_train_pred1
import numpy as np
from sklearn.linear_model import Perceptron

# Five 2-D training points; only the last one belongs to class 1.
X = [[0, 0.1], [0.1, 0.9], [0.02, 0], [0.9, 0], [1, 0.9]]
y = [0, 0, 0, 0, 1]

per_clf = Perceptron(random_state=42)
per_clf.fit(X, y)

#y_pred = per_clf.predict([[2, 0.5]])
y_pred = per_clf.predict([[0.12, 0], [1, 0], [0, 1], [0.9, 0.9], [1, 1]])
print(y_pred)  # output expected to be [0 0 0 1 1]
def test_multiclass(data):
    """Cross-validate eight classifiers on ``data`` and print accuracy, recall and F-score.

    ``data['target']`` is label-encoded in place with the module-level
    ``lb`` encoder; every other column is used as a feature. Each model is
    scored with 10-fold cross-validation, and the mean and standard
    deviation of each metric are printed.
    """
    data['target'] = lb.fit_transform(data['target'])
    print('Classes: {}'.format(list(lb.classes_)))

    X = data.loc[:, data.columns != 'target']
    y = data['target']

    experiments = [
        ('Arvore de decisao - GINI', DecisionTreeClassifier()),
        ('Arvore de decisao - Entropy', DecisionTreeClassifier(criterion='entropy')),
        ('SVM - Um contra todos', LinearSVC()),
        ('SVM - Crammer-Singer', LinearSVC(multi_class='crammer_singer')),
        ('Perceptron', Perceptron()),
        ('Random Forest - GINI', RandomForestClassifier()),
        ('Random Forest - Entropy', RandomForestClassifier(criterion='entropy')),
        ('Naive Bayes', BernoulliNB()),
    ]
    report_lines = (
        ('Acurácia: {:.2f} (+/- {:.2f})', 'test_acc'),
        ('Cobertura: {:.2f} (+/- {:.2f})', 'test_rec'),
        ('F-score: {:.2f} (+/- {:.2f})', 'test_f1'),
    )

    for position, (title, dt) in enumerate(experiments):
        # A separator line goes between sections, not before the first one.
        if position:
            print('------------------------------')
        print(title)
        scores = cross_validate(dt, X, y=y, cv=10,
                                scoring={
                                    'acc': 'accuracy',
                                    'rec': make_scorer(multiclass_recall),
                                    'f1': 'f1_weighted'
                                },
                                return_train_score=False)
        for template, key in report_lines:
            print(template.format(scores[key].mean(), scores[key].std()))
# 建立正規化物件 sc = StandardScaler() # 利用現有資料計算正規化所需的平均值與標準差 # 不執行此指令則之後的動作都無法操作 sc.fit(X_train) # 查平均值與標準差,回傳陣列[平均值,標準差] sc.mean_ # 利用sc.fit算出來的平均值與標準差來正規化訓練集與測試集 X_train_std = sc.transform(X_train) X_test_std = sc.transform(X_test) # 模型訓練 ppn = Perceptron(max_iter=40, eta0=0.01, random_state=0) ppn.fit(X_train_std, y_train) # 預測 y_pred = ppn.predict(X_test_std) print(accuracy_score(y_test, y_pred)) # 繪圖 X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((y_train, y_test)) pdr.plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105, 150)) plt.xlabel('petal length [standardized]') plt.ylabel('petal width [standardized]')
self.model = model def FIT(self, X_train, y_train): self.model.fit(X_train, y_train) def predict(self, X_test, y_test): y_pred = self.model.predict(X_test_std) print(y_pred) print(f"Number of Misclassified Examples by {self.model} are {sum(y_pred!=y_test)}") print(f"Accuracy of {self.model} using Accuracy_score function is {accuracy_score(y_test, y_pred)}") print(f"Accuracy of {self.model} using Score is {self.model.score(X_test_std, y_test)}") percp = Perceptron(eta0=0.1, random_state=1) Logic = LogisticRegression(C=100.0, random_state=1, solver='lbfgs', multi_class='ovr') Logic2 = LogisticRegression(C=100.0, random_state=1, solver='lbfgs', multi_class='multinomial',) m1 = Model(percp) m2 = Model(Logic) m3 = Model(Logic2) m1.FIT(X_train_std, y_train) m2.FIT(X_train_std, y_train) m3.FIT(X_train_std, y_train) m1.predict(X_test_std, y_test) m2.predict(X_test_std, y_test) m3.predict(X_test_std, y_test)
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

# Iris data restricted to feature columns 2 and 3.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Standardize both splits with statistics fitted on the training split only.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# NOTE(review): `sklearn.cross_validation` and `n_iter` belong to older
# scikit-learn releases -- confirm the version this script targets.
ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0, shuffle=True)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
from sklearn.datasets import load_files from sklearn.model_selection import train_test_split from sklearn import metrics # languages_data_folder = sys.argv[1] #this line is the cause of 90% of my stress in this class today dataset = load_files(languages_data_folder) docs_train, docs_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5) vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer='char', use_idf=False) # this is the thing that gets rid of small, common words because they're not super duper useful clf = Pipeline([ ('vec', vectorizer), ('clf', Perceptron(tol=1e-3)), ]) #This is equivalent to one millitolerance clf.fit(docs_train, y_train) y_predicted = clf.predict(docs_test) print(metrics.classification_report(y_test, y_predicted, target_names=dataset.target_names)) #this is the confusing confusion matrix centimeter = metrics.confusion_matrix(y_test, y_predicted) print(centimeter) sentences = [ u'Pikachu squirtle togepi',
print("accuracy: %0.3f" % score) totalPredictions.append(predictions) return name, score, train_time, test_time results = [] #Main Code #Classifiers clf1 = LogisticRegression() clf2 = PassiveAggressiveClassifier() clf3 = MultinomialNB(alpha=.01) clf4 = BernoulliNB(alpha=.01) clf5 = NearestCentroid() clf6 = RidgeClassifier(tol=1e-2, solver="sag") clf7 = Perceptron(n_iter=50) clf8 = SGDClassifier(loss='hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1) clf9 = SGDClassifier(loss='hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1) clf10 = SGDClassifier(loss='hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1) clf11 = SGDClassifier(loss='log',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1) clf12 = SGDClassifier(loss='log',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1) clf13 = SGDClassifier(loss='log',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1) clf14 = SGDClassifier(loss='modified_huber',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1) clf15 = SGDClassifier(loss='modified_huber',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1) clf16 = SGDClassifier(loss='modified_huber',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1) clf17 = SGDClassifier(loss='squared_hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1) clf18 = SGDClassifier(loss='squared_hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1) clf19 = SGDClassifier(loss='squared_hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1) clf20 = SGDClassifier(loss='perceptron',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1) clf21 = SGDClassifier(loss='perceptron',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1) clf22 = SGDClassifier(loss='perceptron',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1)
def test_predict_proba():
    """Construct a DESKNN from one fitted Perceptron listed three times."""
    # Five random 5-feature samples; only the second one belongs to class 1.
    features = np.random.randn(5, 5)
    targets = np.array([0, 1, 0, 0, 0])

    base = Perceptron()
    base.fit(features, targets)

    # The same fitted object fills all three pool slots.
    DESKNN([base] * 3)
test_hy = hy[:8678, :]

# Multinomial naive Bayes on the `hy` feature matrix (80/20 split).
X_train, X_test, y_train, y_test = train_test_split(
    hy, data, train_size=0.80, test_size=0.20, random_state=1234)
nb = MultinomialNB().fit(X=X_train, y=y_train)
y_pred = nb.predict(X_test)
print("Bow + Tf-idf ---> " + str(accuracy_score(y_test, y_pred) * 100) + "%")

# -------------------------------------PERCEPTRON-------------------------------------
# Bag-of-words counts built from `res`.
bow_vectorizer = CountVectorizer()
bow = bow_vectorizer.fit_transform(res)
train_bow = bow[8678:, :]
test_bow = bow[:8678, :]

X_train, X_test, y_train, y_test = train_test_split(
    bow, data, test_size=0.20, train_size=0.80, random_state=1234)
per = Perceptron(tol=1e-3, random_state=0).fit(X=X_train, y=y_train)
y_pred = per.predict(X_test)
print("----------------------------------------------------------------------------------")
print("__________PERCEPTRON__________")
print("BagOfWords -----> " + str(accuracy_score(y_test, y_pred) * 100) + "%")

# TF-IDF features built from the same `res` corpus.
tfidf_vectorizer = TfidfVectorizer()
tfidf = tfidf_vectorizer.fit_transform(res)
train_tfidf = tfidf[8678:, :]
test_tfidf = tfidf[:8678, :]

X_train, X_test, y_train, y_test = train_test_split(
    tfidf, data, test_size=0.20, train_size=0.80, random_state=1234)
per = Perceptron(tol=1e-3, random_state=0).fit(X=X_train, y=y_train)
y_pred = per.predict(X_test)
def _classifier_specs():
    """Return the per-classifier settings used by CheckingClassifer.

    Each entry holds the banner, the input prompt, a builder that turns the
    raw user input into the final estimator, and the exact report labels.
    """
    return {
        "Perceptron": {
            "banner": "--------------------------------perceptron---------------------------------------------------",
            "prompt": "Please Insert the value of learning Rate : \n",
            "build": lambda raw: Perceptron(
                penalty=None, alpha=0.0001, fit_intercept=True, max_iter=50,
                tol=None, shuffle=True, verbose=0, eta0=float(raw),
                n_jobs=None, random_state=0, early_stopping=False,
                validation_fraction=0.1, n_iter_no_change=5,
                class_weight=None, warm_start=False),
            "test_label": "Test Accuracy : ",
            "cv_time_header": True,
            "footer": "-----------------------------------------------------------------------------------",
        },
        "SVM": {
            "banner": "--------------------------------SVM---------------------------------------------------",
            "prompt": "Please Inuput the value of C : \n",
            "build": lambda raw: LinearSVC(random_state=0, tol=1e-5, C=float(raw)),
            "test_label": "Test Accuracy: ",
            "cv_time_header": True,
            "footer": None,
        },
        "DecisionTree": {
            "banner": "--------------------------------DT---------------------------------------------------",
            "prompt": "Please Insert the Depth of the Tree : \n",
            "build": lambda raw: DecisionTreeClassifier(
                random_state=0, max_depth=int(raw)),
            "test_label": "Test Accuracy : ",
            "cv_time_header": True,
            "footer": None,
            "cv_name": "DT",
        },
        "KNN": {
            "banner": "--------------------------------KNN---------------------------------------------------",
            "prompt": "Please insert the number of neigbours : \n",
            "build": lambda raw: KNeighborsClassifier(n_neighbors=int(raw)),
            "test_label": "Test Accuracy: ",
            "cv_time_header": False,
            "footer": None,
        },
        "LG": {
            "banner": "--------------------------------LG---------------------------------------------------",
            "prompt": "Please Insert C value :\n",
            "build": lambda raw: LogisticRegression(
                penalty='l2', dual=False, tol=0.0001, C=float(raw),
                fit_intercept=True, intercept_scaling=1, class_weight=None,
                random_state=None, max_iter=500, solver='liblinear'),
            "test_label": "Test Accuracy :",
            "cv_time_header": False,
            "footer": "----------------------------------------------------------------------",
        },
    }


def CheckingClassifer(ClassiferName):
    """Run one classifier and print its test accuracy and 10-fold CV accuracy.

    The chosen estimator is placed behind StandardScaler and a 2-component
    PCA. It is fitted on the module-level ``X``/``y`` and scored on
    ``Xtest``/``ytest``, then cross-validated on ``T``/``P``.

    Args:
        ClassiferName: One of "Perceptron", "SVM", "DecisionTree", "KNN"
            or "LG". Any other value does nothing.

    Raises:
        ValueError: If the value typed at the prompt cannot be converted to
            the number the estimator needs.
    """
    spec = _classifier_specs().get(ClassiferName)
    if spec is None:
        return

    print(spec["banner"])
    raw_value = input(spec["prompt"])
    # Start timing only after the user has answered; previously the reported
    # running time also included however long the prompt stayed open.
    start_time = time.time()
    pipeline = make_pipeline(StandardScaler(), PCA(n_components=2),
                             spec["build"](raw_value))
    pipeline.fit(X, y)
    y_pred = pipeline.predict(Xtest)
    print(spec["test_label"], metrics.accuracy_score(ytest, y_pred))
    print("The Running Time To compute the Test Accuracy :")
    print("--- %s seconds ---" % (time.time() - start_time))

    # Stratified 10-fold cross-validation, refitting the same pipeline per fold.
    start_time = time.time()
    folds = StratifiedKFold(n_splits=10, random_state=1,
                            shuffle=True).split(T, P)
    fold_scores = []
    for train_index, test_index in folds:
        pipeline.fit(T[train_index], P[train_index])
        fold_scores.append(pipeline.score(T[test_index], P[test_index]))
    cv_name = spec.get("cv_name", ClassiferName)
    print(" The accuracy of Cross validation " + cv_name + " : " +
          str(sum(fold_scores) / len(fold_scores)))
    # Some reports print a heading above the CV timing line and some do not;
    # the per-classifier output is preserved as it was.
    if spec["cv_time_header"]:
        print("The Running Time To compute the CV Accuracy :")
    print("--- %s seconds ---" % (time.time() - start_time))
    if spec["footer"] is not None:
        print(spec["footer"])
X = np.array(cancer.ix[:, 0:28].values) #ouoput label y = np.array(cancer.ix[:, 29].values) #creating empty dictionary to append all classifiers dictOfClassifiers = {} knn = KNeighborsClassifier(n_neighbors=7, weights='uniform') dictOfClassifiers.update({knn: "K nearest neighbour"}) svm = SVC(kernel="rbf", C=1) dictOfClassifiers.update({svm: "SVM"}) lr = LogisticRegression(C=1, max_iter=100) dictOfClassifiers.update({lr: "Logistic Regression"}) dt = DecisionTreeClassifier(criterion='entropy', max_depth=4) dictOfClassifiers.update({dt: "Decision Tree"}) nb = GaussianNB() dictOfClassifiers.update({nb: "Naive Bayes"}) p = Perceptron(eta0=1, alpha=0.0001) dictOfClassifiers.update({p: "Perceptron"}) ann = MLPClassifier(hidden_layer_sizes=(3, 5), activation='relu', alpha=1, max_iter=2000, learning_rate='adaptive') dictOfClassifiers.update({ann: "Neural Network"}) dnn = MLPClassifier(hidden_layer_sizes=(10, 10), activation='relu', alpha=1, max_iter=2000, learning_rate='adaptive') dictOfClassifiers.update({dnn: "Deep Learning"}) rfc = RandomForestClassifier(max_depth=14, n_estimators=19,
# In[ ]:

# Gaussian Naive Bayes
# Note: the acc_* values in these cells are scores on the training data.
gaussian = GaussianNB().fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian

# In[ ]:

# Perceptron
perceptron = Perceptron().fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron

# In[ ]:

# Linear SVC
linear_svc = LinearSVC().fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)
acc_linear_svc
# Running totals for the test set: sample count and sum of the labels.
test_stats = dict(n_test=0, n_test_pos=0)

tick = time.time()
parsing_time = time.time() - tick
tick = time.time()

# Vectorize the dataset text (hashing trick) ----------------------------------
# vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 18,
#                                alternate_sign=False)
# X_test = vectorizer.transform(xtest)

# Classifiers that support the `partial_fit` method.
# Create a fresh classifier container.
partial_fit_classifiers = {
    'SGD': SGDClassifier(),
    'Perceptron': Perceptron(),
    'NB Multinomial': MultinomialNB(alpha=0.01),
    'Passive-Aggressive': PassiveAggressiveClassifier(),
}
# Load the previous classifier container.
# end of dataset text vectorization (hashing trick) ---------------------------
vectorizing_time = time.time() - tick

test_stats['n_test'] += len(ytest)
test_stats['n_test_pos'] += sum(ytest)


def progress(cls_name, stats):
    """Report progress information, return a string."""
    duration = time.time() - stats['t0']
micro avg 0.84 0.84 0.84 16262 macro avg 0.77 0.81 0.79 16262 weighted avg 0.85 0.84 0.84 16262 """ from sklearn import metrics fpr, tpr, thresholds = metrics.roc_curve(y_test, predicted, pos_label=1) print("AUC :", metrics.auc(fpr, tpr)) """ AUC : 0.803154835157 """ from sklearn.linear_model import Perceptron classifier5 = Perceptron(eta0=0.5, max_iter=75) mlp = classifier5.fit(x_train, y_train) print("Training accuracy: ", mlp.score(x_train, y_train)) predicted = mlp.predict(x_test) print("Testing Accuracy score: ", accuracy_score(y_test, predicted)) print("Confusion Matrix: ") print(confusion_matrix(y_test, predicted)) print("\nClassification Report: ") print(classification_report(y_test, predicted)) """ Training accuracy: 0.5 Testing Accuracy score: 0.23637928914 Confusion Matrix:
y = y.reshape(-1, 1) for idx, row in enumerate(tokens): for v in row: if v in feature_dict: X[idx, feature_dict[v]] += 1 return X, y if __name__ == '__main__': train_labels, train_tokens, train_vocab = convert_data(train_text) index_dir = {key: value for (value, key) in enumerate(train_vocab)} inversed_index_dir = {value: key for (key, value) in index_dir.items()} train_X, train_y = lists_to_arrays(train_labels, train_tokens, index_dir) # train perceptron perceptron = Perceptron(tol=1e-3, random_state=42) perceptron.fit(train_X, train_y) #print(perceptron.score(train_X,train_y)) # train logistic regression lr = LogisticRegression(random_state=42, tol=1e-4) lr.fit(train_X, train_y) #print(lr.score(train_X, train_y)) print('Top words for LR:') for coef in lr.coef_: print([inversed_index_dir[x] for x in np.argsort(coef)[::-1][:10]]) print("Top words for Perceptron:") for coef in perceptron.coef_: print([inversed_index_dir[x] for x in np.argsort(coef)[::-1][:10]])
metrics.classification_report(y_test, pred, target_names=target_names)) if opts.print_cm: print("confusion matrix:") print(metrics.confusion_matrix(y_test, pred)) print() clf_descr = str(clf).split('(')[0] return clf_descr, score, train_time, test_time results = [] for clf, name in ((RidgeClassifier(tol=1e-2, solver="lsqr"), "Ridge Classifier"), (Perceptron(n_iter=50), "Perceptron"), (PassiveAggressiveClassifier(n_iter=50), "Passive-Aggressive"), (KNeighborsClassifier(n_neighbors=10), "kNN"), (RandomForestClassifier(n_estimators=100), "Random forest")): print('=' * 80) print(name) results.append(benchmark(clf)) for penalty in ["l2", "l1"]: print('=' * 80) print("%s penalty" % penalty.upper()) # Train Liblinear model results.append(benchmark(LinearSVC(penalty=penalty, dual=False, tol=1e-3))) # Train SGD model
x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
# Grid covering the feature range, padded by 1 on every side.
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                       np.arange(x2_min, x2_max, resolution))
# NOTE(review): Z is computed but never drawn in the visible code; a
# contour plot of the predicted regions may be missing - confirm.
Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
Z = Z.reshape(xx1.shape)
plt.xlim(xx1.min(), xx1.max())
plt.ylim(xx2.min(), xx2.max())
X_test, y_test = X[test_id, :], y[test_id]
# One scatter series per class label.
for idx, cl in enumerate(np.unique(y)):
    plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                c=cmap(idx), marker=markers[idx], label=cl)
# Overlay the test samples when test indices were supplied.
if test_id:
    X_test, y_test = X[test_id, :], y[test_id]
    plt.scatter(X_test[:, 0], X_test[:, 1], c='', s=55, alpha=0.1,
                linewidths=1, marker='o', label='test set')


if __name__ == '__main__':
    iris = datasets.load_iris()
    iris_X = iris.data[:, [2, 3]]  # use feature columns 2 and 3
    iris_y = iris.target
    # Split into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        iris_X, iris_y, test_size=0.3, random_state=0)
    sc = StandardScaler()  # feature scaling
    sc.fit(X_train)  # learn the scaling statistics from the training set
    X_train_sc = sc.transform(X_train)  # standardize the training data
    X_test_sc = sc.transform(X_test)  # same transform for the test data
    # Perceptron with learning rate 0.1. `n_iter` no longer exists;
    # max_iter=50 with tol=None keeps the fixed 50 epochs.
    ppn = Perceptron(max_iter=50, tol=None, eta0=0.1, random_state=0)
    ppn.fit(X_train_sc, y_train)  # train
    y_yred = ppn.predict(X_test_sc)  # predict on the test set
    X_combined_std = np.vstack((X_train_sc, X_test_sc))
    y_combined = np.hstack((y_train, y_test))
    # Test rows follow the training rows in the stacked array, so take their
    # index range from the real split sizes rather than a fixed 105..149.
    plot_decision_regions(
        X=X_combined_std, y=y_combined, classifier=ppn,
        test_id=range(len(X_train_sc), len(X_combined_std)))
    plt.show()
def run(self): names = ['DNA_TO_CLASSIFY', 'attribute', 'sequence'] data = pd.read_csv(self.file_name, names = names) print('Build our dataset using custom pandas dataframe') clases = data.loc[:,'DNA_TO_CLASSIFY'] sequence = list(data.loc[:, 'sequence']) dic = {} for i, seq in enumerate(sequence): nucleotides = list(seq) nucleotides = [char for char in nucleotides if char != '\t'] nucleotides.append(clases[i]) dic[i] = nucleotides print('Convert Dict object into dataframe') df = pd.DataFrame(dic) print('transpose dataframe into correct format') df = df.transpose() df.rename(columns = {XXX__Length of sample__XX:'XXXXX__Sample DNA Name___XXXXX'}, inplace = True) print('Encoding') numerical_df = pd.get_dummies(df) numerical_df.drop('helitron_not-helitron', axis = 1, inplace = True) print(numerical_df) numerical_df.rename(columns = {'XXX__Fill_XXXX':'XXX__Fill__XXX'}, inplace = True) #Importing different classifier from sklearn from sklearn.naive_bayes import MultinomialNB from sklearn.naive_bayes import BernoulliNB from sklearn.linear_model import Perceptron from sklearn.linear_model import SGDClassifier from sklearn.linear_model import PassiveAggressiveClassifier from sklearn.metrics import classification_report, accuracy_score from sklearn.model_selection import train_test_split X = numerical_df.drop(['heli'], axis = 1).values y = numerical_df['heli'].values #define a seed for reproducibility seed = 1 print('Splitting data into training and testing data') X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = seed) print(X_test[0]) # Define scoring method scoring = 'accuracy' print('Model building to train') names = ['MultinomialNB', 'BernoulliNB', 'Perceptron', 'SGDClassifier', 'PassiveAggressiveClassifier'] Classifiers = [ MultinomialNB(), BernoulliNB(), Perceptron(), SGDClassifier(), PassiveAggressiveClassifier(), ] models = zip(names, Classifiers) from sklearn.model_selection import KFold, cross_val_score names = [] result = 
[] for name, model in models: kfold = KFold(n_splits = 5, random_state = 1) cv_results = cross_val_score(model, X_train, y_train, cv = kfold, scoring = 'accuracy', verbose=2, n_jobs=-1) result.append(cv_results) names.append(name) msg = "{0}: {1} ({2})".format(name, cv_results.mean(), cv_results.std()) print(msg) models = zip(names, Classifiers) for name, model in models: print("Training with: "+name) model.partial_fit(X_train, y_train, classes=np.unique(y_train)) y_pred = model.predict(X_test) print('Exporting') joblib.dump(model, "Models/"+name + ".pkl") print(name) print(accuracy_score(y_test, y_pred)) print(classification_report(y_test, y_pred))
score.append(lr.score(x_test,y_test))
print(' Logistic Regression ')
print(cross_val_score(lr,x_train,y_train,cv=3))
print(f1(y_test,y_lr,average='micro'))

# Linear discriminant analysis (SVD solver).
lda = LinearDiscriminantAnalysis(solver='svd').fit(x_train, y_train)
y_lda = lda.predict(x_test)
score.append(lda.score(x_test, y_test))
print(' Linear Discriminant Analysis ')
print(cross_val_score(lda, x_train, y_train, cv=3))
print(f1(y_test, y_lda, average='micro'))

# Perceptron with an L1 penalty.
per = Perceptron(penalty='l1', alpha=0.001).fit(x_train, y_train)
y_per = per.predict(x_test)
score.append(per.score(x_test, y_test))
print(' Perceptron Classifier ')
print(cross_val_score(per, x_train, y_train, cv=3))
print(f1(y_test, y_per, average='micro'))

# Support vector machine with a degree-4 polynomial kernel.
# This section prints the F1 score before the cross-validation scores.
svm = SVC(C=100, kernel='poly', degree=4, coef0 = 0).fit(x_train, y_train)
y_svm = svm.predict(x_test)
score.append(svm.score(x_test, y_test))
print(' Support Vector Machine ')
print(f1(y_test, y_svm, average='micro'))
print(cross_val_score(svm, x_train, y_train, cv=3))
'''
Multi Layer Perceptron

A single-layer Perceptron cannot classify XOR, but it becomes possible once
more layers are added. Such a network is called an MLP.
'''
import numpy as np
from sklearn.linear_model import Perceptron

# XOR truth table: the four input pairs and their labels.
feature = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
label = np.array([0, 1, 1, 0])

ml = Perceptron(max_iter=1000)
ml.fit(feature, label)
print(ml.predict(feature))
print()

# Use a multi-layer neural network.
from sklearn.neural_network import MLPClassifier

# ml2 = MLPClassifier(hidden_layer_sizes=50, verbose=2).fit(feature, label)
# (verbose prints the training progress: the iteration count and the loss,
# i.e. the gap between the actual and the predicted values)

# hidden_layer_sizes=(10, 10, 10): three hidden layers of 10 nodes each.
ml2 = MLPClassifier(
    hidden_layer_sizes=(10, 10, 10),
    learning_rate_init=0.01,
    max_iter=100,
    random_state=1,
    verbose=1,
)
ml2.fit(feature, label)
print(ml2.predict(feature))
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Eleven samples of three numeric features each, with their class labels.
X = [
    [181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37],
    [166, 65, 40], [190, 90, 47], [175, 64, 39], [177, 70, 40],
    [159, 55, 37], [171, 75, 42], [181, 85, 43],
]
Y = [
    'male', 'male', 'female', 'female', 'male', 'male',
    'female', 'female', 'female', 'male', 'male',
]

# Hold out 40% of the samples; the split is not seeded.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4)

# Build and fit one classifier of each kind on the training split.
clf_tree = tree.DecisionTreeClassifier().fit(X_train, y_train)
clf_linear = Perceptron().fit(X_train, y_train)
clf_log = LogisticRegression().fit(X_train, y_train)
clf_knn = KNeighborsClassifier(n_neighbors=2).fit(X_train, y_train)
clf_svm = SVC().fit(X_train, y_train)

# Predictions of each classifier on the held-out split.
y_pred1 = clf_tree.predict(X_test)
y_pred2 = clf_linear.predict(X_test)
y_pred3 = clf_log.predict(X_test)
y_pred4 = clf_knn.predict(X_test)
y_pred5 = clf_svm.predict(X_test)
#New Code For PAN
pan_train='/home/namrita/Downloads/AIdata/pan13-author-profiling-training-corpus-2013-01-09/en'
pan_temp='/home/namrita/Downloads/AIdata/pantemp'
# pan_test='/home/namrita/Downloads/AIdata/pan13-author-profiling-test-corpus2-2013-04-29/en'

print ('Reading database ...')
k,g,y_train = read_pan(pan_train)
# print (y_train)
# k_t, test_y, a_t = read_pan(pan_test)

print('Extracting asset')
train_x,f_names,chi,transformer=feature_extraction2(givenlabel,k)
# print ('Writing database ...')
# writeTrainToTxt(train_x,'feature.txt')
# # writeTrainToTxt(y_train,'y.txt')
# print ('written')

# Hold out 20% of the extracted features for evaluation.
train_X, test_X, train_y, test_y = train_test_split(train_x, y_train, train_size=0.80)
print ('Mission successful')
# print(len(X_train))
# test_x,_,_,_=feature_extraction2(givenlabel,k_t)
# train_x=train_x.toarray().astype(np.float)
# print (train_x.dtype)
# train_x,f_names,chi,transformer=feature_extraction(givenlabel,k)
# print(simple_classify(RandomForestClassifier(),test_X,test_y,train_X,train_y))

# Evaluate each classifier on the same train/test split.
print(simple_classify(RidgeClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(Perceptron(),test_X,test_y,train_X,train_y))
print(simple_classify(PassiveAggressiveClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(RandomForestClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(KNeighborsClassifier(),test_X,test_y,train_X,train_y))
# The last argument is the training labels, as in every call above; the
# previous code passed `k` (the first value returned by read_pan) instead.
print(simple_classify(MultinomialNB(),test_X,test_y,train_X,train_y))
def get_model_from_name(model_name, training_params=None):
    """Return a new, configured estimator for ``model_name``.

    Stock parameters for the model (if any) are applied first and then
    overridden by ``training_params``.

    Args:
        model_name: Key identifying the estimator, e.g. ``'Perceptron'``.
        training_params: Optional dict of parameters that take precedence
            over the stock ones.

    Returns:
        The estimator instance after ``set_params`` has been applied.

    Raises:
        KeyError: If ``model_name`` is unknown, or names an optional
            backend (xgboost / lightgbm / keras) that is not installed.
    """
    # For Keras
    epochs = 250
    if 'is_test_suite' in sys.argv:
        print(
            'Heard that this is the test suite. Limiting epochs to 10, which will increase training speed dramatically at the expense of model accuracy'
        )
        epochs = 10

    all_model_params = {
        'LogisticRegression': {'n_jobs': -2},
        'RandomForestClassifier': {'n_jobs': -2},
        'ExtraTreesClassifier': {'n_jobs': -1},
        'AdaBoostClassifier': {'n_estimators': 10},
        'SGDClassifier': {'n_jobs': -1},
        'Perceptron': {'n_jobs': -1},
        'LinearRegression': {'n_jobs': -2},
        'RandomForestRegressor': {'n_jobs': -2},
        'ExtraTreesRegressor': {'n_jobs': -1},
        'MiniBatchKMeans': {'n_clusters': 8},
        'GradientBoostingRegressor': {'presort': False},
        'SGDRegressor': {'shuffle': False},
        'PassiveAggressiveRegressor': {'shuffle': False},
        'AdaBoostRegressor': {'n_estimators': 10},
        'XGBRegressor': {'nthread': -1, 'n_estimators': 200},
        'XGBClassifier': {'nthread': -1, 'n_estimators': 200},
        'LGBMRegressor': {},
        'LGBMClassifier': {},
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        }
    }

    model_params = all_model_params.get(model_name, {})
    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print(
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'
        )
        print(model_params)

    # Map names to zero-argument factories, not instances, so only the
    # requested estimator is built instead of every one on each call.
    model_factories = {
        # Classifiers
        'LogisticRegression': LogisticRegression,
        'RandomForestClassifier': RandomForestClassifier,
        'RidgeClassifier': RidgeClassifier,
        'GradientBoostingClassifier': GradientBoostingClassifier,
        'ExtraTreesClassifier': ExtraTreesClassifier,
        'AdaBoostClassifier': AdaBoostClassifier,
        'SGDClassifier': SGDClassifier,
        'Perceptron': Perceptron,
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier,
        # Regressors
        'LinearRegression': LinearRegression,
        'RandomForestRegressor': RandomForestRegressor,
        'Ridge': Ridge,
        'ExtraTreesRegressor': ExtraTreesRegressor,
        'AdaBoostRegressor': AdaBoostRegressor,
        'RANSACRegressor': RANSACRegressor,
        'GradientBoostingRegressor': GradientBoostingRegressor,
        'Lasso': Lasso,
        'ElasticNet': ElasticNet,
        'LassoLars': LassoLars,
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit,
        'BayesianRidge': BayesianRidge,
        'ARDRegression': ARDRegression,
        'SGDRegressor': SGDRegressor,
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor,
        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans,
    }

    # Optional backends are registered only when available.
    if xgb_installed:
        model_factories['XGBClassifier'] = xgb.XGBClassifier
        model_factories['XGBRegressor'] = xgb.XGBRegressor

    if lgb_installed:
        model_factories['LGBMRegressor'] = lgb.LGBMRegressor
        model_factories['LGBMClassifier'] = lgb.LGBMClassifier

    if keras_installed:
        model_factories['DeepLearningClassifier'] = lambda: KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_factories['DeepLearningRegressor'] = lambda: KerasRegressor(
            build_fn=make_deep_learning_model)

    model_without_params = model_factories[model_name]()
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
y= iris.target

# Splitting the dataset.
# train_test_split lives in sklearn.model_selection; the former
# sklearn.cross_validation module no longer exists in current scikit-learn.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Feature scaling: standardization.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Estimates the scaling parameters for EACH FEATURE DIMENSION from the
# training data.
sc.fit(X_train)
X_train_std=sc.transform(X_train)
X_test_std=sc.transform(X_test)

# Perceptron
from sklearn.linear_model import Perceptron
# `n_iter` was replaced by `max_iter`; tol=None disables early stopping so
# the model still runs exactly 40 epochs. random_state=0 makes the shuffling
# of the training data after each epoch reproducible.
ppn = Perceptron(max_iter = 40, tol = None, eta0 = 0.1, random_state = 0)
ppn.fit(X_train_std, y_train)  # train the model

y_pred = ppn.predict(X_test_std)
n_y_pred = y_pred.size
print(n_y_pred)

n_misclassification = (y_test != y_pred).sum()
print('Misclassified samples: %d' % n_misclassification)

# From the previous count, the misclassification error can be calculated as:
misclassification_error = n_misclassification / n_y_pred
print('Misclassificication error: %.2f' % misclassification_error)

#Therefore the accuracy in two forms: