def TrainPerceptronRandomWeight(train_data, test_data, train_label, test_label, numFeatures, itr):
    """Train 100 randomly initialised perceptrons and report on the best one.

    Every trial fits a perceptron on the first ``numFeatures`` columns of
    ``train_data`` starting from random weights and intercepts.  The learned
    parameters of the trial with the highest training accuracy are then used
    as the starting point of one more fit, and that final model's test
    accuracy, weights and intercepts are printed.

    Args:
        train_data: 2-D array of training samples (one row per sample).
        test_data: 2-D array of test samples.
        train_label: Labels for ``train_data``.
        test_label: Labels for ``test_data``.
        numFeatures: Number of leading feature columns to use.
        itr: ``max_iter`` passed to every perceptron.

    Note:
        The random initial parameters are generated with shapes
        ``(3, numFeatures)`` and ``(3,)``, i.e. for a three-class problem.
    """
    n_trials = 100
    train_x = train_data[:, :numFeatures]
    test_x = test_data[:, :numFeatures]

    weight = []
    intercept = []
    train_accuracy = np.empty([n_trials])
    for i in range(n_trials):
        net = perceptron.Perceptron(max_iter=itr, shuffle=True)
        net.fit(train_x, train_label,
                coef_init=np.random.rand(3, numFeatures),
                intercept_init=np.random.rand(3))
        # Score once per trial (the original evaluated the same score twice).
        train_accuracy[i] = net.score(train_x, train_label)
        weight.append(net.coef_)
        intercept.append(net.intercept_)

    # Refit starting from the parameters of the most accurate trial.
    max_accuracy = np.argmax(train_accuracy)
    net1 = perceptron.Perceptron(max_iter=itr, shuffle=True)
    net1.fit(train_x, train_label,
             coef_init=weight[max_accuracy],
             intercept_init=intercept[max_accuracy])

    # Print the results
    print('Using', numFeatures, 'features:')
    print("Maximum Training Accuracy: " + str(train_accuracy[max_accuracy] * 100) + " %")
    print("Corresponding Testing Accuracy: " + str(net1.score(test_x, test_label) * 100) + " %")
    # Report the parameters of the model whose test accuracy is shown above,
    # not those of whichever random trial happened to run last.
    print('\nWeight vectors: ')
    print(net1.coef_)
    print('\nIntercept vector: ')
    print(net1.intercept_, '\n')
def getModels(x_train, x_test, y_train, y_test):
    """Fit nine different classifiers on the training split.

    Args:
        x_train: Training feature matrix.
        x_test: Unused; kept so the signature matches the callers.
        y_train: Training labels.
        y_test: Unused; kept so the signature matches the callers.

    Returns:
        List of fitted estimators in this fixed order: decision tree, k-NN,
        L1 logistic regression, Gaussian naive Bayes, MLP, perceptron,
        random forest, SVC, gradient boosting.
    """
    models = [
        tree.DecisionTreeClassifier(),
        # Keyword arguments: newer scikit-learn releases reject positional
        # ``weights``.
        neighbors.KNeighborsClassifier(n_neighbors=10, weights='uniform'),
        # The L1 penalty needs a solver that supports it; naming liblinear
        # keeps this working where the default solver is lbfgs.
        linear_model.LogisticRegression(penalty='l1', solver='liblinear',
                                        verbose=0, random_state=None,
                                        fit_intercept=True),
        GaussianNB(),
        MLPClassifier(solver='lbfgs', alpha=1e-5,
                      hidden_layer_sizes=(5, 2), random_state=0),
        # ``n_iter`` was deprecated in scikit-learn 0.19 and later removed;
        # ``max_iter`` with ``tol=None`` runs the same fixed 50 epochs.
        perceptron.Perceptron(penalty='l1', max_iter=50, tol=None, verbose=0,
                              random_state=None, fit_intercept=True,
                              eta0=0.02),
        RandomForestClassifier(n_estimators=10),
        svm.SVC(),
        GradientBoostingClassifier(),
    ]
    # ``fit`` returns the estimator itself, so this yields the fitted models.
    return [model.fit(x_train, y_train) for model in models]
def GenPerceptronLayer(vals):
    """Train one binary perceptron per name found in ``vals``.

    Rows 1..37 of ``vals`` are read.  Columns 0..10 of a row are converted to
    floats and form the feature vector (a twelfth slot is left at 0).
    Columns 13, 15 and 17 each hold a name, and the column right after each
    of them holds ``"1"`` when the row is a positive example for that name.

    Args:
        vals: Indexable table of rows (e.g. parsed CSV lines) of strings.

    Returns:
        Dict mapping ``str(name)`` to a fitted perceptron.  Names whose
        examples carry only one label are reported and skipped.
    """
    nets = {}
    trainingData = {}
    trainingValues = {}

    for indx in range(1, 38):
        row = vals[indx]
        data = [0] * 12
        for j in range(0, 11):
            data[j] = float(row[j])
        for j in [13, 15, 17]:
            name = row[j]
            if name not in trainingData:
                trainingData[name] = []
                trainingValues[name] = []
            trainingData[name].append(data)
            # Compare by value: ``is "1"`` tests object identity and only
            # works when the interpreter happens to intern the string.
            trainingValues[name].append(1 if row[j + 1] == "1" else 0)

    for string in trainingData:
        labels = trainingValues[string]
        if 1 not in labels or 0 not in labels:
            print("-E- Only one label exists for " + string + ": " + str(labels))
            continue
        net = perceptron.Perceptron(max_iter=100, tol=None, verbose=0,
                                    random_state=None, fit_intercept=True,
                                    eta0=0.002)
        net.fit(trainingData[string], labels)
        nets[str(string)] = net
        if debug is True:
            # Use the local model rather than looking it up again under a
            # different key than the one it was stored under.
            print(string + ": " + str(net.score(trainingData[string], labels) * 100)
                  + "% testing accuracy")
    return nets
def main():
    """Standardise the HIGGS data and train the Keras network on one chunk.

    The test set is read from ``HIGGS/HIGGS_1.csv`` and the training set from
    ``HIGGS/HIGGS_0.csv`` in chunks of 2,000,000 rows.  In both files column
    0 is used as the label and the remaining columns as features.  Features
    are standardised with the fixed per-column ``mean`` / ``stdDev`` below.
    Only the first training chunk is used.
    """
    dataTest = pd.read_csv('HIGGS/HIGGS_1.csv', header=None)
    dataTestX = dataTest.values
    dataTestY = dataTestX[:, 0]
    dataTestX = np.delete(dataTestX, 0, 1)

    dataChunksTrain = pd.read_csv('HIGGS/HIGGS_0.csv', header=None,
                                  chunksize=2000000)

    # The SVC and Perceptron objects that used to be created here were never
    # used (and the Perceptron was built with the removed ``n_iter``
    # keyword), so they have been dropped.

    # Hard-coded per-feature statistics used for standardisation.
    mean = np.array([
        0.9914658435843994, -8.2976178820622e-06, -1.3272252572679215e-05,
        0.9985363574312471, 2.6134592495411797e-05, 0.9909152318068567,
        -2.0275203997251415e-05, 7.71619920710906e-06, 0.9999687478206591,
        0.9927294304430038, -1.0264440172703127e-05, -2.0768873493851226e-05,
        1.0000080177052564, 0.9922590513707101, 1.459561349773536e-05,
        3.678631990462732e-06, 1.0000114192497513, 0.9861086617144861,
        -5.756954065664269e-06, 1.7449033596108414e-05, 1.0000001559677123,
        1.0342903040056053, 1.0248048350282475, 1.0505538681766282,
        1.009741840750048, 0.972959616608593, 1.033035574431563,
        0.9598119879373501
    ])
    stdDev = np.array([
        0.5653776754096951, 1.0088264812855468, 1.006346283885119,
        0.6000184644551814, 1.0063261640156402, 0.47497472589232176,
        1.009302952852424, 1.0059010877868422, 1.0278075278204606,
        0.49999384024846355, 1.0093304676767396, 1.0061543903728194,
        1.049397999042849, 0.4876623258003873, 1.0087467092311453,
        1.0063049450318349, 1.193675521568018, 0.5057776635500334,
        1.0076942258109045, 1.0063655876039794, 1.4002093224446897,
        0.6746353374867367, 0.38080739505009764, 0.16457624382242395,
        0.39744529874617945, 0.5254062490071941, 0.3652556048435137,
        0.3133377767062806
    ])

    dataTestX = (dataTestX - mean) / stdDev

    counter = 0
    for chunk in dataChunksTrain:
        counter += 1
        chunkX = chunk.values
        chunkY = chunkX[:, 0]
        chunkX = np.delete(chunkX, 0, 1)
        chunkX = (chunkX - mean) / stdDev

        # Copies are passed for the label vectors, which are views into the
        # full arrays.
        trainKerasNetwork(chunkX, chunkY.copy(), dataTestX, dataTestY.copy())
        # Stop after the first chunk.
        if counter == 1:
            break
def adaboost_avg_run_new(max_classes, avg_num_of_run, training_set, testing_set):
    """Average the errors of the custom AdaBoost and scikit-learn's AdaBoost.

    The custom ``AdaBoost`` is fitted ``avg_num_of_run`` times with
    ``max_classes`` classifiers; its per-size training/testing errors are
    collected from ``training_error`` / ``testing_error``.  For every
    ensemble size from 1 to ``max_classes`` a scikit-learn
    ``AdaBoostClassifier`` over perceptrons is also fitted
    ``avg_num_of_run`` times and its test error recorded.

    Args:
        max_classes: Largest ensemble size to evaluate.
        avg_num_of_run: Number of repetitions to average over.
        training_set: Training data, split by ``split_attribute_and_label``.
        testing_set: Testing data, split by ``split_attribute_and_label``.

    Returns:
        List with one ``ErrorWrapper`` per ensemble size holding the averaged
        training, testing and scikit-learn errors.
    """
    def _mean(values):
        return sum(values) / len(values)

    # Separate the attribute vectors from the class labels.
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)
    subset_size = int(len(train_y) * 0.2)

    # One error list per ensemble size.
    sizes = range(1, max_classes + 1)
    train_errs = {size: [] for size in sizes}
    test_errs = {size: [] for size in sizes}

    # Repeated runs of the custom implementation.
    for _ in range(avg_num_of_run):
        booster = AdaBoost(max_classes, subset_size, THRESHOLD, ETA,
                           UPPER_BOUND, ETA_WEIGHTS, False)
        booster.fit_with_errors(train_x, train_y, test_x, test_y)
        for idx in range(max_classes):
            train_errs[idx + 1].append(booster.training_error[idx])
            test_errs[idx + 1].append(booster.testing_error[idx])

    summaries = []
    for size in sizes:
        sk_errs = []
        for _ in range(avg_num_of_run):
            base = perceptron.Perceptron(max_iter=UPPER_BOUND, verbose=0,
                                         random_state=None,
                                         fit_intercept=True, eta0=ETA)
            reference = AdaBoostClassifier(base, algorithm="SAMME",
                                           n_estimators=size)
            reference.fit(train_x, train_y)
            sk_errs.append(calculate_error(test_y, reference.predict(test_x)))

        summary = ErrorWrapper(size, _mean(train_errs[size]),
                               _mean(test_errs[size]), _mean(sk_errs))
        summaries.append(summary)
        print("Train avg for %s %s" % (size, summary.train_error))
        print("Testing avg for %s %s" % (size, summary.test_error))
        print("Scikit adaboost avg for %s %s" % (size, summary.scikit_error))
    return summaries
def adaboost_avg_run(max_classes, avg_num_of_run, training_set, testing_set):
    """Average custom and scikit-learn AdaBoost errors for odd ensemble sizes.

    For every ensemble size 1, 3, 5, ... up to ``max_classes`` the custom
    ``AdaBoost`` and a scikit-learn ``AdaBoostClassifier`` over perceptrons
    are each fitted ``avg_num_of_run`` times, and their errors averaged.

    Args:
        max_classes: Upper bound on the ensemble size.
        avg_num_of_run: Number of repetitions to average over.
        training_set: Training data, split by ``split_attribute_and_label``.
        testing_set: Testing data, split by ``split_attribute_and_label``.

    Returns:
        List with one ``ErrorWrapper`` per evaluated ensemble size.
    """
    def _mean(values):
        return sum(values) / len(values)

    # Separate the attribute vectors from the class labels.
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)
    subset_size = int(len(train_y) * 0.2)

    percent_test_errors = []  # collected as before, but not returned
    summaries = []
    for size in range(1, max_classes + 1, 2):
        train_errs, test_errs, sk_errs = [], [], []
        for _ in range(avg_num_of_run):
            # Custom implementation: error on the training, then test, split.
            booster = AdaBoost(size, subset_size, THRESHOLD, ETA,
                               UPPER_BOUND, ETA_WEIGHTS, False)
            booster.fit(train_x, train_y)
            train_mistakes = booster.xor_tuples(train_y,
                                                booster.predict(train_x))
            train_rate = classifier_error_rate(train_mistakes)
            test_mistakes = booster.xor_tuples(test_y,
                                               booster.predict(test_x))
            test_rate = classifier_error_rate(test_mistakes)
            train_errs.append(train_rate)
            test_errs.append(test_rate)

            # scikit-learn reference with the same ensemble size.
            base = perceptron.Perceptron(max_iter=UPPER_BOUND, verbose=0,
                                         random_state=None,
                                         fit_intercept=True, eta0=ETA)
            reference = AdaBoostClassifier(base, algorithm="SAMME",
                                           n_estimators=size)
            reference.fit(train_x, train_y)
            sk_errs.append(calculate_error(test_y, reference.predict(test_x)))

        summary = ErrorWrapper(size, _mean(train_errs), _mean(test_errs),
                               _mean(sk_errs))
        summaries.append(summary)
        print("Train avg for %s %s" % (size, summary.train_error))
        print("Testing avg for %s %s" % (size, summary.test_error))
        percent_test_errors.append(_mean(test_errs) * 100)
        print("Scikit adaboost avg for %s %s" % (size, summary.scikit_error))
    return summaries
def perceptron_train(trainData, feaSt=0, feaEnd=-1):
    """Fit a perceptron on a column slice of ``trainData``.

    Args:
        trainData: 2-D array whose last column holds the class labels.
        feaSt: Index of the first feature column (inclusive).
        feaEnd: Index one past the last feature column.  The default ``-1``
            stops just before the label column.

    Returns:
        The fitted perceptron (15 passes over the data, no shuffling).
    """
    # ``n_iter`` was deprecated in scikit-learn 0.19 and later removed;
    # ``max_iter`` with ``tol=None`` runs the same fixed number of epochs.
    clf = perceptron.Perceptron(max_iter=15, tol=None, shuffle=False,
                                verbose=0, random_state=None,
                                fit_intercept=True)
    clf.fit(trainData[:, feaSt:feaEnd], trainData[:, -1])
    return clf
def run_perceptron(x_train, x_test, y_train, y_test):
    """Fit an L1-penalised perceptron and print a 5-fold CV score.

    The model is fitted on the training split, then ``cross_val_score`` is
    run with ``cv=5`` on the *test* split and the mean score is printed.

    Args:
        x_train: Training feature matrix.
        x_test: Feature matrix used for cross-validation.
        y_train: Training labels.
        y_test: Labels used for cross-validation.
    """
    # ``n_iter`` was deprecated in scikit-learn 0.19 and later removed;
    # ``max_iter`` with ``tol=None`` runs the same fixed 50 epochs.
    clf = perceptron.Perceptron(penalty='l1', max_iter=50, tol=None,
                                verbose=0, random_state=None,
                                fit_intercept=True, eta0=0.02)
    clf.fit(x_train, y_train)
    # NOTE(review): if cross_val_score is scikit-learn's, it clones and
    # refits the estimator per fold, so the fit above would not influence
    # the reported score - confirm this is the intended evaluation.
    scores = cross_val_score(clf, x_test, y_test, cv=5)
    print("perceptron: %.15f" % scores.mean())
def __init__(self, training_data, test_data, num_classifiers, combo_method, classifier_layout, single_classifier):
    """Build the list of base classifiers and the combiner.

    Args:
        training_data: Stored as ``self.train``.
        test_data: Stored as ``self.test``.
        num_classifiers: Number of base classifiers to create.
        combo_method: One of the ``Combiner`` constants; selects the model
            stored in ``self.combiner`` (``None`` if nothing matches).
        classifier_layout: ``"random"`` (one ``get_random_classifier()``
            call per slot), ``"uniform"`` (slots split into quarters: tree,
            SVC, naive Bayes, perceptron) or ``"single"`` (every slot uses
            ``single_classifier``).  Any other value leaves the list empty.
        single_classifier: ``"perceptron"``, ``"svm"``, ``"gaussian"`` or
            ``"decisiontree"``; only used by the ``"single"`` layout.
    """
    self.train = training_data
    self.test = test_data
    self.true_test, self.testing_labels = self.format_test_data()
    self.classifier_list = []

    if classifier_layout == "random":
        self.classifier_list.extend(
            self.get_random_classifier() for _ in range(num_classifiers))
    elif classifier_layout == "uniform":
        # 4 is the number of classifier types.
        quarter = num_classifiers/4
        for slot in range(num_classifiers):
            if slot < quarter:
                member = tree.DecisionTreeClassifier()
            elif slot < 2 * quarter:
                member = svm.SVC()
            elif slot < 3 * quarter:
                member = GaussianNB()
            else:
                member = perceptron.Perceptron()
            self.classifier_list.append(member)
    elif classifier_layout == "single":
        # Resolve the requested type once; an unknown name is reported once
        # per slot, exactly as before.
        factory = None
        if single_classifier == "perceptron":
            factory = perceptron.Perceptron
        elif single_classifier == "svm":
            factory = svm.SVC
        elif single_classifier == "gaussian":
            factory = GaussianNB
        elif single_classifier == "decisiontree":
            factory = tree.DecisionTreeClassifier
        for _ in range(num_classifiers):
            if factory is None:
                print("NO SUCH CLASSIFIER")
            else:
                self.classifier_list.append(factory())

    self.combination = combo_method
    self.combiner = None
    # Every combiner is created with its default hyper-parameters.
    combiner_factories = (
        (Combiner.NEURAL_NET, MLPClassifier),
        (Combiner.DECISION_TREE, tree.DecisionTreeClassifier),
        (Combiner.SVM, svm.SVC),
    )
    for method, make_combiner in combiner_factories:
        if combo_method == method:
            self.combiner = make_combiner()
def get_random_classifier(self):
    """Return a new, unfitted classifier of a uniformly random type.

    The candidates are a decision tree, an SVC, Gaussian naive Bayes and a
    perceptron.

    Returns:
        A freshly constructed estimator with default hyper-parameters.
    """
    factories = (
        tree.DecisionTreeClassifier,
        svm.SVC,
        GaussianNB,
        perceptron.Perceptron,
    )
    # ``randint`` is inclusive on both ends.  The previous upper bound of 2
    # meant the perceptron (index 3) could never be selected.
    classifier_type = random.randint(0, len(factories) - 1)
    return factories[classifier_type]()
def _build_feature_matrix(rows, keywords):
    """Stack the feature vector of column 0 of every row of ``rows``."""
    # Build all vectors first and stack once; calling ``np.vstack`` inside
    # the loop re-copies the whole matrix for every row.
    return np.vstack([getFeatureVector(rows[i, 0], keywords)
                      for i in range(rows.shape[0])])


def main():
    """Train a perceptron on keyword features and print its accuracy.

    Reads the keyword list and the training/validation/testing CSV files,
    turns column 0 of every row into a feature vector with
    ``getFeatureVector``, uses column 1 as the label, fits a balanced,
    L2-penalised perceptron and prints predictions, labels and accuracy for
    each of the three splits.
    """
    keywords = readFile('./Keywords.csv')
    train_data = readFile('./Datasets/Shuffled/Training.csv')
    validation_data = readFile('./Datasets/Shuffled/Validation.csv')
    test_data = readFile('./Datasets/Shuffled/Testing.csv')

    X_Train = _build_feature_matrix(train_data, keywords)
    X_Validation = _build_feature_matrix(validation_data, keywords)
    X_Test = _build_feature_matrix(test_data, keywords)

    Y_Train = train_data[:, 1]
    Y_Validation = validation_data[:, 1]
    Y_Test = test_data[:, 1]

    # Create the model.  ``n_iter`` was deprecated in scikit-learn 0.19 and
    # later removed; ``max_iter`` with ``tol=None`` runs the same fixed
    # number of epochs.
    model = perceptron.Perceptron(max_iter=490, tol=None,
                                  class_weight="balanced", penalty='l2',
                                  alpha=0.0001)
    model.fit(X_Train, Y_Train)

    # Predict the result.  Single-argument ``print(...)`` calls behave the
    # same under Python 2 and Python 3.
    print("\n Training Data:")
    print("Prediction " + str(model.predict(X_Train)))
    print("Actual     " + str(Y_Train))
    print("Accuracy   " + str(model.score(X_Train, Y_Train) * 100) + "%")

    print("\n Vaildation Data:")
    print("Prediction " + str(model.predict(X_Validation)))
    print("Actual     " + str(Y_Validation))
    print("Accuracy   " + str(model.score(X_Validation, Y_Validation) * 100) + "%")

    print("\n Test Data:")
    print("Prediction " + str(model.predict(X_Test)))
    print("Actual     " + str(Y_Test))
    print("Accuracy   " + str(model.score(X_Test, Y_Test) * 100) + "%")
def perceptron_avg_run(avg_num_of_run, training_set, testing_set):
    """Average the error of a plain perceptron over repeated fits.

    Args:
        avg_num_of_run: Number of independent fits to average over.
        training_set: Training data, split by ``split_attribute_and_label``.
        testing_set: Testing data, split by ``split_attribute_and_label``.

    Returns:
        Tuple ``(mean testing error, mean training error)``.
    """
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)

    train_errs = []
    test_errs = []
    for _ in range(avg_num_of_run):
        model = perceptron.Perceptron(max_iter=UPPER_BOUND, verbose=0,
                                      random_state=None, fit_intercept=True,
                                      eta0=ETA)
        model.fit(train_x, train_y)
        train_errs.append(calculate_error(train_y, model.predict(train_x)))
        test_errs.append(calculate_error(test_y, model.predict(test_x)))

    mean_test_err = sum(test_errs) / len(test_errs)
    mean_train_err = sum(train_errs) / len(train_errs)
    return mean_test_err, mean_train_err
def slp(x_train, x_test, y_train, y_test, dataset):
    """Train a single-layer perceptron and print an evaluation report.

    Args:
        x_train: Training feature matrix.
        x_test: Test feature matrix.
        y_train: Training labels.
        y_test: Test labels.
        dataset: Label for the data set, used only in the printed output.

    Returns:
        Fraction of test samples predicted correctly.
    """
    classifier = p.Perceptron(max_iter=500, verbose=0, random_state=None,
                              fit_intercept=True, eta0=0.002)
    print(classifier)

    # ``fit`` returns the fitted estimator, so prediction can be chained.
    y_pred = classifier.fit(x_train, y_train).predict(x_test)

    n_correct = (y_pred == y_test).sum()
    accuracy = n_correct / len(y_test)

    print('Accuracy on ', dataset, ' with single layer perceptron: ',
          accuracy * 100, '%')
    print('Confusion matrix:\n', confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    return accuracy
def TrainPerceptron(train_data, test_data, train_label, test_label, numFeatures, itr):
    """Fit one perceptron on the leading features and print its results.

    Args:
        train_data: 2-D array of training samples.
        test_data: 2-D array of test samples.
        train_label: Labels for ``train_data``.
        test_label: Labels for ``test_data``.
        numFeatures: Number of leading feature columns to use.
        itr: ``max_iter`` passed to the perceptron.
    """
    train_x = train_data[:, :numFeatures]
    model = perceptron.Perceptron(max_iter=itr, shuffle=True)
    model.fit(train_x, train_label)

    # Report accuracies, then the learned parameters.
    print('Using', numFeatures, 'features:')
    train_pct = model.score(train_x, train_label) * 100
    print("Training Accuracy: " + str(train_pct) + " %")
    test_pct = model.score(test_data[:, :numFeatures], test_label) * 100
    print("Testing Accuracy: " + str(test_pct) + " %")

    print('\nWeight vectors: ')
    print(model.coef_)
    print('\nIntercept vector: ')
    print(model.intercept_, '\n')
def mainworker(limit1, limit2):
    """Cross-validate a perceptron on ``pdata.txt`` with some features removed.

    Every line of ``pdata.txt`` is a comma-separated list of integers whose
    last value is the class label.  The features at positions ``limit1`` to
    ``limit2`` (inclusive) are dropped.  For k in 2..5 a k-fold
    cross-validation is run and the mean accuracy (in percent) printed.

    Args:
        limit1: Index of the first feature to drop.
        limit2: Index of the last feature to drop.
    """
    # Local import: ``sklearn.cross_validation`` was removed from
    # scikit-learn; ``model_selection.KFold`` is its replacement.
    from sklearn.model_selection import KFold

    features = []
    labels = []
    # ``with`` closes the file even if a line fails to parse.
    with open("pdata.txt") as f:
        for line in f:
            values = [int(v) for v in line.strip("\n").split(",")]
            label = values.pop()
            features.append(values[:limit1] + values[limit2 + 1:])
            labels.append(label)

    X = np.array(features)
    y = np.array(labels)

    for k in [2, 3, 4, 5]:
        fold_scores = []
        # Splitting ``X`` itself replaces the hard-coded sample count 11054,
        # so the function works for any number of rows.
        for train_index, test_index in KFold(n_splits=k).split(X):
            # ``n_iter`` was deprecated in scikit-learn 0.19 and later
            # removed; ``max_iter`` with ``tol=None`` runs the same fixed
            # number of epochs.
            net = perceptron.Perceptron(max_iter=100, tol=None, verbose=0,
                                        random_state=None,
                                        fit_intercept=True, eta0=0.002)
            net.fit(X[train_index], y[train_index])
            fold_scores.append(net.score(X[test_index], y[test_index]) * 100)
        answer = np.mean(fold_scores)
        print("The accuracy for %s th fold is: %s \n" % (k, answer))
def run():
    """Train perceptrons with growing epoch budgets until the weights settle.

    Command-line arguments: ``input`` (CSV read through ``load_csv``) and
    ``output`` (file to write).  For i = 1, 2, ... a perceptron limited to i
    epochs is fitted; its weights and bias (cast to int) are written as one
    comma-separated line.  The loop stops, without writing, as soon as a fit
    reproduces the previous weights and bias.

    Returns:
        None
    """
    parser = argparse.ArgumentParser(
        description="Project 3 Part I: Perceptron")
    parser.add_argument('input', type=str)
    parser.add_argument('output', type=str)
    args = parser.parse_args()

    x, y = load_csv(args.input)
    weights, bias = None, None

    # ``with`` guarantees the output file is closed even if a fit fails.
    with open(args.output, 'w') as target:
        for i in range(1, 99999):
            # ``n_iter`` was deprecated in scikit-learn 0.19 and later
            # removed; ``max_iter`` with ``tol=None`` runs exactly i epochs.
            net = perceptron.Perceptron(max_iter=i, tol=None, verbose=0,
                                        fit_intercept=True)
            net.fit(x, y)
            # ``array_equal`` is False against the initial ``None`` and never
            # raises on multi-element arrays, unlike a bare ``==``.
            if (np.array_equal(weights, net.coef_[0])
                    and np.array_equal(bias, net.intercept_)):
                break
            weights = net.coef_[0]
            bias = net.intercept_
            row = np.concatenate([weights.astype(int), bias.astype(int)])
            target.write(",".join(str(value) for value in row) + '\n')
    return None
def get_weights(self, S1, S2_dict):
    """Fit a separating perceptron for every index pair ``i < j``.

    For each pair, the samples in ``S2_dict[(i, j)]`` are labelled -1 when
    ``self.oracle.compare(sample, S1[i])`` returns -1 and +1 otherwise.  A
    perceptron is fitted only if both labels occur, and it is kept only if it
    classifies its training samples perfectly.

    Returns:
        Dict mapping ``(i, j)`` to an array holding the weight vector followed
        by the intercept, ``[w_ij, theta_ij]``.
    """
    R = {}
    for i in range(self.m):
        for j in range(i + 1, self.m):
            samples = S2_dict[(i, j)]
            labels = []
            for sample in samples:
                below = self.oracle.compare(sample, S1[i]) == -1
                labels.append(-1 if below else 1)
            features = set_to_matrix(samples, self.n)
            net = perceptron.Perceptron(max_iter=100, fit_intercept=True,
                                        eta0=0.002)

            # A classifier needs both labels to be present.
            if len(np.unique(labels)) != 2:
                continue
            net.fit(features, labels)

            # Keep only classifiers without any training error.
            if not net.score(features, labels) == 1:
                continue
            R[(i, j)] = np.append(net.coef_[0], net.intercept_[0])
    return R
def fperceptron(x, y):
    """Fit a perceptron with default hyper-parameters and return it.

    Args:
        x: Training feature matrix.
        y: Training labels.

    Returns:
        The fitted perceptron.
    """
    model = perceptron.Perceptron()
    model.fit(x, y)
    return model
# Custom perceptron: fit it and plot its decision boundary.
ninputs = 2
net = Perceptron(ninputs)
net.fit(X, y)
# Element 0 of ``weights`` is used as the bias, the rest as input weights.
w = net.weights[1:]
b = net.weights[0]
plot_dboundary(w, b)
plot_dboundary_contourf(net)
'''
H = net.weights_hist
niter = len(H)
snapshots = [int(t) for t in np.linspace(0, 20, 5)]
#snapshots = [15, 20, 25]
for t in snapshots:
    w = H[t][1:]
    b = H[t][0]
    plot_dboundary(w, b)
    plt.title('iter {}'.format(t))
'''

# sklearn version
from sklearn.linear_model import perceptron
net_sklearn = perceptron.Perceptron()
net_sklearn.fit(X, y)
w = net_sklearn.coef_[0]
offset = net_sklearn.intercept_[0]
# Plot the scikit-learn model's own intercept.  The original passed the
# custom net's ``b`` here and left ``offset`` unused.
plot_dboundary(w, offset)
# NOTE(review): the original passed ``net`` again, re-plotting the custom
# model; this assumes plot_dboundary_contourf only needs the estimator's
# prediction interface - confirm.
plot_dboundary_contourf(net_sklearn)
# Labels
t = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

# Data
colormap = np.array(['r', 'k'])
plt.scatter(d[0], d[1], c=colormap[t], s=40)
#plt.show()

# Rotate the data by three quarter-turns (270 degrees, not 180).
# Assuming ``d`` has two rows (as the scatter calls suggest), each row of
# ``d90`` is then one sample ``(d[1][i], d[0][i])`` - TODO confirm.
d90 = np.rot90(np.rot90(np.rot90(d)))

# Create the model.  ``n_iter`` was deprecated in scikit-learn 0.19 and
# later removed; ``max_iter`` with ``tol=None`` runs the same fixed number
# of epochs.
net = perceptron.Perceptron(max_iter=100, tol=None, eta0=0.002)
net.fit(d90, t)

# Print the results
print("Prediction " + str(net.predict(d90)))
print("Actual     " + str(t))
print("Accuracy   " + str(net.score(d90, t) * 100) + "%")
print(d90)
print(t)

# Plot the original data
plt.scatter(d[0], d[1], c=colormap[t], s=40)

# Output the values
print("Coefficient 0 " + str(net.coef_[0, 0]))
print("Coefficient 1 " + str(net.coef_[0, 1]))
print("Bias " + str(net.intercept_))
w = [x + y for x, y in zip(w, w0)] num_mistakes = num_mistakes + 1 else: pass print("Iteration-" + str(index) + " No-of-Mistakes: " + str(num_mistakes)) accuracy = (len(Train_Data) - num_mistakes) / len(Train_Data) print("Iteration-" + str(index) + " Training-Accuracy: " + str(accuracy)) print("**************TESTING DATA********************") num_mistakes = 0 accuracy = 0 for x in range(0, len(Test_Data)): Xt = Test_Feature_Vector[x] Yt = Test_Labels[x] Yt = int(Yt) Yht = sign(dot(w, Xt)) if (Yht != Yt): num_mistakes = num_mistakes + 1 else: pass accuracy = (len(Test_Data) - num_mistakes) / len(Test_Data) print("Testing Mistakes: " + str(num_mistakes)) print("Testing Accuracy: " + str(accuracy)) print("**************REAL RESULTS********************") practice = perceptron.Perceptron(max_iter=20) practice.fit(Feature_Vector, Train_Labels) print("Real Iteration-20 Accuracy:") print(practice.score(Feature_Vector, Train_Labels) * 100)
import numpy as np
from sklearn.linear_model import perceptron
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# Load text data from the files
train_data = np.loadtxt("vertigo_train.txt", dtype=int)
test_data = np.loadtxt("vertigo_predict.txt", dtype=int)
true_class = np.loadtxt("vertigo_answers.txt", dtype=int)

# Column 0 of the training file is used as the class label, so that same
# column must be removed from the features.  The original removed column 1,
# which dropped a feature and left the label in the training matrix.
train_class = train_data[:, 0]
train_data = np.delete(train_data, 0, 1)

# Perceptron
p = perceptron.Perceptron()
perceptron_trained = p.fit(train_data, train_class)
test_result_perceptron = perceptron_trained.predict(test_data)

# Confusion matrix: the diagonal counts the correct predictions.
cm = confusion_matrix(true_class, test_result_perceptron)
# float() keeps this a true division under Python 2 as well.
accuracy_perceptron = sum(np.diag(cm)) / float(len(true_class)) * 100

# Nearest neighbours with the Manhattan metric
neigh = KNeighborsClassifier(metric="manhattan")
nn_fitted = neigh.fit(train_data, train_class)
nn_predict = nn_fitted.predict(test_data)
confu_mat_nn = confusion_matrix(true_class, nn_predict)

# Calculating accuracy
accuracy_nn = sum(np.diag(confu_mat_nn)) / float(len(true_class)) * 100

print("Perceptron: {0:.2f}% correct".format(accuracy_perceptron))
# Encode the class names as integers.
y_test = y_test.replace('ALL', 0)
y_test = y_test.replace('AML', 1)

# Drop the last row everywhere; keep the inner feature columns (all but the
# first and last) and take the labels from column 1.
x_train = x_train.iloc[0:-1, 1:-1]
y_train = y_train.iloc[0:-1, 1]
x_test = x_test.iloc[0:-1, 1:-1]
y_test = y_test.iloc[0:-1, 1]

# Data scaling: fit the scaler on the training data only.  The original
# fitted it a second time on the test data, which discarded the training
# statistics and scaled both sets with test-set statistics.
sc = StandardScaler()
sc.fit(x_train)
x_train_scale = sc.transform(x_train)
x_test_scale = sc.transform(x_test)

# ``n_iter`` was deprecated in scikit-learn 0.19 and later removed;
# ``max_iter`` with ``tol=None`` runs the same fixed number of epochs.
ppn = perceptron.Perceptron(max_iter=1000, tol=None, eta0=0.1,
                            random_state=20)

# Unscaled features
ppn.fit(x_train, y_train)
pred = ppn.predict(x_test)
A = accuracy_score(y_test, pred)
M = confusion_matrix(y_test, pred)
print('Accuracy for Perceptron using unscaled features: ')
print(A)
print('Confusion Matrix : ')
print(M)

# Scaled features
ppn.fit(x_train_scale, y_train)
Acc = accuracy_score(y_test, ppn.predict(x_test_scale))
] # dataset for prediction real_X = [[190, 70, 43], [184, 84, 44], [198, 92, 48], [183, 83, 44], [166, 47, 36], [170, 60, 38], [172, 64, 39], [182, 80, 42], [180, 80, 43]] real_Y = [ 'male', 'male', 'male', 'male', 'female', 'female', 'female', 'male', 'male' ] # classfiers with default hyperparameters clf_tree = tree.DecisionTreeClassifier() clf_svc = svm.SVC(gamma='auto') clf_knn = neighbors.KNeighborsClassifier() clf_per = perceptron.Perceptron() clf_nb = GaussianNB() clf_nn = MLPClassifier(max_iter=1000) classifiers = [clf_tree, clf_svc, clf_knn, clf_per, clf_nb, clf_nn] # training models for classifier in classifiers: classifier = classifier.fit(X, Y) # predict and compare results preditions = [i for i in range(7)] accuracy = [i for i in range(7)] for i, classifier in enumerate(classifiers): preditions[i] = classifier.predict(real_X) accuracy[i] = accuracy_score(real_Y, preditions[i])
#6.3
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.linear_model import perceptron

# Let's set up our data and our target (the logical AND of the two inputs)
data = np.array([[0, 1], [0, 0], [1, 0], [1, 1]])
target = np.array([0, 0, 0, 1])

#6.4
# ``n_iter`` was deprecated in scikit-learn 0.19 and later removed;
# ``max_iter`` with ``tol=None`` runs the same fixed number of epochs.
p = perceptron.Perceptron(max_iter=100, tol=None)
p_out = p.fit(data, target)
print(p_out)
msg = ("Coefficients: %s, Intercept: %s")
print(msg % (str(p.coef_), str(p.intercept_)))

#6.5
colors = np.array(['k', 'r'])
markers = np.array(['*', 'o'])
# Separate loop names: reusing ``data`` / ``target`` here overwrote the
# arrays with their last row and label.
for point, label in zip(data, target):
    plt.scatter(point[0], point[1], s=100, c=colors[label],
                marker=markers[label])

# Need to calculate the hyperplane: the straight line where it intersects
# z = 0.
# Recall that our optimisation is solving z = m1*x + m2*y + c.
# To understand the straight line created at the intersection with the
# viewing plane of x and y (where z = 0):
# 0 = m1*x + m2*y + c
# coding:utf-8
'''
Created on July 8, 2015

@author: Administrator
'''
import numpy as np
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn without ``model_selection``
    from sklearn.cross_validation import train_test_split
from ANN.multilayer_perceptron import MultilayerPerceptronClassifier
from sklearn.linear_model import perceptron

# XOR truth table repeated 1000 times.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]] * 1000)
y = [0, 1, 1, 0] * 1000
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3)

# Multi-layer network
clf = MultilayerPerceptronClassifier()
clf.fit(X_train, y_train)

# Single-layer perceptron fitted on the same data
clf2 = perceptron.Perceptron()
clf2.fit(X_train, y_train)

# Show the first ten expected/predicted pairs of the multi-layer network.
prediction = clf.predict(X_test)
for i, p in enumerate(prediction[:10]):
    # One formatted string prints identically under Python 2 and Python 3.
    print("%s %s" % (y_test[i], p))
from sklearn.linear_model import perceptron
import numpy as np

# Function NOT
# NOT(1) = 0
# NOT(0) = 1
X = np.array([[1], [0]])
y = np.array([0, 1])

# ``n_iter`` was deprecated in scikit-learn 0.19 and later removed;
# ``max_iter`` with ``tol=None`` runs the same fixed number of epochs.
net = perceptron.Perceptron(max_iter=10, tol=None, verbose=0)
net.fit(X, y)

# ``predict`` expects a 2-D array of samples, so each scalar input is
# wrapped as one sample with one feature.
print("Prediction:")
print("0 -> {}".format(net.predict([[0]])))
print("1 -> {}".format(net.predict([[1]])))
def get_perceptron_classifier(penalty_param, fitIntercept):
    """Create an unfitted perceptron with the given penalty and intercept flag.

    Args:
        penalty_param: Value forwarded as the ``penalty`` argument.
        fitIntercept: Value forwarded as the ``fit_intercept`` argument.

    Returns:
        A new ``perceptron.Perceptron`` instance.
    """
    options = {
        "penalty": penalty_param,
        "fit_intercept": fitIntercept,
    }
    return perceptron.Perceptron(**options)
templist[x] = lmtzr.lemmatize(templist[x]) filetext=' '.join(templist) #End Lemmatizing....................................................... data.append(filetext) f.close() #At this point, data[] contains the vector of all messages , i.e., it is a list of message strings. #Also, spam[] stores the true labels. This corresponds to Y in the training set. CV=feature_extraction.text.CountVectorizer(stop_words=stopwords) #------------------------------------------------>The only difference! #Note that binary is not True in prev! vec=CV.fit_transform(data) #Here vec stores a vector corresponding to each message. Essentially the X in our learning set. #Build and train classifier. Also, take prior probabilities into account while making calculations. clf = perceptron.Perceptron(n_iter=100, verbose=0, random_state=None, fit_intercept=True, eta0=0.002) clf.fit(vec, spam) #let us validate! testfolder='part'+str(folder_validate) tflist='p'+str(folder_validate)+'flist' tf=open(tflist, 'r') testflist=tf.read().split() tf.close() testdata=[] testspam=[] actualspam=[] # print(testflist) for fl in testflist: # print(fl) if 'spm' in fl:
# Text classification with a perceptron classifier
from sklearn.linear_model import perceptron
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

categories = ['alt.atheism', 'sci.med']
train = fetch_20newsgroups(subset='train', categories=categories,
                           shuffle=True)

# Use a separate name for the estimator: binding it to ``perceptron``
# replaced the imported module, so ``perceptron.Perceptron`` could not be
# used again afterwards.
clf = perceptron.Perceptron(max_iter=100)

# Bag-of-words counts, then TF-IDF weighting, both fitted on the training
# documents.
cv = CountVectorizer()
x_train_counts = cv.fit_transform(train.data)
tfidf_tf = TfidfTransformer()
x_train_tfidf = tfidf_tf.fit_transform(x_train_counts)

clf.fit(x_train_tfidf, train.target)

test_docs = ['Religion is widespread, even in modern times',
             'His kidney failed',
             'The pope is a controversial leader',
             'White blood cells fight off infections',
             'The reverend had a heart attack in church']

# Re-use the fitted vectoriser and transformer for the new documents.
x_test_counts = cv.transform(test_docs)
x_test_tfidf = tfidf_tf.transform(x_test_counts)

pred = clf.predict(x_test_tfidf)
for doc, category in zip(test_docs, pred):
    print('%r => %s' % (doc, train.target_names[category]))

help(clf)