class PerceptronImpl():
    """Wrapper that records Perceptron hyperparameters and delegates to ``SKLModel``.

    The constructor only stores its arguments. A fresh ``SKLModel`` is built
    from them on every call to :meth:`fit`, and :meth:`predict` forwards to
    the most recently fitted model.
    """

    def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True, max_iter=None, tol=None, shuffle=True, verbose=0, eta0=1.0, n_jobs=None, random_state=None, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, class_weight='balanced', warm_start=False, n_iter=None):
        # Keyword form keeps the same keys, in the same order, as the signature.
        self._hyperparams = dict(
            penalty=penalty,
            alpha=alpha,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            shuffle=shuffle,
            verbose=verbose,
            eta0=eta0,
            n_jobs=n_jobs,
            random_state=random_state,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            class_weight=class_weight,
            warm_start=warm_start,
            n_iter=n_iter,
        )

    def fit(self, X, y=None):
        """Build a new ``SKLModel`` from the stored hyperparameters and fit it.

        ``y`` is forwarded only when it is not ``None``. Returns ``self``.
        """
        model = SKLModel(**self._hyperparams)
        # Stored before fitting, so the attribute exists even if fit raises.
        self._sklearn_model = model
        if y is None:
            model.fit(X)
        else:
            model.fit(X, y)
        return self

    def predict(self, X):
        """Return the fitted model's predictions for ``X``."""
        fitted = self._sklearn_model
        return fitted.predict(X)
def preceptron(self):
    """Fit an L2-penalised Perceptron and predict the test split.

    Trains on ``self.X_train`` / ``self.y_train``, prints the accuracy
    measured on that same training data (as a percentage rounded to two
    decimals), and stores the predictions for ``self.X_test`` in
    ``self.y_pred``.
    """
    model = Perceptron(penalty='l2', max_iter=1000, shuffle=True)
    model.fit(self.X_train, self.y_train)

    # score() is evaluated on the training data, not on a held-out set.
    train_score = model.score(self.X_train, self.y_train)
    acc = round(train_score * 100, 2)
    print("acc with Perceptron:", acc)

    self.y_pred = model.predict(self.X_test)
def simulation(n, runs, margin=0, p_runs=100, d=2):
    '''
    Run a given number of simulations comparing perceptron and linear SVM.

    Each run generates a set of training and testing points, fits a single
    linear SVM and ``p_runs`` perceptrons on the training points, and
    scores them on the testing points (the perceptron scores are averaged).

    NOTE(review): the values stored in the "error" columns come from
    ``score()``, which for scikit-learn classifiers is the mean accuracy,
    not an error rate. The column names are kept for compatibility with
    existing callers; confirm how downstream code interprets them.

    :param n: number of training points
    :param runs: number of times to run the simulation
    :param margin: gamma passed to ``generate_labeled_points``; it is
        overwritten by the margin returned from each run, so later runs
        receive the previous run's returned margin
    :param p_runs: number of perceptrons to average per run
    :param d: dimensionality of points
    :return: pandas dataframe with one row per run and columns
        ``n``, ``margin``, ``avg perceptron_error``, ``svm_error``
    '''
    all_data = []
    for _ in range(runs):
        # 80-20 train/test split: n training points and ceil(0.25 * n) test
        # points. (The previous ``n * 25`` produced 25x as many test points
        # as training points and made the ceil() a no-op.)
        train_dat, test_dat, margin = generate_labeled_points(
            n_train=n, n_test=ceil(n * 0.25), gamma=margin, dim=d)

        # Separate points from labels
        train_points = [x[0] for x in train_dat]
        train_labels = [x[1] for x in train_dat]
        test_points = [x[0] for x in test_dat]
        test_labels = [x[1] for x in test_dat]

        # Fit p_runs perceptrons on the same training data and average
        # their scores. One RandomState is shared, so each fit draws a
        # different shuffle from it.
        seed = np.random.RandomState()
        p_errors = []
        for _ in range(p_runs):
            perceptron = Perceptron(random_state=seed)
            perceptron.fit(train_points, train_labels)
            p_errors.append(perceptron.score(test_points, test_labels))
        p_error = np.mean(p_errors)

        # Train and test a single linear SVM
        svm = SVC(kernel="linear")
        svm.fit(train_points, train_labels)
        svm_error = svm.score(test_points, test_labels)

        all_data.append([n, margin, p_error, svm_error])

    df = pd.DataFrame(
        all_data,
        columns=['n', 'margin', 'avg perceptron_error', 'svm_error'])
    return df
def main():
    """Train a Perceptron on the demo CSV files and print its results.

    Reads features, targets, validation features and validation labels
    from ``Data/``, fits the model, then prints the validation score, each
    prediction, and the output of ``fit_transform`` on the training data.

    NOTE(review): ``n_iter`` and ``fit_transform`` are legacy scikit-learn
    Perceptron APIs; this function presumably targets an old release.
    """
    # genfromtxt opens and closes the file itself when given a path, so no
    # file handle is leaked. No header row is skipped: every row is parsed
    # as float64.
    dataset_T = genfromtxt('Data/demoTrain.csv', delimiter=',', dtype='f8')
    dataset_R = genfromtxt('Data/demoTarget.csv', delimiter=',', dtype='f8')
    dataset_v = genfromtxt('Data/demoTest.csv', delimiter=',', dtype='f8')
    trueData = genfromtxt('Data/validate.csv', delimiter=',', dtype='f8')

    target = list(dataset_R)
    train = list(dataset_T)
    validate = list(dataset_v)
    y = list(trueData)

    per = Perceptron(n_iter=2, shuffle=True)
    per.fit(train, target)

    val = per.predict(validate)
    score = per.score(validate, y)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(str(score) + "\n")
    for v in val:
        print(v)

    a = per.fit_transform(train, target)
    print(a)
def main():
    """Fit a Perceptron on the demo data set and print validation output.

    Loads the training features, training targets, validation features and
    validation labels from CSV files under ``Data/``. After fitting, it
    prints the validation score, every predicted label, and the result of
    ``fit_transform`` on the training data.

    NOTE(review): ``n_iter`` and ``fit_transform`` are legacy scikit-learn
    Perceptron APIs; confirm the scikit-learn version this runs against.
    """
    def load(path):
        # Passing the path (not an open file object) lets genfromtxt manage
        # the file handle, so nothing is left open. Rows are parsed as
        # float64 and no header row is skipped.
        return genfromtxt(path, delimiter=',', dtype='f8')

    train = list(load('Data/demoTrain.csv'))
    target = list(load('Data/demoTarget.csv'))
    validate = list(load('Data/demoTest.csv'))
    y = list(load('Data/validate.csv'))

    per = Perceptron(n_iter=2, shuffle=True)
    per.fit(train, target)

    val = per.predict(validate)
    score = per.score(validate, y)

    # Parenthesised single-argument print works on both Python 2 and 3.
    print(str(score) + "\n")
    for v in val:
        print(v)

    a = per.fit_transform(train, target)
    print(a)
def task2(sneakers_data: pd.DataFrame, boots_data: pd.DataFrame,
          sneakers_labels: pd.DataFrame, boots_labels: pd.DataFrame):
    """Evaluate a Perceptron with 4-fold cross-validation and print a report.

    The sneaker and boot samples (and their labels) are stacked into one
    data set, then split with a shuffled ``KFold``. For every fold the
    function prints the training time, prediction time, accuracy and the
    confusion-matrix cells as percentages of the fold's test size, followed
    by min/max/mean/total summaries over all folds.

    :param sneakers_data: feature rows for the sneaker samples
    :param boots_data: feature rows for the boot samples
    :param sneakers_labels: labels matching ``sneakers_data``
    :param boots_labels: labels matching ``boots_data``
    :return: None; all results are printed
    """
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    full_data = pd.concat([sneakers_data, boots_data])
    full_labels = pd.concat([sneakers_labels, boots_labels])

    train_times = []
    predict_times = []
    accuracies = []

    print("\tTask 2 output")

    num_splits = 4
    splitter = KFold(n_splits=num_splits, shuffle=True)
    for train_index, test_index in splitter.split(full_data):
        train_data = full_data.iloc[train_index]
        test_data = full_data.iloc[test_index]
        train_labels = full_labels.iloc[train_index]
        test_labels = full_labels.iloc[test_index]

        pctrn = Perceptron()

        train_start_time = timeit.default_timer()
        pctrn.fit(X=train_data, y=train_labels)
        train_time = timeit.default_timer() - train_start_time
        train_times.append(train_time)
        print("\tPerceptron took", train_time, "seconds to train on data")

        predict_start_time = timeit.default_timer()
        prediction = pctrn.predict(test_data)
        predict_time = timeit.default_timer() - predict_start_time
        predict_times.append(predict_time)
        print("\tPerceptron took", predict_time, "seconds to make a prediction")

        accuracy = accuracy_score(test_labels, prediction) * 100
        accuracies.append(accuracy)
        print("\tAccuracy for perceptron", accuracy, "%")

        # Rows of the confusion matrix are true labels, columns are
        # predictions. The labels below treat the first class as "positive".
        # NOTE(review): under that reading cell [0, 1] (true first class,
        # predicted second) would be a false negative and [1, 0] a false
        # positive, the opposite of what is printed -- confirm the intended
        # convention. Output is left unchanged.
        confusion = confusion_matrix(test_labels, prediction)
        fold_size = len(test_labels)
        percent_true_pos = (confusion[0, 0] / fold_size) * 100
        percent_false_pos = (confusion[0, 1] / fold_size) * 100
        percent_true_neg = (confusion[1, 1] / fold_size) * 100
        percent_false_neg = (confusion[1, 0] / fold_size) * 100
        print("\tPerceptron confusion matrix true positive:", percent_true_pos, "%")
        print("\tPerceptron confusion matrix false positive:", percent_false_pos, "%")
        print("\tPerceptron confusion matrix true negative:", percent_true_neg, "%")
        print("\tPerceptron confusion matrix false negative:", percent_false_neg, "%\n")

    print("\tThe minimum train time was", np.min(train_times), "seconds")
    print("\tThe maximum train time was", np.max(train_times), "seconds")
    print("\tThe average train time was", np.mean(train_times), "seconds\n")

    print("\tThe minimum prediction time was", np.min(predict_times), "seconds")
    print("\tThe maximum prediction time was", np.max(predict_times), "seconds")
    print("\tThe average prediction time was", np.mean(predict_times), "seconds\n")

    print("\tMinimum Accuracy was", np.min(accuracies), "%")
    print("\tMaximum accuracy was", np.max(accuracies), "%")
    print("\tAverage accuracy was", np.mean(accuracies), "%\n")

    # NOTE(review): the "gamma" wording looks copied from an SVM report; the
    # Perceptron used here has no gamma. Text kept as-is.
    print(
        "\tTotal train time for all k-folds in perceptron with gamma value of =",
        np.sum(train_times), "seconds")
    print(
        "\tTotal prediction time for all k-folds in perceptron with gamma of =",
        np.sum(predict_times), "seconds\n")
def test_perceptron(x: list, y: list, learning_rate: float, max_iter: int) -> None:
    """Fit a scikit-learn Perceptron on ``(x, y)`` and plot the fitted model.

    :param x: training samples
    :param y: training labels, one per sample
    :param learning_rate: constant by which updates are multiplied (``eta0``)
    :param max_iter: maximum number of passes over the training data
    """
    # The learning rate is ``eta0``. ``alpha`` is the regularisation
    # strength and has no effect with the default ``penalty=None``, so
    # passing the learning rate there silently ignored it.
    perceptron = Perceptron(max_iter=max_iter, eta0=learning_rate)
    perceptron.fit(x, y)
    plot_model(np.array(x), perceptron)
def main():
    """Run the 2048 trainer: show the mode menu, then play one game per pass.

    Each pass of the outer loop:

    1. Loads previous training data from ``2048_train.csv`` (if present) and
       splits it into training / test / development parts.
    2. Loads the best weights, random radius and best score from
       ``best_weights.txt`` (if present), otherwise starts from zero weights.
    3. Opens the window, waits for a mode key, and fits a KNN or Perceptron
       model when the chosen mode needs one.
    4. Plays the game until it is lost or 'q' is chosen as a move, then
       waits for 'q' and, in the manual modes 'r'/'g', writes
       ``2048_train.csv``.

    Returns 0 when 'q' is pressed at the menu.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against the original layout.
    """
    while True:
        intro = Text(
            Point(250, 300),
            "2048 TRAINER\n\nTrain model...R\nFull game train...G\n\n>>>Delays between moves<<<\nTest KNN model...E\nTest Perceptron model...P\nRandom model...N\n\n>>>No delays<<<\nTest KNN model...F\nTest Perceptron model...S\nRandom model...M\n\nSimple learning model...L\n\nPRESS Q TO QUIT"
        )
        intro.setSize(20)
        intro.setTextColor(color_rgb(255, 255, 255))

        # Previous training data: column 0 is the move, columns 1-16 the grid.
        if os.path.isfile("2048_train.csv"):
            data = pd.read_csv("2048_train.csv",
                               header=None,
                               usecols=[
                                   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                   14, 15, 16
                               ])
            direction = pd.read_csv("2048_train.csv", header=None, usecols=[0])
            # First split at 0.7, then split the remainder in half.
            splitRatio = 0.7
            datatrainingSet, datatestSet = splitData(data, splitRatio)
            directiontrainingSet, directiontestSet = splitData(
                direction, splitRatio)
            splitRatio = 0.5
            datatestSet, datadevelopementSet = splitData(
                datatestSet, splitRatio)
            directiontestSet, directiondevelopementSet = splitData(
                directiontestSet, splitRatio)
            direction = np.transpose(direction)
            isPrevData = True
        else:
            isPrevData = False

        # Saved learner state: lines 0-16 are the X weights, 17-33 the Y
        # weights, line 34 the random radius and line 35 the best score.
        if os.path.isfile("best_weights.txt"):
            fileContent = open("best_weights.txt", 'r')
            weightString = fileContent.readlines()
            bestWeightsX = np.array(weightString[0:17])
            # NOTE(review): the np.float alias was removed in NumPy 1.24;
            # confirm the NumPy version this runs against.
            bestWeightsX = bestWeightsX.astype(np.float)
            bestWeightsY = np.array(weightString[17:34])
            bestWeightsY = bestWeightsY.astype(np.float)
            randomRadius = float(weightString[34])
            bestLearnScore = float(weightString[35])
            fileContent.close()
        else:
            # No saved state: start from 17 zero weights per axis.
            bestWeightsX = np.empty(0)
            bestWeightsY = np.empty(0)
            for i in range(0, 17):
                bestWeightsX = np.append(bestWeightsX, 0)
                bestWeightsY = np.append(bestWeightsY, 0)
            randomRadius = 100
            bestLearnScore = 0

        win = GraphWin("2048", 500, 600)
        win.setBackground(color_rgb(0, 103, 105))
        intro.draw(win)

        # The model-testing modes (e, f, s, p) are only selectable when
        # previous training data was loaded.
        if isPrevData:
            options = ['r', 'g', 'e', 'n', 'f', 'm', 'l', 'q', 's', 'p']
        else:
            options = ['r', 'g', 'n', 'm', 'l', 'q']
        mode = '-'
        while mode not in options:
            mode = win.getKey()
        if mode == 'q':
            win.close()
            return 0

        # Fit the model required by the chosen mode.
        if mode == 'e' or mode == 'f':
            knn = KNeighborsClassifier(n_neighbors=10)
            knn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))
        if mode == 'p' or mode == 's':
            ppn = Perceptron(eta0=0.01, n_iter=10000)
            ppn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        intro.undraw()
        win.setBackground(color_rgb(100, 100, 100))
        score = 0
        scoreText = Text(Point(250, 550), str(score))
        scoreText.setTextColor(color_rgb(255, 255, 255))
        scoreText.setSize(30)
        board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                          [0, 0, 0, 0]])
        # Snapshot of the board taken before each move, used to detect
        # whether the move changed anything.
        isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                  [0, 0, 0, 0]])
        #DO THE SAME THING FOR THE TILE NUMBERS
        tileList = []
        numberList = []
        board = spawn(board)
        board = spawn(board)
        # Build and draw the 4x4 grid of tile rectangles and number labels.
        for i in range(0, 4):
            for j in range(0, 4):
                tileList.append(
                    Rectangle(Point(i * 125 + 5, j * 125 + 5),
                              Point(i * 125 + 120, j * 125 + 120)))
                numberList.append(
                    Text(Point(i * 125 + 60, j * 125 + 60), str(board[j][i])))
                numberList[i * 4 + j].setSize(20)
                tileList[i * 4 + j].setWidth(4)
                tileList[i * 4 + j].setOutline(color_rgb(255, 255, 255))
                numberList[i * 4 + j].setTextColor(color_rgb(0, 0, 0))
                tileList[i * 4 + j].draw(win)
                numberList[i * 4 + j].draw(win)

        #THE TRAINING DATA
        if not isPrevData:
            data = np.empty((0, 16), float)
            direction = np.empty(0)

        drawBoard(board, win, tileList, numberList)
        move = '-'
        classes = ['w', 's', 'd', 'a']
        # moveIter counts consecutive moves that did not change the board;
        # it is reset to 0 further down whenever the board changes.
        moveIter = 0
        nMoves = 0
        totalIterations = 1
        iteration = 1
        currentWeightsX = generateRandomWeights(bestWeightsX,
                                                math.floor(randomRadius))
        currentWeightsY = generateRandomWeights(bestWeightsY,
                                                math.floor(randomRadius))
        currentBestWeightsX = currentWeightsX
        currentBestWeightsY = currentWeightsY
        if mode == 'l':
            maxIterations = int(
                input("Enter number of generations to simulate:"))

        # ---- game loop: one pass per move ----
        while True:
            score = calcScore(board)
            if mode != 'l':
                drawBoard(board, win, tileList, numberList)
            scoreText.undraw()
            if mode != 'l':
                scoreText.setSize(30)
                scoreText.setText(str(score))
            scoreText.draw(win)

            # Remember the board as it was before this move.
            for i in range(0, 4):
                for j in range(0, 4):
                    isChangeBoard[i][j] = board[i][j]

            if mode == 'r' or mode == 'g':
                # Manual modes: the move is the key pressed in the window.
                move = win.getKey()
                # In mode 'r' the grid is replaced by a random one on every
                # 30th move.
                if nMoves % 30 == 0 and mode == 'r':
                    board = generateRandomGrid(board)
                nMoves += 1
            else:
                if mode == 'e' or mode == 'n' or mode == 'p':
                    time.sleep(0.5)
                if mode == 'e' or mode == 'f':
                    # Rank the classes by predicted probability and pick the
                    # entry indexed by moveIter.
                    probs = np.ravel(
                        knn.predict_proba(gridToData(board).reshape(1, -1)))
                    ranks = [0] * len(probs)
                    for i, x in enumerate(
                            sorted(range(len(probs)),
                                   key=lambda y: probs[y])):
                        ranks[x] = i
                    move = classes[ranks[moveIter]]
                    moveIter += 1
                elif mode == 'm':
                    move = classes[random.randint(0, 3)]
                elif mode == 'l':
                    move = classes[calculateDirection(currentWeightsX, board)
                                   * 2 +
                                   calculateDirection(currentWeightsY, board)]
                    # Fall back to a random move if the previous move did
                    # not change the board.
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1
                elif mode == 'p' or mode == 's':
                    move = ppn.predict(gridToData(board).reshape(1, -1))
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1
                # NOTE(review): mode 'n' is offered in the menu but no branch
                # visible here assigns a move for it -- confirm.

            # Apply the move; 'q' leaves the game loop.
            if move == 'w':
                board = shift(board, 0)
            elif move == 'd':
                board = shift(board, 1)
            elif move == 's':
                board = shift(board, 2)
            elif move == 'a':
                board = shift(board, 3)
            elif move == 'q':
                break

            if lose(board):
                print(score)
                if mode == 'l':
                    # Learning mode: restart the board and try new weights
                    # instead of ending the game.
                    board = np.array([[0, 0, 0, 0], [0, 0, 0, 0],
                                      [0, 0, 0, 0], [0, 0, 0, 0]])
                    isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0],
                                              [0, 0, 0, 0], [0, 0, 0, 0]])
                    board = spawn(board)
                    board = spawn(board)
                    scoreText.setSize(16)
                    scoreText.setText("Score: " + str(score) +
                                      " -- iteration: " + str(iteration) +
                                      " -- r: " + str(randomRadius))
                    if score > bestLearnScore:
                        bestLearnScore = score
                        currentBestWeightsX = currentWeightsX
                        currentBestWeightsY = currentWeightsY
                        print("current best" + str(currentBestWeightsX) +
                              str(currentBestWeightsY))
                    # Every 1000 lost games: adopt the best weights found so
                    # far and shrink the random radius by 10%.
                    if iteration == 1000:
                        drawBoard(board, win, tileList, numberList)
                        print("best score: " + str(bestLearnScore))
                        iteration = 1
                        randomRadius *= 0.9
                        bestWeightsX = currentBestWeightsX
                        bestWeightsY = currentBestWeightsY
                        print("best weights:" + str(bestWeightsX) +
                              str(bestWeightsY))
                        if totalIterations / 1000 >= maxIterations:
                            # Requested number of generations reached: save
                            # the learner state and stop.
                            toWrite = np.empty(0)
                            toWrite = np.append(toWrite, bestWeightsX)
                            toWrite = np.append(toWrite, bestWeightsY)
                            toWrite = np.append(toWrite, randomRadius)
                            toWrite = np.append(toWrite, bestLearnScore)
                            np.savetxt("best_weights.txt", toWrite)
                            #WRITE WEIGHTS N STUFF
                            break
                    totalIterations += 1
                    iteration += 1
                    currentWeightsX = generateRandomWeights(
                        bestWeightsX, math.floor(randomRadius))
                    currentWeightsY = generateRandomWeights(
                        bestWeightsY, math.floor(randomRadius))
                else:
                    break

            # Did the move change the board?
            isChanged = False
            for i in range(0, 4):
                for j in range(0, 4):
                    if isChangeBoard[i][j] != board[i][j]:
                        isChanged = True
            if isChanged:
                moveIter = 0
                # Manual modes record the move and a grid encoding as a
                # training sample.
                if mode == 'r' or mode == 'g':
                    direction = np.append(direction, move)
                    data = np.vstack([data, gridToData2(board)])
                board = spawn(board)

        # ---- game over: show the final score and wait for 'q' ----
        score = calcScore(board)
        scoreText.setSize(16)
        scoreText.setText(str(score) + " -- YOU LOSE (Press Q)")
        while move != 'q':
            move = win.getKey()
        win.close()
        if mode == 'r' or mode == 'g':
            # NOTE(review): this writes directiontrainingSet/datatrainingSet,
            # which are only assigned when 2048_train.csv existed at startup;
            # the `direction`/`data` samples collected during play are not
            # written here. Confirm whether those were meant to be saved.
            pd.DataFrame.to_csv(pd.DataFrame(
                np.hstack([
                    np.reshape(directiontrainingSet,
                               [np.shape(directiontrainingSet)[0], 1]),
                    datatrainingSet
                ])),
                                "2048_train.csv",
                                index=False,
                                header=False)
import itertools

# Import from the public package: the private module path
# ``sklearn.linear_model.perceptron`` was removed in scikit-learn 0.24.
from sklearn.linear_model import Perceptron

from numbers_mass import one, two
from generate_picture import read_image

# Flatten each 2-D digit template into a single feature vector.
x = [
    list(itertools.chain.from_iterable(one)),
    list(itertools.chain.from_iterable(two))
]
print(x)
y = [1, 2]

# The classifier is trained at import time on the two templates.
clf = Perceptron(random_state=241)
clf.fit(x, y)

if __name__ == "__main__":
    # Predict for the "one" template itself, then for an image from disk.
    print(clf.predict([
        list(itertools.chain.from_iterable(one)),
    ]))
    print(
        clf.predict([
            list(itertools.chain.from_iterable(read_image('1.png'))),
        ]))
import numpy as np
from matplotlib import pyplot
# Import from the public package: the private module path
# ``sklearn.linear_model.perceptron`` was removed in scikit-learn 0.24.
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

import usps
import perceptron  # For the two_classes function

data, labels = usps.load_train()
data_test, labels_test = usps.load_test()

# For each k in 0..9, relabel the data with two_classes(labels, k), fit a
# Perceptron on the relabelled training set, and report train/test accuracy.
for k in range(10):
    labels_k = perceptron.two_classes(labels, k)
    net = Perceptron()
    net.fit(data, labels_k)
    output_train = net.predict(data)
    output_test = net.predict(data_test)
    print(k)
    print(" Score (train)", accuracy_score(labels_k, output_train))
    labels_k_test = perceptron.two_classes(labels_test, k)
    print(" Score (test)", accuracy_score(labels_k_test, output_test))
class PerceptronMask(StreamModel):
    """ PerceptronMask

    Adapter around scikit-learn's Perceptron classifier. scikit-multiflow's
    framework expects a few interfaces that scikit-learn estimators do not
    provide; this class supplies them by delegating to a wrapped Perceptron.
    """

    def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True, max_iter=1000, tol=1e-3, shuffle=True, verbose=0, eta0=1.0, n_jobs=1, random_state=0, class_weight=None, warm_start=False):
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta0 = eta0
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.class_weight = class_weight
        self.warm_start = warm_start
        super().__init__()
        self.classifier = Perceptron(**self._constructor_kwargs())

    def _constructor_kwargs(self):
        """Return the stored hyperparameters as constructor keyword arguments."""
        return {
            'penalty': self.penalty,
            'alpha': self.alpha,
            'fit_intercept': self.fit_intercept,
            'max_iter': self.max_iter,
            'tol': self.tol,
            'shuffle': self.shuffle,
            'verbose': self.verbose,
            'random_state': self.random_state,
            'eta0': self.eta0,
            'warm_start': self.warm_start,
            'class_weight': self.class_weight,
            'n_jobs': self.n_jobs,
        }

    def fit(self, X, y, classes=None, weight=None):
        """ fit

        Fit the wrapped Perceptron with scikit-learn's ``fit``.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        weight:
            Instance weights, forwarded as ``sample_weight``. If not
            provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Incrementally fit the wrapped Perceptron with ``partial_fit``.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: list, optional
            A list with all the possible labels of the classification
            problem.

        weight:
            Instance weights. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.partial_fit(X, y, classes, weight)
        return self

    def predict(self, X):
        """ predict

        Predict labels for the samples in X with the current model.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        Returns
        -------
        numpy.ndarray
            The predicted labels for all instances in X.

        """
        labels = self.classifier.predict(X)
        return np.asarray(labels)

    def predict_proba(self, X):
        """ predict_proba

        Estimate, for each sample, the probability of belonging to each of
        the known classes. Delegates to the wrapped classifier's
        ``_predict_proba_lr``.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        numpy.ndarray
            One row per sample in X, in the same order, holding one
            probability per known class.

        """
        return self.classifier._predict_proba_lr(X)

    def score(self, X, y):
        """ score

        Return the wrapped classifier's score for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        Returns
        -------
        float
            The classifier's score.

        """
        return self.classifier.score(X, y)

    def get_info(self):
        """Return a one-line description of the wrapped classifier's parameters."""
        params = self.classifier.get_params()
        pieces = [
            type(self).__name__ + ':',
            ' - penalty: {}'.format(params['penalty']),
            ' - alpha: {}'.format(params['alpha']),
            ' - fit_intercept: {}'.format(params['fit_intercept']),
            ' - max_iter: {}'.format(params['max_iter']),
            ' - tol: {}'.format(params['tol']),
            ' - shuffle: {}'.format(params['shuffle']),
            ' - eta0: {}'.format(params['eta0']),
            ' - warm_start: {}'.format(params['warm_start']),
            ' - class_weight: {}'.format(params['class_weight']),
            ' - n_jobs: {}'.format(params['n_jobs']),
        ]
        return ''.join(pieces)

    def reset(self):
        """Re-run ``__init__`` with the stored hyperparameters, replacing the classifier."""
        self.__init__(**self._constructor_kwargs())
print(acc_svc)
# 0.81218

# Linear support vector classifier (LinearSVC)
from sklearn.svm import LinearSVC
linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_test)
# accuracy_score takes (y_true, y_pred)
acc_linear_svc = accuracy_score(y_test, y_pred)
print(acc_linear_svc)
# 0.77157

# Perceptron
# Import from the public package: the private module path
# ``sklearn.linear_model.perceptron`` was removed in scikit-learn 0.24.
from sklearn.linear_model import Perceptron
perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_test)
acc_perceptron = accuracy_score(y_test, y_pred)
print(acc_perceptron)
# 0.77665

# Decision tree
from sklearn.tree import DecisionTreeClassifier
decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_test)
acc_decisiontree = accuracy_score(y_test, y_pred)
print(acc_decisiontree)
# 0.82233

# Random forest
from sklearn.ensemble import RandomForestClassifier
[1, -1, -1], # Naranja [1, 1, -1], # Manzana [1, -1, -1], # Naranja ]), array([ 1, -1, 1, 1 ]) ] # Crear una instancia de Perceptron con un máximo de 100 épocas perceptron = Perceptron(max_iter=1000) # Entrenar la red neuronal perceptron.fit(data[0], data[1]) def test_net_accuracy(): # Probar la certeza de la red neuronal con la data previamente alimentada # para filtrar una manzana y una naranja. data = [ array([ [1, 1, -1], # Manzana [1, -1, -1], # Naranja [-1, -1, -1] # Una naranja que es elíptica ]), array([ 1, 1, 1 ])
class PerceptronMask(BaseClassifier):
    """ PerceptronMask

    A mask for scikit-learn's Perceptron classifier.

    Because scikit-multiflow's framework requires a few interfaces that are
    not present in scikit-learn, this mask allows the former to use
    classifiers native to the latter.

    NOTE(review): the wrapped classifier is built with the legacy ``n_iter``
    argument, and ``get_info`` reads the matching ``n_iter`` parameter;
    confirm the scikit-learn version this targets.
    """

    def __init__(self):
        super().__init__()
        self.classifier = Perceptron(n_iter=50)

    def fit(self, X, y, classes=None, weight=None):
        """ fit

        Calls the Perceptron fit function from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        weight:
            Instance weight. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Calls the Perceptron partial_fit from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: list, optional
            A list with all the possible labels of the classification
            problem.

        weight:
            Instance weight. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.partial_fit(X, y, classes, weight)
        return self

    def predict(self, X):
        """ predict

        Uses the current model to predict samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        Returns
        -------
        list
            A list containing the predicted labels for all instances in X.

        """
        return self.classifier.predict(X)

    def predict_proba(self, X):
        """ predict_proba

        Predicts the probability of each sample belonging to each one of
        the known classes.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        numpy.ndarray
            One row per sample in X, in the same order, holding one
            probability per known class.

        """
        # Perceptron exposes no public predict_proba, so calling it raised
        # AttributeError. The linear-classifier helper _predict_proba_lr
        # derives probability estimates from the decision function.
        return self.classifier._predict_proba_lr(X)

    def score(self, X, y):
        """ score

        Returns the predict performance for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            The features matrix.

        y: Array-like
            An array-like containing the class labels for all samples in X.

        Returns
        -------
        float
            The classifier's score.

        """
        return self.classifier.score(X, y)

    def get_info(self):
        """Return a one-line description of the wrapped classifier's parameters."""
        params = self.classifier.get_params()
        penalty = params['penalty']
        penalty = 'None' if penalty is None else penalty
        fit_int = 'True' if params['fit_intercept'] else 'False'
        shuffle = 'True' if params['shuffle'] else 'False'
        return 'Perceptron: penalty: ' + penalty + \
               ' - alpha: ' + str(round(params['alpha'], 3)) + \
               ' - fit_intercept: ' + fit_int + \
               ' - n_iter: ' + str(params['n_iter']) + \
               ' - shuffle: ' + shuffle
#encoding=utf8
import os
# Import from the public package: the private module path
# ``sklearn.linear_model.perceptron`` was removed in scikit-learn 0.24.
from sklearn.linear_model import Perceptron
import pandas as pd

# Remove any result left over from a previous run.
if os.path.exists('./step2/result.csv'):
    os.remove('./step2/result.csv')

# Load the training data
train_data = pd.read_csv('./step2/train_data.csv')
# Load the training labels
train_label = pd.read_csv('./step2/train_label.csv')
train_label = train_label['target']
# Load the test data
test_data = pd.read_csv('./step2/test_data.csv')

# Train the model and predict the test set
clf = Perceptron(eta0=0.1, max_iter=500)
clf.fit(train_data, train_label)
res = clf.predict(test_data)

# Save the predictions as a single "result" column, without the index
res = {"result": res}
res = pd.DataFrame(res)
res.to_csv('./step2/result.csv', index=False)
def result():
    """Train twelve text classifiers on an uploaded CSV and keep the best.

    On POST the view reads the uploaded CSV, takes the feature and label
    columns named in the form, strips URLs and non-alphanumeric characters
    from the text, lower-cases it, and fits a TF-IDF vectoriser on a 67/33
    train/test split. Each candidate classifier is then trained and scored
    in turn. The first classifier reaching the highest accuracy is pickled
    to ``<filename>_model`` and the vectoriser to ``<filename>_tfidfVect``.

    Returns the rendered ``result.html`` with ``ac1`` .. ``ac12`` set to the
    accuracies, or ``nameError.html`` when either column name is missing
    from the CSV.

    NOTE(review): for non-POST requests nothing is returned.
    """
    if request.method == 'POST':
        path = request.files.get('myFile')
        df = pd.read_csv(path, encoding="ISO-8859-1")
        # SECURITY NOTE(review): `filename` comes straight from the form and
        # is used below to build output paths -- consider restricting it to
        # a safe base name.
        filename = request.form['filename']
        str1 = request.form['feature']
        str2 = request.form['label']

        columns = list(df)
        if str1 not in columns or str2 not in columns:
            return render_template('nameError.html')
        y = df[str2]
        X = df[str1]

        # Drop URLs first, then every character that is not a letter, a
        # digit or a space.
        cleaned = [
            re.sub(r'[^a-zA-Z0-9 ]+', '', re.sub(r"http\S+", "", subject))
            for subject in X
        ]
        X = pd.Series(cleaned).str.lower()
        # (A spaCy lemmatisation / stop-word pass used to sit here as
        # disabled code; it was never executed.)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
        tfidfvect = TfidfVectorizer(ngram_range=(1, 1))
        X_train_tfidf = tfidfvect.fit_transform(X_train)

        # (log message, estimator class, constructor kwargs, needs dense input)
        # NOTE(review): the last entry is logged as "XGBC" but is a second
        # SGDClassifier -- confirm which model was intended.
        candidates = [
            ("accuracy SVC: {} and time: {} s", LinearSVC, {}, False),
            ("accuracy LR: {} and time: {}", LogisticRegression,
             {'n_jobs': -1, 'multi_class': 'multinomial', 'solver': 'newton-cg'}, False),
            ("accuracy RFC: {} and time: {}", RandomForestClassifier, {'n_jobs': -1}, False),
            ("accuracy MNB: {} and time: {}", MultinomialNB, {}, False),
            ("accuracy GNB: {} and time: {}", GaussianNB, {}, True),
            ("accuracy LRCV: {} and time: {}", LogisticRegressionCV, {'n_jobs': -1}, False),
            ("accuracy ABC: {} and time: {}", AdaBoostClassifier, {}, False),
            ("accuracy BNB: {} and time: {}", BernoulliNB, {}, True),
            ("accuracy Per: {} and time: {}", Perceptron, {'n_jobs': -1}, True),
            ("accuracy RidCV: {} and time: {}", RidgeClassifierCV, {}, True),
            ("accuracy SGDC: {} and time: {}", SGDClassifier, {'n_jobs': -1}, True),
            ("accuracy XGBC: {} and time: {}", SGDClassifier, {'n_jobs': -1}, True),
        ]

        models = []
        acu_list = []
        for message, estimator_cls, kwargs, dense in candidates:
            # The timed window covers construction, (optional) densifying,
            # fitting, transforming the test set, predicting and scoring.
            start = time()
            clf = estimator_cls(**kwargs)
            train_features = X_train_tfidf.toarray() if dense else X_train_tfidf
            clf.fit(train_features, y_train)
            test_features = tfidfvect.transform(X_test)
            if dense:
                test_features = test_features.toarray()
            pred = clf.predict(test_features)
            accuracy = accuracy_score(y_test, pred)
            end = time()
            print(message.format(accuracy, (end - start)))
            models.append(clf)
            acu_list.append(accuracy)

        # index() returns the first maximum, so ties go to the earliest
        # candidate.
        best_model = models[acu_list.index(max(acu_list))]

        # Context managers make sure both pickle files are flushed and closed.
        with open(filename + '_model', 'wb') as model_file:
            pickle.dump(best_model, model_file)
        with open(filename + '_tfidfVect', 'wb') as vect_file:
            pickle.dump(tfidfvect, vect_file)

        scores = {
            'ac{}'.format(position): accuracy
            for position, accuracy in enumerate(acu_list, start=1)
        }
        return render_template("result.html", **scores)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=i)

# Standardise the training and test features (the scaler is fitted on the
# training set only)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train the Perceptron on the training split: at most 100 passes over the
# data, with learning rate eta0 = 0.1.
# Import from the public package: the private module path
# ``sklearn.linear_model.perceptron`` was removed in scikit-learn 0.24.
from sklearn.linear_model import Perceptron
classifier = Perceptron(max_iter=100, eta0=0.1)
classifier.fit(X_train, y_train)

# Present the test data to the Perceptron and collect the predictions
y_pred = classifier.predict(X_test)

# Build the confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# TP = # True Positives, TN = # True Negatives,
# FP = # False Positives, FN = # False Negatives
# Precision = TP / (TP + FP)

# Apply the evaluation metric
# NOTE(review): for binary labels scikit-learn documents cm[0][0] as the
# true-negative count; confirm which class is meant to be "positive" here.
TP = cm[0][0]