class PerceptronImpl():
    """Adapter that stores hyperparameters and builds ``SKLModel`` lazily in ``fit``."""

    def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True,
                 max_iter=None, tol=None, shuffle=True, verbose=0, eta0=1.0,
                 n_jobs=None, random_state=None, early_stopping=False,
                 validation_fraction=0.1, n_iter_no_change=5,
                 class_weight='balanced', warm_start=False, n_iter=None):
        """Record every constructor argument under its own parameter name."""
        self._hyperparams = dict(
            penalty=penalty,
            alpha=alpha,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            shuffle=shuffle,
            verbose=verbose,
            eta0=eta0,
            n_jobs=n_jobs,
            random_state=random_state,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            class_weight=class_weight,
            warm_start=warm_start,
            n_iter=n_iter,
        )

    def fit(self, X, y=None):
        """Create a new ``SKLModel`` from the stored hyperparameters and fit it.

        ``y`` is forwarded to the model only when it is not ``None``.
        Returns ``self``.
        """
        estimator = SKLModel(**self._hyperparams)
        # Publish the model before fitting, as the attribute must exist even
        # if the underlying ``fit`` call raises.
        self._sklearn_model = estimator
        fit_args = (X,) if y is None else (X, y)
        estimator.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the predictions of the model built by the last ``fit`` call."""
        fitted = self._sklearn_model
        return fitted.predict(X)
def fit(self, X, y=None):
    """Create a new ``SKLModel`` from ``self._hyperparams`` and fit it.

    ``y`` is forwarded to the model only when it is not ``None``.
    Returns ``self``.
    """
    estimator = SKLModel(**self._hyperparams)
    # The attribute is set before fitting so it exists even if ``fit`` raises.
    self._sklearn_model = estimator
    fit_args = (X,) if y is None else (X, y)
    estimator.fit(*fit_args)
    return self
def simulation(n, runs, margin=0, p_runs=100, d=2):
    '''
    Run a given number of simulations comparing SVM and perceptron scores.

    Each run generates one set of training and testing points, fits a single
    linear SVM and ``p_runs`` perceptrons on the same training data, and
    records the mean perceptron score next to the SVM score.

    NOTE(review): the recorded values come from ``.score(...)``; on
    scikit-learn classifiers that is mean accuracy, so the "error" columns
    presumably hold accuracies - confirm before interpreting them.

    :param n: number of training points
    :param runs: number of times to run the simulation
    :param margin: gamma passed to ``generate_labeled_points`` on every run
    :param p_runs: number of perceptrons to average per run
    :param d: dimensionality of points
    :return: pandas dataframe, one row per run with columns
             ``n``, ``margin``, ``avg perceptron_error``, ``svm_error``
    '''
    all_data = []
    for _ in range(runs):
        # The generator also returns the margin of the generated data. It is
        # kept in its own name so the requested ``margin`` is not overwritten
        # and every run asks for the same gamma.
        # NOTE(review): the original comment says "split 80-20" but n_test is
        # ceil(n * 25); left unchanged - confirm the intended test-set size.
        train_dat, test_dat, actual_margin = generate_labeled_points(
            n_train=n, n_test=ceil(n * 25), gamma=margin, dim=d)

        # Separate points from labels
        train_points = [x[0] for x in train_dat]
        train_labels = [x[1] for x in train_dat]
        test_points = [x[0] for x in test_dat]
        test_labels = [x[1] for x in test_dat]

        # Fit p_runs perceptrons on the same training data. They share one
        # RandomState instance, so each fit draws from an advancing stream.
        p_errors = []
        seed = np.random.RandomState()
        for _ in range(p_runs):
            perceptron = Perceptron(random_state=seed)
            perceptron.fit(train_points, train_labels)
            p_errors.append(perceptron.score(test_points, test_labels))
        p_error = np.mean(p_errors)

        # Train and test a single linear SVM
        svm = SVC(kernel="linear")
        svm.fit(train_points, train_labels)
        svm_error = svm.score(test_points, test_labels)

        all_data.append([n, actual_margin, p_error, svm_error])

    df = pd.DataFrame(
        all_data,
        columns=['n', 'margin', 'avg perceptron_error', 'svm_error'])
    return df
def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True, max_iter=1000, tol=1e-3, shuffle=True,
             verbose=0, eta0=1.0, n_jobs=1, random_state=0, class_weight=None, warm_start=False):
    """Store the hyperparameters and build the wrapped ``Perceptron``.

    Every argument is kept as a same-named attribute. After the parent
    initialiser has run, those attributes are forwarded as keyword arguments
    to ``Perceptron`` and the instance is stored in ``self.classifier``.
    """
    self.penalty = penalty
    self.alpha = alpha
    self.fit_intercept = fit_intercept
    self.max_iter = max_iter
    self.tol = tol
    self.shuffle = shuffle
    self.verbose = verbose
    self.eta0 = eta0
    self.n_jobs = n_jobs
    self.random_state = random_state
    self.class_weight = class_weight
    self.warm_start = warm_start
    super().__init__()

    # Read the values back from the instance (not the arguments) so the
    # estimator sees whatever the attributes hold after super().__init__().
    forwarded = (
        'penalty', 'alpha', 'fit_intercept', 'max_iter', 'tol', 'shuffle',
        'verbose', 'random_state', 'eta0', 'warm_start', 'class_weight',
        'n_jobs',
    )
    estimator_kwargs = {name: getattr(self, name) for name in forwarded}
    self.classifier = Perceptron(**estimator_kwargs)
def demo():
    """Evaluate a multi-output learner on the music.csv file stream.

    Pre-trains a ``MultiOutputLearner`` (with a ``Perceptron`` base estimator)
    on the first 150 samples, then predicts the remaining samples one at a
    time and logs the elapsed time, the number of samples analysed and the
    Hamming score of the predictions.
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # File stream over the music dataset
    stream = FileStream("../data/datasets/music.csv", 0, 6)
    stream.prepare_for_use()

    # Single-step pipeline wrapping the multi-output learner
    learner = MultiOutputLearner(base_estimator=Perceptron())
    pipe = Pipeline([('classifier', learner)])

    pretrain_size = 150
    logging.info('Pre training on %s samples', str(pretrain_size))
    logging.info('Total %s samples', str(stream.n_samples))

    X, y = stream.next_sample(pretrain_size)
    # Flatten the per-output target values into one list of distinct labels
    distinct_labels = {label for group in stream.target_values for label in group}
    classes_flat = list(distinct_labels)
    pipe.partial_fit(X, y, classes=classes_flat)

    n_seen = 0
    expected = []
    predicted = []
    started = timer()
    logging.info('Evaluating...')
    while stream.has_more_samples():
        X, y = stream.next_sample()
        predicted.extend(pipe.predict(X))
        expected.extend(y)
        n_seen = n_seen + 1

    perf = hamming_score(expected, predicted)
    logging.info('Evaluation time: %s s', str(timer() - started))
    logging.info('Total samples analyzed: %s', str(n_seen))
    logging.info("The classifier's static Hamming score : %0.3f" % perf)
def preceptron(self):
    """Fit an L2-penalised Perceptron and store its test predictions.

    Prints the accuracy measured on the training data (as a percentage
    rounded to two decimals) and writes the predictions for ``self.X_test``
    to ``self.y_pred``.
    """
    model = Perceptron(penalty='l2', max_iter=1000, shuffle=True)
    model.fit(self.X_train, self.y_train)

    train_score = model.score(self.X_train, self.y_train)
    acc = round(train_score * 100, 2)
    print("acc with Perceptron:", acc)

    self.y_pred = model.predict(self.X_test)
def fit(self, x, y):
    '''
    Train the two-level architecture.

    :x: training samples
    :y: labels of the training samples
    '''
    # keep the training data
    self.x_train, self.y_train = x, y

    # store the instance difficulties
    self.H = self.kDN(x, y)

    # level 1: k-NN fitted on the whole training set
    knn = KNeighborsClassifier(self.n_vizinhos)
    self.levelone = knn
    knn.fit(x, y)

    # positions of the training instances that level 1 misclassifies
    train_pred = self.levelone.predict(x)
    misclassified = []
    for pos in range(len(y)):
        if train_pred[pos] != y[pos]:
            misclassified.append(pos)

    # difficulty threshold of the problem, derived from those positions
    self.limiar = self.defineThreshold(misclassified)

    # level 2: Perceptron bagging ensemble trained on the hard instances only
    hard_x, hard_y = self.hardInstances(x, y, self.limiar)
    bagging = BaggingClassifier(base_estimator=Perceptron(),
                                max_samples=0.9,
                                max_features=1.0,
                                n_estimators=100)
    self.ensemble = bagging
    bagging.fit(hard_x, hard_y)

    # dynamic selection (KNORA-U) over the fitted ensemble members
    self.leveltwo = KNORAU(self.ensemble.estimators_, self.n_vizinhos)
    self.leveltwo.fit(hard_x, hard_y)
def main():
    """Fit a Perceptron on the demo CSV data and print its predictions.

    Reads the training, target, test and validation CSV files from ``Data/``,
    fits the model, prints the score on the test rows followed by each
    prediction, then prints the result of ``fit_transform`` on the training
    data.
    """
    def _load(path):
        # Parse a comma-separated float file; the handle is closed as soon as
        # parsing finishes instead of being leaked.
        with open(path, 'r') as handle:
            return genfromtxt(handle, delimiter=',', dtype='f8')

    # Create the training & test sets. Every row is kept: no header row is
    # skipped here.
    dataset_T = _load('Data/demoTrain.csv')
    dataset_R = _load('Data/demoTarget.csv')
    dataset_v = _load('Data/demoTest.csv')
    trueData = _load('Data/validate.csv')

    target = list(dataset_R)
    train = [x[:] for x in dataset_T]
    validate = [x[:] for x in dataset_v]
    y = list(trueData)

    per = Perceptron(n_iter=2, shuffle=True)
    per.fit(train, target)
    val = per.predict(validate)
    score = per.score(validate, y)

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(str(score) + "\n")
    for v in val:
        print(v)

    # NOTE(review): fit_transform on Perceptron presumably only exists in
    # older scikit-learn releases - confirm against the installed version.
    a = per.fit_transform(train, target)
    print(a)
def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True,
             max_iter=None, tol=None, shuffle=True, verbose=0, eta0=1.0,
             n_jobs=None, random_state=None, early_stopping=False,
             validation_fraction=0.1, n_iter_no_change=5,
             class_weight='balanced', warm_start=False, n_iter=None):
    """Record the hyperparameters and build the wrapped ``Op`` from them.

    All arguments are stored in ``self._hyperparams`` under their parameter
    names, and the same mapping is expanded into ``Op`` to create
    ``self._wrapped_model``.
    """
    hyperparams = dict(
        penalty=penalty,
        alpha=alpha,
        fit_intercept=fit_intercept,
        max_iter=max_iter,
        tol=tol,
        shuffle=shuffle,
        verbose=verbose,
        eta0=eta0,
        n_jobs=n_jobs,
        random_state=random_state,
        early_stopping=early_stopping,
        validation_fraction=validation_fraction,
        n_iter_no_change=n_iter_no_change,
        class_weight=class_weight,
        warm_start=warm_start,
        n_iter=n_iter,
    )
    self._hyperparams = hyperparams
    self._wrapped_model = Op(**self._hyperparams)
def main():
    """Fit a Perceptron on the demo CSV data and print its predictions.

    Reads the training, target, test and validation CSV files from ``Data/``,
    fits the model, prints the score on the test rows followed by each
    prediction, then prints the result of ``fit_transform`` on the training
    data.
    """
    def _load(path):
        # Parse a comma-separated float file; the handle is closed as soon as
        # parsing finishes instead of being leaked.
        with open(path, 'r') as handle:
            return genfromtxt(handle, delimiter=',', dtype='f8')

    # Create the training & test sets. Every row is kept: no header row is
    # skipped here.
    dataset_T = _load('Data/demoTrain.csv')
    dataset_R = _load('Data/demoTarget.csv')
    dataset_v = _load('Data/demoTest.csv')
    trueData = _load('Data/validate.csv')

    target = list(dataset_R)
    train = [x[:] for x in dataset_T]
    validate = [x[:] for x in dataset_v]
    y = list(trueData)

    per = Perceptron(n_iter=2, shuffle=True)
    per.fit(train, target)
    val = per.predict(validate)
    score = per.score(validate, y)

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(str(score) + "\n")
    for v in val:
        print(v)

    # NOTE(review): fit_transform on Perceptron presumably only exists in
    # older scikit-learn releases - confirm against the installed version.
    a = per.fit_transform(train, target)
    print(a)
def test_perceptron(x: list, y: list, learning_rate: float, max_iter: int) -> None:
    """Fit a Perceptron on ``(x, y)`` and plot the resulting model.

    ``learning_rate`` is passed as ``eta0``. Assuming ``Perceptron`` is
    scikit-learn's estimator, ``eta0`` is the constant the updates are
    multiplied by, whereas ``alpha`` (used previously) only scales the
    regularisation term and is ignored when no penalty is set.

    :param x: training samples
    :param y: labels for ``x``
    :param learning_rate: update step size for the perceptron
    :param max_iter: maximum number of passes over the training data
    """
    perceptron = Perceptron(max_iter=max_iter, eta0=learning_rate)
    perceptron.fit(x, y)
    plot_model(np.array(x), perceptron)
x_val, y_val = validacaoCompleta(x_train, y_train) elif(validacao[k] == validacao[1]): x_val, y_val = validacaoInstanciasFaceis(x_train, y_train, n_vizinhos) elif(validacao[k] == validacao[2]): x_val, y_val = validacaoInstanciasDificeis(x_train, y_train, n_vizinhos) # 3.3. End ################################################################################################ # 3.4. Instanciando os classificadores ########################################################## ########## instanciando o modelo Bagging+REP ########################################### # definindo o numero do modelo na tabela num_model = 0 # intanciando o classificador ensemble = BaggingClassifier(base_estimator=Perceptron(), max_samples=qtd_amostras, max_features=1.0, n_estimators = qtd_modelos) # treinando o modelo ensemble.fit(x_train, y_train) # realizando a poda ensemble = REP(x_val, y_val, ensemble) # computando a previsao pred = ensemble.predict(x_test) # computando a diversidade do ensemble q_statistic = MedidasDiversidade('q', x_val, y_val, ensemble)
y_test, pred, nome_datasets[h] + '-pct-' + str(pct_trainamento[i]) + '- Bagging com DecisionTree [' + str(j) + ']') # escrevendo os resultados obtidos tabela.Adicionar_Sheet_Linha(num_model, j, [acuracia, auc, f1measure, gmean]) # 3.2.1. End ################################################################################### # 3.2.2. Bagging com Main ################################################################ # numero do modelo na tabela num_model = 1 # modelo bg = BaggingClassifier(base_estimator=Perceptron(), max_samples=pct_trainamento[i], max_features=1.0, n_estimators=qtd_modelos) # treinando o modelo bg.fit(x_train, y_train) # computando a previsao pred = bg.predict(x_test) # printando os resultados acuracia, auc, f1measure, gmean = printar_resultados( y_test, pred, nome_datasets[h] + '-pct-' + str(pct_trainamento[i]) + '- Bagging com Main [' + str(j) + ']')
import itertools

# Perceptron is imported from the public package: the
# ``sklearn.linear_model.perceptron`` submodule path was removed in later
# scikit-learn releases, while this import works on old and new versions.
from sklearn.linear_model import Perceptron

from generate_picture import read_image
from numbers_mass import one, two


def _flatten(rows):
    """Concatenate an iterable of iterables into one flat list."""
    return list(itertools.chain.from_iterable(rows))


# Two training samples, one per digit, each flattened to a feature vector.
x = [_flatten(one), _flatten(two)]
print(x)
y = [1, 2]

# The classifier is trained at import time so ``clf`` is ready for importers.
clf = Perceptron(random_state=241)
clf.fit(x, y)

if __name__ == "__main__":
    # Check on a training sample, then on an image loaded from disk.
    print(clf.predict([_flatten(one)]))
    print(clf.predict([_flatten(read_image('1.png'))]))
def main():
    """Run the 2048 trainer: show the menu, play one game in the chosen mode.

    Menu keys (taken from the intro text and the checks below):
    ``r``/``g`` record human moves, ``e``/``f`` play with a KNN model,
    ``p``/``s`` play with a Perceptron model, ``n``/``m`` are the random
    modes, ``l`` runs the weight-search "simple learning model", and ``q``
    quits (returns 0). Model-backed modes are only offered when
    ``2048_train.csv`` exists.

    NOTE(review): the block structure below was reconstructed from a source
    whose indentation was lost; nesting of a few statements (marked) should
    be confirmed against the original file.
    """
    while True:
        intro = Text(
            Point(250, 300),
            "2048 TRAINER\n\nTrain model...R\nFull game train...G\n\n>>>Delays between moves<<<\nTest KNN model...E\nTest Perceptron model...P\nRandom model...N\n\n>>>No delays<<<\nTest KNN model...F\nTest Perceptron model...S\nRandom model...M\n\nSimple learning model...L\n\nPRESS Q TO QUIT"
        )
        intro.setSize(20)
        intro.setTextColor(color_rgb(255, 255, 255))

        # Load previously recorded moves, if any: column 0 is the direction,
        # columns 1-16 are the board cells.
        if os.path.isfile("2048_train.csv"):
            data = pd.read_csv("2048_train.csv",
                               header=None,
                               usecols=[
                                   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                   14, 15, 16
                               ])
            direction = pd.read_csv("2048_train.csv", header=None, usecols=[0])
            # 70% training, then the remainder halved into test / development.
            splitRatio = 0.7
            datatrainingSet, datatestSet = splitData(data, splitRatio)
            directiontrainingSet, directiontestSet = splitData(
                direction, splitRatio)
            splitRatio = 0.5
            datatestSet, datadevelopementSet = splitData(
                datatestSet, splitRatio)
            directiontestSet, directiondevelopementSet = splitData(
                directiontestSet, splitRatio)
            direction = np.transpose(direction)
            isPrevData = True
        else:
            isPrevData = False

        # Load the best weights found so far: 17 X weights, 17 Y weights,
        # then the search radius and the best score, one value per line.
        if os.path.isfile("best_weights.txt"):
            # Context manager so the handle is closed even if parsing fails.
            with open("best_weights.txt", 'r') as fileContent:
                weightString = fileContent.readlines()
            # Builtin float replaces the np.float alias, which newer NumPy
            # releases no longer provide; the resulting dtype is the same.
            bestWeightsX = np.array(weightString[0:17]).astype(float)
            bestWeightsY = np.array(weightString[17:34]).astype(float)
            randomRadius = float(weightString[34])
            bestLearnScore = float(weightString[35])
        else:
            bestWeightsX = np.zeros(17)
            bestWeightsY = np.zeros(17)
            randomRadius = 100
            bestLearnScore = 0

        win = GraphWin("2048", 500, 600)
        win.setBackground(color_rgb(0, 103, 105))
        intro.draw(win)

        # Model-backed modes need recorded data to train on.
        if isPrevData:
            options = ['r', 'g', 'e', 'n', 'f', 'm', 'l', 'q', 's', 'p']
        else:
            options = ['r', 'g', 'n', 'm', 'l', 'q']
        mode = '-'
        while mode not in options:
            mode = win.getKey()
        if mode == 'q':
            win.close()
            return 0

        if mode == 'e' or mode == 'f':
            knn = KNeighborsClassifier(n_neighbors=10)
            knn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))
        if mode == 'p' or mode == 's':
            ppn = Perceptron(eta0=0.01, n_iter=10000)
            ppn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        intro.undraw()
        win.setBackground(color_rgb(100, 100, 100))
        score = 0
        scoreText = Text(Point(250, 550), str(score))
        scoreText.setTextColor(color_rgb(255, 255, 255))
        scoreText.setSize(30)
        board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                          [0, 0, 0, 0]])
        # Snapshot of the board before each move, used to detect changes.
        isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                  [0, 0, 0, 0]])

        # Build the 4x4 tile rectangles and their number labels.
        tileList = []
        numberList = []
        board = spawn(board)
        board = spawn(board)
        for i in range(0, 4):
            for j in range(0, 4):
                tileList.append(
                    Rectangle(Point(i * 125 + 5, j * 125 + 5),
                              Point(i * 125 + 120, j * 125 + 120)))
                numberList.append(
                    Text(Point(i * 125 + 60, j * 125 + 60), str(board[j][i])))
                numberList[i * 4 + j].setSize(20)
                tileList[i * 4 + j].setWidth(4)
                tileList[i * 4 + j].setOutline(color_rgb(255, 255, 255))
                numberList[i * 4 + j].setTextColor(color_rgb(0, 0, 0))
                tileList[i * 4 + j].draw(win)
                numberList[i * 4 + j].draw(win)

        # The training data: start empty when nothing was recorded before.
        if not isPrevData:
            data = np.empty((0, 16), float)
            direction = np.empty(0)
        drawBoard(board, win, tileList, numberList)
        move = '-'
        classes = ['w', 's', 'd', 'a']
        moveIter = 0
        nMoves = 0
        totalIterations = 1
        iteration = 1
        currentWeightsX = generateRandomWeights(bestWeightsX,
                                                math.floor(randomRadius))
        currentWeightsY = generateRandomWeights(bestWeightsY,
                                                math.floor(randomRadius))
        currentBestWeightsX = currentWeightsX
        currentBestWeightsY = currentWeightsY
        if mode == 'l':
            maxIterations = int(
                input("Enter number of generations to simulate:"))

        # ---- game loop: one iteration per attempted move ----
        while True:
            score = calcScore(board)
            if mode != 'l':
                drawBoard(board, win, tileList, numberList)
            # NOTE(review): undraw is placed outside the guard because the
            # source repeats the same condition right after it - confirm.
            scoreText.undraw()
            if mode != 'l':
                scoreText.setSize(30)
                scoreText.setText(str(score))
                scoreText.draw(win)
            for i in range(0, 4):
                for j in range(0, 4):
                    isChangeBoard[i][j] = board[i][j]

            if mode == 'r' or mode == 'g':
                # Human plays; in 'r' mode the grid is re-randomised on every
                # 30th move count.
                move = win.getKey()
                if nMoves % 30 == 0 and mode == 'r':
                    board = generateRandomGrid(board)
                nMoves += 1  # NOTE(review): nesting reconstructed - confirm
            else:
                if mode == 'e' or mode == 'n' or mode == 'p':
                    time.sleep(0.5)
                if mode == 'e' or mode == 'f':
                    # Rank the class probabilities; moveIter selects which
                    # rank to play and advances while the board stays
                    # unchanged (it is reset to 0 once a move changes it).
                    probs = np.ravel(
                        knn.predict_proba(gridToData(board).reshape(1, -1)))
                    ranks = [0] * len(probs)
                    for i, x in enumerate(
                            sorted(range(len(probs)), key=lambda y: probs[y])):
                        ranks[x] = i
                    move = classes[ranks[moveIter]]
                    moveIter += 1
                elif mode == 'm':
                    move = classes[random.randint(0, 3)]
                elif mode == 'l':
                    move = classes[calculateDirection(currentWeightsX, board) *
                                   2 +
                                   calculateDirection(currentWeightsY, board)]
                    # Fall back to a random move when the previous one did
                    # not change the board.
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1
                elif mode == 'p' or mode == 's':
                    move = ppn.predict(gridToData(board).reshape(1, -1))
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1
                # NOTE(review): mode 'n' has no branch here, so `move` keeps
                # its previous value in that mode - confirm intent.

            if move == 'w':
                board = shift(board, 0)
            elif move == 'd':
                board = shift(board, 1)
            elif move == 's':
                board = shift(board, 2)
            elif move == 'a':
                board = shift(board, 3)
            elif move == 'q':
                break

            if lose(board):
                print(score)
                if mode == 'l':
                    # Learning mode restarts the game instead of ending.
                    board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                      [0, 0, 0, 0]])
                    isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0],
                                              [0, 0, 0, 0], [0, 0, 0, 0]])
                    board = spawn(board)
                    board = spawn(board)
                    scoreText.setSize(16)
                    scoreText.setText("Score: " + str(score) +
                                      " -- iteration: " + str(iteration) +
                                      " -- r: " + str(randomRadius))
                    if score > bestLearnScore:
                        bestLearnScore = score
                        currentBestWeightsX = currentWeightsX
                        currentBestWeightsY = currentWeightsY
                        print("current best" + str(currentBestWeightsX) +
                              str(currentBestWeightsY))
                    # Every 1000 games: adopt the best weights of the batch
                    # and shrink the search radius.
                    if iteration == 1000:
                        drawBoard(board, win, tileList, numberList)
                        print("best score: " + str(bestLearnScore))
                        iteration = 1
                        randomRadius *= 0.9
                        bestWeightsX = currentBestWeightsX
                        bestWeightsY = currentBestWeightsY
                        print("best weights:" + str(bestWeightsX) +
                              str(bestWeightsY))
                        if totalIterations / 1000 >= maxIterations:
                            # Persist weights, radius and best score, then
                            # stop the learning run.
                            toWrite = np.empty(0)
                            toWrite = np.append(toWrite, bestWeightsX)
                            toWrite = np.append(toWrite, bestWeightsY)
                            toWrite = np.append(toWrite, randomRadius)
                            toWrite = np.append(toWrite, bestLearnScore)
                            np.savetxt("best_weights.txt", toWrite)
                            break
                    totalIterations += 1
                    iteration += 1
                    currentWeightsX = generateRandomWeights(
                        bestWeightsX, math.floor(randomRadius))
                    currentWeightsY = generateRandomWeights(
                        bestWeightsY, math.floor(randomRadius))
                else:
                    break

            # A move only counts (and spawns a tile) if it changed the board.
            isChanged = False
            for i in range(0, 4):
                for j in range(0, 4):
                    if isChangeBoard[i][j] != board[i][j]:
                        isChanged = True
            if isChanged:
                moveIter = 0
                if mode == 'r' or mode == 'g':
                    direction = np.append(direction, move)
                    data = np.vstack([data, gridToData2(board)])
                board = spawn(board)

        # ---- game over screen ----
        score = calcScore(board)
        scoreText.setSize(16)
        scoreText.setText(str(score) + " -- YOU LOSE (Press Q)")
        while move != 'q':
            move = win.getKey()
        win.close()
        if mode == 'r' or mode == 'g':
            # NOTE(review): this writes the training split loaded at startup,
            # not the `data`/`direction` recorded during play, and those
            # split names are undefined when no CSV existed - confirm intent.
            pd.DataFrame.to_csv(pd.DataFrame(
                np.hstack([
                    np.reshape(directiontrainingSet,
                               [np.shape(directiontrainingSet)[0], 1]),
                    datatrainingSet
                ])),
                                "2048_train.csv",
                                index=False,
                                header=False)
data = [ array([ # Vector de [forma;textura;peso] [1, 1, -1], # Manzana [1, -1, -1], # Naranja [1, 1, -1], # Manzana [1, -1, -1], # Naranja ]), array([ 1, -1, 1, 1 ]) ] # Crear una instancia de Perceptron con un máximo de 100 épocas perceptron = Perceptron(max_iter=1000) # Entrenar la red neuronal perceptron.fit(data[0], data[1]) def test_net_accuracy(): # Probar la certeza de la red neuronal con la data previamente alimentada # para filtrar una manzana y una naranja. data = [ array([ [1, 1, -1], # Manzana [1, -1, -1], # Naranja [-1, -1, -1] # Una naranja que es elíptica ]), array([ 1,
import numpy as np
from matplotlib import pyplot
# Perceptron is imported from the public package: the
# ``sklearn.linear_model.perceptron`` submodule path was removed in later
# scikit-learn releases, while this import works on old and new versions.
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

import usps
import perceptron  # for the two_classes function

data, labels = usps.load_train()
data_test, labels_test = usps.load_test()

# Train and evaluate one binary perceptron per class index 0..9.
for k in range(10):
    # presumably two_classes relabels the targets as "class k vs. the rest"
    # - verify against the perceptron module
    labels_k = perceptron.two_classes(labels, k)
    net = Perceptron()
    net.fit(data, labels_k)
    output_train = net.predict(data)
    output_test = net.predict(data_test)
    print(k)
    print(" Score (train)", accuracy_score(labels_k, output_train))
    labels_k_test = perceptron.two_classes(labels_test, k)
    print(" Score (test)", accuracy_score(labels_k_test, output_test))
def main():
    """Run the dynamic-selection experiment on each dataset and log results.

    For every dataset, repeats ``qtd_execucoes`` times: take the first
    stratified fold as the train/test split, fit a Perceptron bagging pool,
    evaluate the OLA, LCA, KNORA-E and KNORA-U selectors on that pool, then
    evaluate ``Arquitetura`` and write its metrics to sheet 4 of the
    results table.
    """
    # 1. Experiment settings
    qtd_modelos = 100
    qtd_execucoes = 30
    qtd_amostras = 0.9
    qtd_folds = 10
    n_vizinhos = 7
    nome_datasets = ['kc1', 'kc2']
    # selectors evaluated on the shared pool, in execution order
    modelos_selecao = ('OLA', 'LCA', 'KNORAE', 'KNORAU')

    # iterate over the datasets
    for h, nome_dataset in enumerate(nome_datasets):

        # 2. Read the dataset: every column but the last is a feature,
        # the last column is the label
        data = pd.read_csv('dataset/' + nome_dataset + '.csv')
        df_x = np.asarray(data.iloc[:, 0:-1])
        df_y = np.asarray(data.iloc[:, -1])

        # 2.1. Table that stores the results, one sheet per method
        tabela = Tabela_excel()
        tabela.Criar_tabela(
            nome_tabela='arquivos_lista03/' + nome_dataset,
            folhas=['OLA', 'LCA', 'KNORA-E', 'KNORA-U', 'Arquitetura'],
            cabecalho=['acuracy', 'auc', 'fmeasure', 'gmean'],
            largura_col=5000)

        # run the algorithms qtd_execucoes times
        for j in range(qtd_execucoes):

            # 3. Non-overlapping split: 90% for training, 10% for testing
            # (the first fold produced by the splitter)
            skf = StratifiedKFold(df_y, n_folds=qtd_folds)
            train_index, test_index = next(iter(skf))
            x_train, y_train = df_x[train_index], df_y[train_index]
            x_test, y_test = df_x[test_index], df_y[test_index]

            # 4. Pool of classifiers
            ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                         max_samples=qtd_amostras,
                                         max_features=1.0,
                                         n_estimators=qtd_modelos)
            ensemble.fit(x_train, y_train)

            # 5. Dynamic selection methods, all sharing the fitted pool
            for nome_modelo in modelos_selecao:
                executar_modelo(nome_modelo, x_train, y_train, x_test, y_test,
                                ensemble.estimators_, n_vizinhos,
                                nome_datasets, h, j, tabela)

            # Proposed architecture: train, predict, report, store
            arq = Arquitetura(n_vizinhos)
            arq.fit(x_train, y_train)
            pred = arq.predict(x_test)

            nome = 'Arquitetura'
            rotulo = nome_datasets[h] + '-' + nome + '-[' + str(j) + ']'
            acuracia, auc, f1measure, gmean = printar_resultados(
                y_test, pred, rotulo)

            tabela.Adicionar_Sheet_Linha(
                4, j, [acuracia, auc, f1measure, gmean])
'NearestNeighbors':NearestNeighbors(), 'Normalizer':Normalizer(), 'NuSVC':NuSVC(), 'NuSVR':NuSVR(), 'Nystroem':Nystroem(), 'OAS':OAS(), 'OneClassSVM':OneClassSVM(), 'OrthogonalMatchingPursuit':OrthogonalMatchingPursuit(), 'OrthogonalMatchingPursuitCV':OrthogonalMatchingPursuitCV(), 'PCA':PCA(), 'PLSCanonical':PLSCanonical(), 'PLSRegression':PLSRegression(), 'PLSSVD':PLSSVD(), 'PassiveAggressiveClassifier':PassiveAggressiveClassifier(), 'PassiveAggressiveRegressor':PassiveAggressiveRegressor(), 'Perceptron':Perceptron(), 'ProjectedGradientNMF':ProjectedGradientNMF(), 'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(), 'RANSACRegressor':RANSACRegressor(), 'RBFSampler':RBFSampler(), 'RadiusNeighborsClassifier':RadiusNeighborsClassifier(), 'RadiusNeighborsRegressor':RadiusNeighborsRegressor(), 'RandomForestClassifier':RandomForestClassifier(), 'RandomForestRegressor':RandomForestRegressor(), 'RandomizedLasso':RandomizedLasso(), 'RandomizedLogisticRegression':RandomizedLogisticRegression(), 'RandomizedPCA':RandomizedPCA(), 'Ridge':Ridge(), 'RidgeCV':RidgeCV(), 'RidgeClassifier':RidgeClassifier(), 'RidgeClassifierCV':RidgeClassifierCV(),
def task2(sneakers_data: pd.DataFrame, boots_data: pd.DataFrame,
          sneakers_labels: pd.DataFrame, boots_labels: pd.DataFrame):
    """Cross-validate a Perceptron on the combined sneakers and boots data.

    The two datasets (and their labels) are stacked, split with a shuffled
    4-fold ``KFold``, and for each fold the training time, prediction time,
    accuracy and confusion-matrix percentages are printed. Min / max / mean
    and total statistics over all folds are printed at the end.

    :param sneakers_data: feature rows of the first class
    :param boots_data: feature rows of the second class
    :param sneakers_labels: labels matching ``sneakers_data``
    :param boots_labels: labels matching ``boots_data``
    """
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed; the
    # stacked result (original index labels kept) is the same.
    full_data = pd.concat([sneakers_data, boots_data])
    full_labels = pd.concat([sneakers_labels, boots_labels])

    train_times = []
    predict_times = []
    accuracies = []
    print("\tTask 2 output")

    num_splits = 4
    for train_index, test_index in KFold(n_splits=num_splits,
                                         shuffle=True).split(full_data):
        train_data = full_data.iloc[train_index]
        test_data = full_data.iloc[test_index]
        train_labels = full_labels.iloc[train_index]
        test_labels = full_labels.iloc[test_index]

        pctrn = Perceptron()

        # Time the fit
        train_start_time = timeit.default_timer()
        pctrn.fit(X=train_data, y=train_labels)
        train_end_time = timeit.default_timer()
        train_time = train_end_time - train_start_time
        train_times.append(train_time)
        print("\tPerceptron took", train_time, "seconds to train on data")

        # Time the prediction
        predict_start_time = timeit.default_timer()
        prediction = pctrn.predict(test_data)
        predict_end_time = timeit.default_timer()
        predict_time = predict_end_time - predict_start_time
        predict_times.append(predict_time)
        print("\tPerceptron took", predict_time,
              "seconds to make a prediction")

        accuracy = accuracy_score(test_labels, prediction) * 100
        accuracies.append(accuracy)
        print("\tAccuracy for perceptron", accuracy, "%")

        # Confusion-matrix cells as a percentage of the fold's test size.
        # NOTE(review): cell [0, 0] is reported as "true positive"; which
        # label counts as positive depends on the label ordering - confirm.
        confusion = confusion_matrix(test_labels, prediction)
        n_test = len(test_labels)
        percent_true_pos = (confusion[0, 0] / n_test) * 100
        percent_false_pos = (confusion[0, 1] / n_test) * 100
        percent_true_neg = (confusion[1, 1] / n_test) * 100
        percent_false_neg = (confusion[1, 0] / n_test) * 100
        print("\tPerceptron confusion matrix true positive:",
              percent_true_pos, "%")
        print("\tPerceptron confusion matrix false positive:",
              percent_false_pos, "%")
        print("\tPerceptron confusion matrix true negative:",
              percent_true_neg, "%")
        print("\tPerceptron confusion matrix false negative:",
              percent_false_neg, "%\n")

    # Summary over all folds
    print("\tThe minimum train time was", np.min(train_times), "seconds")
    print("\tThe maximum train time was", np.max(train_times), "seconds")
    print("\tThe average train time was", np.mean(train_times), "seconds\n")
    print("\tThe minimum prediction time was", np.min(predict_times),
          "seconds")
    print("\tThe maximum prediction time was", np.max(predict_times),
          "seconds")
    print("\tThe average prediction time was", np.mean(predict_times),
          "seconds\n")
    print("\tMinimum Accuracy was", np.min(accuracies), "%")
    print("\tMaximum accuracy was", np.max(accuracies), "%")
    print("\tAverage accuracy was", np.mean(accuracies), "%\n")
    print(
        "\tTotal train time for all k-folds in perceptron with gamma value of =",
        np.sum(train_times), "seconds")
    print(
        "\tTotal prediction time for all k-folds in perceptron with gamma of =",
        np.sum(predict_times), "seconds\n")
class PerceptronMask(BaseClassifier):
    """ PerceptronMask

    A mask for scikit-learn's Perceptron classifier.

    scikit-multiflow's framework requires a few interfaces that are not
    present in scikit-learn; this mask lets the former use classifiers
    native to the latter by delegating every call to a wrapped Perceptron.

    """

    def __init__(self):
        super().__init__()
        # wrapped estimator that all methods below delegate to
        self.classifier = Perceptron(n_iter=50)

    def fit(self, X, y, classes=None, weight=None):
        """ fit

        Calls the wrapped Perceptron's fit function.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        weight:
            Instance weights, forwarded as ``sample_weight``.

        Returns
        -------
        PerceptronMask
            self

        """
        estimator = self.classifier
        estimator.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Calls the wrapped Perceptron's partial_fit function.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: list, optional
            A list with all the possible labels of the classification problem.

        weight:
            Instance weights, forwarded positionally after ``classes``.

        Returns
        -------
        PerceptronMask
            self

        """
        estimator = self.classifier
        estimator.partial_fit(X, y, classes, weight)
        return self

    def predict(self, X):
        """ predict

        Uses the current model to predict samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        Returns
        -------
        The wrapped classifier's predictions for all instances in X.

        """
        predictions = self.classifier.predict(X)
        return predictions

    def predict_proba(self, X):
        """ predict_proba

        Forwards to the wrapped classifier's ``predict_proba``.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        Whatever the wrapped classifier's ``predict_proba`` returns.
        NOTE(review): presumably one row per sample with one probability
        per known class - confirm that the wrapped estimator provides this
        method at all.

        """
        probabilities = self.classifier.predict_proba(X)
        return probabilities

    def score(self, X, y):
        """ score

        Returns the wrapped classifier's score for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            The features matrix.

        y: Array-like
            An array-like containing the class labels for all samples in X.

        Returns
        -------
        The value returned by the wrapped classifier's ``score``.

        """
        result = self.classifier.score(X, y)
        return result

    def get_info(self):
        """Return a one-line summary of the wrapped classifier's parameters."""
        params = self.classifier.get_params()

        raw_penalty = params['penalty']
        penalty_text = raw_penalty if raw_penalty is not None else 'None'
        intercept_text = 'True' if params['fit_intercept'] else 'False'
        shuffle_text = 'True' if params['shuffle'] else 'False'

        pieces = [
            'Perceptron: penalty: ', penalty_text,
            ' - alpha: ', str(round(params['alpha'], 3)),
            ' - fit_intercept: ', intercept_text,
            ' - n_iter: ', str(params['n_iter']),
            ' - shuffle: ', shuffle_text,
        ]
        return ''.join(pieces)
def get_paramater_grids(data_path):
    """Return every model pipeline and parameter grid included in the search.

    Parameters
    ----------
    data_path:
        Prefix that is concatenated with ``'meta_data.csv'`` to locate the
        meta-data file handed to ``hdpp.DataCleaner``.

    Returns
    -------
    dict
        Maps a model name (``'SVC'``, ``'SVC_poly'``, ``'GM'``,
        ``'Perceptron_PCA'``, ``'Perceptron_LDA'``) to a dict with two
        entries: ``'pipeline'`` (cleaner -> feature step -> classifier) and
        ``'parameters'`` (the grid for that pipeline, keyed by
        ``<step>__<param>``).
    """

    def fresh_cleaner():
        # Every pipeline gets its own DataCleaner instance.
        return hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline

    # Candidate PCA sizes. Per the original author's notes, 0 is meant as a
    # pass-through value (no PCA), and 24 was picked as a safe upper bound
    # because the number of features may vary with the training data.
    pca_nfeatures = [0, 1, 2, 5, 10, 15, 20, 24]

    # Original author's note (presumably about the SVC ``C`` grids below):
    # the textbook parameter counts training points allowed on the wrong
    # side of the hyperplane, with 0 as a lower bound and about half of the
    # ~170 training points as an upper bound; the sklearn formulation
    # differs from that.

    # Kernel coefficient values shared by both SVC grids.
    svc_gamma = np.logspace(-7, 0, 8)

    return {
        # SVC
        'SVC': {
            'pipeline': Pipeline([
                ('cleaner', fresh_cleaner()),
                ('feature', PCA()),
                ('classifier', SVC()),
            ]),
            'parameters': {
                # gamma is still listed for the linear kernel; kept as-is.
                'classifier__kernel': ('linear', 'rbf', 'sigmoid'),
                'classifier__C': np.logspace(-2, 4, 7),
                'classifier__gamma': svc_gamma,
                'feature__n_components': pca_nfeatures,
            },
        },
        # SVC with a polynomial kernel
        'SVC_poly': {
            'pipeline': Pipeline([
                ('cleaner', fresh_cleaner()),
                ('feature', PCA()),
                ('classifier', SVC(kernel='poly')),
            ]),
            'parameters': {
                'classifier__degree': [2, 3, 4, 5],
                'classifier__C': np.logspace(-2, 3, 6),
                'classifier__gamma': svc_gamma,
                'feature__n_components': pca_nfeatures,
            },
        },
        # Gaussian mixture
        'GM': {
            'pipeline': Pipeline([
                ('cleaner', fresh_cleaner()),
                ('feature', PCA()),
                ('classifier', GM(max_iter=250, n_init=25)),
            ]),
            'parameters': {
                'classifier__n_components': [1, 2, 4, 8, 16, 32],
                'classifier__covariance_type': ['full', 'tied', 'diag', 'spherical'],
                'feature__n_components': pca_nfeatures,
            },
        },
        # Perceptron on PCA features
        'Perceptron_PCA': {
            'pipeline': Pipeline([
                ('cleaner', fresh_cleaner()),
                ('feature', PCA()),
                ('classifier', Perceptron()),
            ]),
            'parameters': {
                'feature__n_components': pca_nfeatures,
            },
        },
        # Perceptron on LDA features (nothing to tune)
        'Perceptron_LDA': {
            'pipeline': Pipeline([
                ('cleaner', fresh_cleaner()),
                ('feature', LinearDiscriminantAnalysis()),
                ('classifier', Perceptron()),
            ]),
            'parameters': {},
        },
    }
#encoding=utf8
import os

import pandas as pd
# Import from the public package: the private ``sklearn.linear_model.perceptron``
# module path is not importable on newer scikit-learn releases.
from sklearn.linear_model import Perceptron

# Start from a clean slate so a stale result file is never picked up.
if os.path.exists('./step2/result.csv'):
    os.remove('./step2/result.csv')

# Load the training features
train_data = pd.read_csv('./step2/train_data.csv')
# Load the training labels (only the 'target' column is used)
train_label = pd.read_csv('./step2/train_label.csv')
train_label = train_label['target']
# Load the test features
test_data = pd.read_csv('./step2/test_data.csv')

# Train the perceptron and predict the test set
clf = Perceptron(eta0=0.1, max_iter=500)
clf.fit(train_data, train_label)
res = clf.predict(test_data)

# Save the predictions as a single 'result' column, without the row index
res = {"result": res}
res = pd.DataFrame(res)
res.to_csv('./step2/result.csv', index=False)
# accuracy_score's documented argument order is (y_true, y_pred); accuracy is
# symmetric, so the reported values are unchanged.
acc_svc = accuracy_score(y_test, y_pred)
print(acc_svc)  # 0.81218

# Linear support vector machine (LinearSVC)
from sklearn.svm import LinearSVC
linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_test)
acc_linear_svc = accuracy_score(y_test, y_pred)
print(acc_linear_svc)  # 0.77157

# Perceptron
# Imported from the public package: the private
# ``sklearn.linear_model.perceptron`` path is not importable on newer
# scikit-learn releases.
from sklearn.linear_model import Perceptron
perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_test)
acc_perceptron = accuracy_score(y_test, y_pred)
print(acc_perceptron)  # 0.77665

# Decision tree
from sklearn.tree import DecisionTreeClassifier
decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_test)
acc_decisiontree = accuracy_score(y_test, y_pred)
print(acc_decisiontree)  # 0.82233

# Random forest
def __init__(self):
    """Initialise the parent class and create the wrapped Perceptron."""
    super().__init__()
    # ``n_iter`` was renamed to ``max_iter`` in scikit-learn 0.19 and the old
    # keyword was removed in 0.21. ``tol=None`` disables the tolerance-based
    # early stop so ``fit`` still runs 50 passes over the data.
    self.classifier = Perceptron(max_iter=50, tol=None)
class PerceptronMask(StreamModel):
    """ PerceptronMask

    Thin wrapper around scikit-learn's Perceptron classifier.

    scikit-multiflow expects a few interfaces that scikit-learn estimators
    do not provide; this mask exposes them and forwards the actual work to
    the wrapped ``Perceptron`` instance stored in ``self.classifier``.

    """

    def __init__(self,
                 penalty=None,
                 alpha=0.0001,
                 fit_intercept=True,
                 max_iter=1000,
                 tol=1e-3,
                 shuffle=True,
                 verbose=0,
                 eta0=1.0,
                 n_jobs=1,
                 random_state=0,
                 class_weight=None,
                 warm_start=False):
        # Keep every hyper-parameter on the instance so reset() can rebuild
        # an identical classifier later.
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta0 = eta0
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.class_weight = class_weight
        self.warm_start = warm_start
        super().__init__()
        self.classifier = Perceptron(**self._hyper_parameters())

    def _hyper_parameters(self):
        """Collect the stored hyper-parameters as keyword arguments."""
        return {
            'penalty': self.penalty,
            'alpha': self.alpha,
            'fit_intercept': self.fit_intercept,
            'max_iter': self.max_iter,
            'tol': self.tol,
            'shuffle': self.shuffle,
            'verbose': self.verbose,
            'random_state': self.random_state,
            'eta0': self.eta0,
            'warm_start': self.warm_start,
            'class_weight': self.class_weight,
            'n_jobs': self.n_jobs,
        }

    def fit(self, X, y, classes=None, weight=None):
        """ fit

        Train the wrapped Perceptron with sklearn's ``fit``.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Feature matrix.

        y: Array-like
            Class label of every sample in X.

        classes: Not used.

        weight:
            Per-instance weights, passed on as ``sample_weight``.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Incrementally train the wrapped Perceptron with sklearn's
        ``partial_fit``.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Feature matrix.

        y: Array-like
            Class label of every sample in X.

        classes: list, optional
            All possible labels of the classification problem.

        weight:
            Per-instance weights, passed on positionally after ``classes``.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.partial_fit(X, y, classes, weight)
        return self

    def predict(self, X):
        """ predict

        Predict the label of every sample in X with the current model.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Feature matrix.

        Returns
        -------
        numpy.ndarray
            The predicted labels, converted with ``np.asarray``.

        """
        predictions = self.classifier.predict(X)
        return np.asarray(predictions)

    def predict_proba(self, X):
        """ predict_proba

        Estimate, for every sample in X, how likely it is to belong to each
        known class.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            Samples to evaluate.

        Returns
        -------
        numpy.ndarray
            Whatever the wrapped classifier's ``_predict_proba_lr`` returns;
            presumably one row per sample and one column per known class.

        """
        return self.classifier._predict_proba_lr(X)

    def score(self, X, y):
        """ score

        Evaluate the wrapped classifier on the given samples.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            Feature matrix.

        y: Array-like
            True class label of every sample in X.

        Returns
        -------
        float
            The value returned by the wrapped classifier's ``score``.

        """
        return self.classifier.score(X, y)

    def get_info(self):
        """Return a one-line description of the wrapped classifier's settings."""
        params = self.classifier.get_params()
        reported = ('penalty', 'alpha', 'fit_intercept', 'max_iter', 'tol',
                    'shuffle', 'eta0', 'warm_start', 'class_weight', 'n_jobs')
        fields = [' - {}: {}'.format(key, params[key]) for key in reported]
        return type(self).__name__ + ':' + ''.join(fields)

    def reset(self):
        """Re-run ``__init__`` with the stored hyper-parameters, which replaces
        the wrapped classifier with a fresh, unfitted one."""
        self.__init__(**self._hyper_parameters())
def result():
    """Train a set of text classifiers on an uploaded CSV and report accuracy.

    On a POST request the handler:

    1. reads the uploaded file ``myFile`` as an ISO-8859-1 CSV and takes the
       text column named by form field ``feature`` and the label column
       named by form field ``label`` (renders ``nameError.html`` if either
       column is missing);
    2. strips URLs and non-alphanumeric characters from the text and
       lower-cases it;
    3. splits the data 67/33, fits a TF-IDF vectorizer on the training part
       and trains twelve classifiers on the TF-IDF features;
    4. pickles the most accurate classifier (first one wins on ties) to
       ``<filename>_model`` and the vectorizer to ``<filename>_tfidfVect``;
    5. renders ``result.html`` with the accuracies as ``ac1`` .. ``ac12``.

    Returns
    -------
    The rendered template for POST requests; ``None`` for any other method.
    """
    if request.method != 'POST':
        # NOTE(review): non-POST requests have always fallen through without a
        # response; confirm the route only accepts POST.
        return None

    path = request.files.get('myFile')
    df = pd.read_csv(path, encoding="ISO-8859-1")
    # NOTE(review): ``filename`` comes straight from the client and is used to
    # build output paths below - consider sanitising it before use.
    filename = request.form['filename']
    str1 = request.form['feature']
    str2 = request.form['label']
    if str1 not in df.columns or str2 not in df.columns:
        return render_template('nameError.html')
    y = df[str2]
    X = df[str1]

    # Drop URLs first, then everything that is not a letter, digit or space.
    cleaned = [
        re.sub(r'[^a-zA-Z0-9 ]+', '', re.sub(r"http\S+", "", subject))
        for subject in X
    ]
    X = pd.Series(cleaned).str.lower()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    tfidfvect = TfidfVectorizer(ngram_range=(1, 1))
    X_train_tfidf = tfidfvect.fit_transform(X_train)

    # The test features and the dense copies are identical for every model,
    # so they are computed once; the printed timings therefore cover only
    # fit + predict.
    X_test_tfidf = tfidfvect.transform(X_test)
    X_train_dense = X_train_tfidf.toarray()
    X_test_dense = X_test_tfidf.toarray()

    # (log message, estimator, train on dense arrays?) - the order matters:
    # it defines ac1..ac12 and the tie-break when picking the best model.
    candidates = [
        ("accuracy SVC: {} and time: {} s", LinearSVC(), False),
        ("accuracy LR: {} and time: {}",
         LogisticRegression(n_jobs=-1, multi_class='multinomial', solver='newton-cg'), False),
        ("accuracy RFC: {} and time: {}", RandomForestClassifier(n_jobs=-1), False),
        ("accuracy MNB: {} and time: {}", MultinomialNB(), False),
        ("accuracy GNB: {} and time: {}", GaussianNB(), True),
        ("accuracy LRCV: {} and time: {}", LogisticRegressionCV(n_jobs=-1), False),
        ("accuracy ABC: {} and time: {}", AdaBoostClassifier(), False),
        ("accuracy BNB: {} and time: {}", BernoulliNB(), True),
        ("accuracy Per: {} and time: {}", Perceptron(n_jobs=-1), True),
        ("accuracy RidCV: {} and time: {}", RidgeClassifierCV(), True),
        ("accuracy SGDC: {} and time: {}", SGDClassifier(n_jobs=-1), True),
        # NOTE(review): labelled "XGBC" but this is a second SGDClassifier;
        # presumably an XGBoost model was intended - confirm.
        ("accuracy XGBC: {} and time: {}", SGDClassifier(n_jobs=-1), True),
    ]

    accuracies = []
    for message, clf, needs_dense in candidates:
        train_features = X_train_dense if needs_dense else X_train_tfidf
        test_features = X_test_dense if needs_dense else X_test_tfidf
        start = time()
        clf.fit(train_features, y_train)
        accuracy = accuracy_score(y_test, clf.predict(test_features))
        end = time()
        print(message.format(accuracy, (end - start)))
        accuracies.append(accuracy)

    # list.index returns the first maximum, so earlier models win ties.
    best_model = candidates[accuracies.index(max(accuracies))][1]

    # Context managers make sure both files are flushed and closed.
    with open(filename + '_model', 'wb') as model_file:
        pickle.dump(best_model, model_file)
    with open(filename + '_tfidfVect', 'wb') as vect_file:
        pickle.dump(tfidfvect, vect_file)

    scores = {
        'ac{}'.format(position): accuracy
        for position, accuracy in enumerate(accuracies, start=1)
    }
    return render_template("result.html", **scores)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=i)

# Standardise the training and test variables (scaler fitted on train only)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train the Perceptron on the previously separated training set:
# at most 100 passes over the data (max_iter=100), learning rate eta0=0.1.
# Imported from the public package: the private
# ``sklearn.linear_model.perceptron`` path is not importable on newer
# scikit-learn releases.
from sklearn.linear_model import Perceptron
classifier = Perceptron(max_iter=100, eta0=0.1)
classifier.fit(X_train, y_train)

# Feed the test data to the Perceptron and collect the predictions
y_pred = classifier.predict(X_test)

# Build the confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# TP = # True Positives, TN = # True Negatives,
# FP = # False Positives, FN = # False Negatives
# Acerto (hit rate) = TP / (TP + FP)

# Applying the evaluation method