Example #1
# Assumed import: the wrapped model is scikit-learn's Perceptron
# (the n_iter parameter below only exists in scikit-learn < 0.21).
from sklearn.linear_model import Perceptron as SKLModel


class PerceptronImpl:

    def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True, max_iter=None, tol=None, shuffle=True, verbose=0, eta0=1.0, n_jobs=None, random_state=None, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, class_weight='balanced', warm_start=False, n_iter=None):
        self._hyperparams = {
            'penalty': penalty,
            'alpha': alpha,
            'fit_intercept': fit_intercept,
            'max_iter': max_iter,
            'tol': tol,
            'shuffle': shuffle,
            'verbose': verbose,
            'eta0': eta0,
            'n_jobs': n_jobs,
            'random_state': random_state,
            'early_stopping': early_stopping,
            'validation_fraction': validation_fraction,
            'n_iter_no_change': n_iter_no_change,
            'class_weight': class_weight,
            'warm_start': warm_start,
            'n_iter': n_iter}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if (y is not None):
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
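A minimal usage sketch for this wrapper (an illustration, not part of the source; on scikit-learn >= 0.21 the legacy n_iter entry has to be dropped before fitting):

impl = PerceptronImpl(max_iter=1000, tol=1e-3)
impl._hyperparams.pop('n_iter')  # n_iter no longer exists on scikit-learn's Perceptron
impl.fit([[0.0, 0.0], [1.0, 1.0]], [0, 1])
print(impl.predict([[2.0, 2.0]]))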
Example #2
 def fit(self, X, y=None):
     self._sklearn_model = SKLModel(**self._hyperparams)
     if (y is not None):
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #3
# Assumed imports; generate_labeled_points is defined elsewhere in the source project.
from math import ceil
import numpy as np
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC

def simulation(n, runs, margin=0, p_runs=100, d=2):
    '''
    Run a given number of simulations to compare SVM and perceptron error rates. Generates a set of
    training and testing points, then runs a single SVM and a given number of perceptrons (the average
    error is taken).
    :param n: number of points
    :param runs: number of times to run the simulation
    :param margin: minimum margin (gamma) used when generating the points
    :param p_runs: number of perceptrons to average
    :param d: dimensionality of points
    :return: pandas dataframe; each row holds [n, margin, avg perceptron error, svm error] for one run
    '''

    all_data = []
    for i in range(runs):
        # Get test data and its gamma; split roughly 80-20 train/test
        train_dat, test_dat, margin = generate_labeled_points(n_train=n,
                                                              n_test=ceil(n * .25),
                                                              gamma=margin,
                                                              dim=d)

        # Separate train points from labels
        train_points = [x[0] for x in train_dat]
        train_labels = [x[1] for x in train_dat]

        # Separate test points from their labels
        test_points = [x[0] for x in test_dat]
        test_labels = [x[1] for x in test_dat]

        # Run k = p_runs perceptrons on this same training data and take their mean error
        p_errors = []
        seed = np.random.RandomState()
        for k in range(p_runs):
            perceptron = Perceptron(random_state=seed)
            perceptron.fit(train_points, train_labels)
            # score() returns accuracy, so the error rate is its complement
            p_errors.append(1 - perceptron.score(test_points, test_labels))
        p_error = np.mean(p_errors)

        # Train and test a single linear SVM
        svm = SVC(kernel="linear")
        svm.fit(train_points, train_labels)
        svm_error = 1 - svm.score(test_points, test_labels)

        all_data.append([n, margin, p_error, svm_error])

    df = pd.DataFrame(
        all_data, columns=['n', 'margin', 'avg perceptron_error', 'svm_error'])
    return df
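A hypothetical call of simulation (generate_labeled_points comes from the source project, so the argument values here are only an illustration):

df = simulation(n=100, runs=20, margin=0.1)
print(df[['avg perceptron_error', 'svm_error']].mean())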
Example #4
 def __init__(self,
              penalty=None,
              alpha=0.0001,
              fit_intercept=True,
              max_iter=1000,
              tol=1e-3,
              shuffle=True,
              verbose=0,
              eta0=1.0,
              n_jobs=1,
              random_state=0,
              class_weight=None,
              warm_start=False):
     self.penalty = penalty
     self.alpha = alpha
     self.fit_intercept = fit_intercept
     self.max_iter = max_iter
     self.tol = tol
     self.shuffle = shuffle
     self.verbose = verbose
     self.eta0 = eta0
     self.n_jobs = n_jobs
     self.random_state = random_state
     self.class_weight = class_weight
     self.warm_start = warm_start
     super().__init__()
     self.classifier = Perceptron(penalty=self.penalty,
                                  alpha=self.alpha,
                                  fit_intercept=self.fit_intercept,
                                  max_iter=self.max_iter,
                                  tol=self.tol,
                                  shuffle=self.shuffle,
                                  verbose=self.verbose,
                                  random_state=self.random_state,
                                  eta0=self.eta0,
                                  warm_start=self.warm_start,
                                  class_weight=self.class_weight,
                                  n_jobs=self.n_jobs)
Example #5
def demo():
    """ _test_mol

    This demo tests the MOL learner on a file stream, which reads from 
    the music.csv file.

    The test computes the performance of the MOL learner as well as 
    the time to create the structure and classify all the samples in 
    the file.

    """
    # Setup logging
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # Setup the file stream
    stream = FileStream("../data/datasets/music.csv", 0, 6)
    stream.prepare_for_use()

    # Setup the classifier, by default it uses Logistic Regression
    # classifier = MultiOutputLearner()
    # classifier = MultiOutputLearner(base_estimator=SGDClassifier(n_iter=100))
    classifier = MultiOutputLearner(base_estimator=Perceptron())

    # Setup the pipeline
    pipe = Pipeline([('classifier', classifier)])

    pretrain_size = 150
    logging.info('Pre-training on %s samples', str(pretrain_size))
    logging.info('Total %s samples', str(stream.n_samples))
    X, y = stream.next_sample(pretrain_size)
    # classifier.fit(X, y)
    classes = stream.target_values
    classes_flat = list(set([item for sublist in classes for item in sublist]))
    pipe.partial_fit(X, y, classes=classes_flat)
    count = 0
    true_labels = []
    predicts = []
    init_time = timer()
    logging.info('Evaluating...')
    while stream.has_more_samples():
        X, y = stream.next_sample()
        # p = classifier.predict(X)
        p = pipe.predict(X)
        predicts.extend(p)
        true_labels.extend(y)
        count += 1
    perf = hamming_score(true_labels, predicts)
    logging.info('Evaluation time: %s s', str(timer() - init_time))
    logging.info('Total samples analyzed: %s', str(count))
    logging.info('The classifier\'s static Hamming score    : %0.3f' % perf)
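hamming_score is used above but not defined in the snippet; a minimal sketch consistent with its multi-label use here (an assumption, not the original helper):

import numpy as np

def hamming_score(y_true, y_pred):
    # fraction of individual label assignments that match,
    # averaged over all samples and labels (i.e. 1 - Hamming loss)
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return (y_true == y_pred).mean()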
Example #6
    def preceptron(self):
        # Perceptron
        perceptron = Perceptron(penalty='l2', max_iter=1000, shuffle=True)
        perceptron.fit(self.X_train, self.y_train)

        acc = round(perceptron.score(self.X_train, self.y_train) * 100, 2)
        print("acc with Perceptron:", acc)

        self.y_pred = perceptron.predict(self.X_test)
Example #7
    def fit(self, x, y):
        '''
        method to train the two-level architecture
        :x: training data
        :y: labels of the data
        :dsel_x: patterns of the validation window
        :dsel_y: labels of the validation window
        '''

        # saving the training data
        self.x_train = x
        self.y_train = y

        # saving the hardness of the instances
        self.H = self.kDN(x, y)

        # training level 1 #########################################
        self.levelone = KNeighborsClassifier(self.n_vizinhos)
        self.levelone.fit(x, y)

        # predicting on the training set
        y_pred = self.levelone.predict(x)

        # saving the indices of the misclassified instances
        indices = [i for i in range(len(y)) if y_pred[i] != y[i]]

        # obtaining the hardness threshold of the problem
        self.limiar = self.defineThreshold(indices)
        ###############################################################

        # training level 2 #########################################
        # obtaining the hard instances
        x_dificeis, y_dificeis = self.hardInstances(x, y, self.limiar)

        # creating the ensemble
        self.ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                          max_samples=0.9,
                                          max_features=1.0,
                                          n_estimators=100)
        self.ensemble.fit(x_dificeis, y_dificeis)

        # training the level-2 model
        self.leveltwo = KNORAU(self.ensemble.estimators_, self.n_vizinhos)
        self.leveltwo.fit(x_dificeis, y_dificeis)
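kDN is called above but not shown. A common formulation of k-Disagreeing Neighbors scores each instance by the share of its k nearest neighbors that carry a different label; a sketch under that assumption:

import numpy as np
from sklearn.neighbors import NearestNeighbors

def kDN(x, y, k=7):
    # hardness per instance: fraction of its k nearest neighbors with a different label
    nn = NearestNeighbors(n_neighbors=k + 1).fit(x)
    _, idx = nn.kneighbors(x)  # column 0 is the instance itself
    y = np.asarray(y)
    return np.array([(y[neigh[1:]] != y[i]).mean() for i, neigh in enumerate(idx)])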
Example #8
def main():
    #create the training & test sets, skipping the header row with [1:]

    dataset_T = genfromtxt(open('Data/demoTrain.csv', 'r'),
                           delimiter=',',
                           dtype='f8')[:]

    dataset_R = genfromtxt(open('Data/demoTarget.csv', 'r'),
                           delimiter=',',
                           dtype='f8')[:]

    dataset_v = genfromtxt(open('Data/demoTest.csv', 'r'),
                           delimiter=',',
                           dtype='f8')[:]

    trueData = genfromtxt(open('Data/validate.csv', 'r'),
                          delimiter=',',
                          dtype='f8')[:]

    target = [x for x in dataset_R]
    train = [x[:] for x in dataset_T]
    validate = [x[:] for x in dataset_v]
    y = [x for x in trueData]

    test = genfromtxt(open('Data/demoTest.csv', 'r'),
                      delimiter=',',
                      dtype='f8')[:]

    per = Perceptron(max_iter=2, shuffle=True)  # n_iter was removed in scikit-learn 0.21
    per.fit(train, target)

    #val = per.decision_function(validate)

    val = per.predict(validate)
    score = per.score(validate, y)

    print(str(score) + "\n")

    for v in val:
        print(v)

    # fit_transform only existed on very old scikit-learn linear models
    a = per.fit_transform(train, target)
    print(a)
Example #9
 def __init__(self,
              penalty=None,
              alpha=0.0001,
              fit_intercept=True,
              max_iter=None,
              tol=None,
              shuffle=True,
              verbose=0,
              eta0=1.0,
              n_jobs=None,
              random_state=None,
              early_stopping=False,
              validation_fraction=0.1,
              n_iter_no_change=5,
              class_weight='balanced',
              warm_start=False,
              n_iter=None):
     self._hyperparams = {
         'penalty': penalty,
         'alpha': alpha,
         'fit_intercept': fit_intercept,
         'max_iter': max_iter,
         'tol': tol,
         'shuffle': shuffle,
         'verbose': verbose,
         'eta0': eta0,
         'n_jobs': n_jobs,
         'random_state': random_state,
         'early_stopping': early_stopping,
         'validation_fraction': validation_fraction,
         'n_iter_no_change': n_iter_no_change,
         'class_weight': class_weight,
         'warm_start': warm_start,
         'n_iter': n_iter
     }
     self._wrapped_model = Op(**self._hyperparams)
Example #11
def test_perceptron(x: list, y: list, learning_rate: float, max_iter: int) -> None:
    # eta0 is the Perceptron's learning-rate parameter; alpha is a regularization strength
    perceptron = Perceptron(max_iter=max_iter, eta0=learning_rate)
    perceptron.fit(x, y)
    plot_model(np.array(x), perceptron)
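plot_model is not included in the snippet; a minimal 2-D decision-boundary sketch that matches the call above (an assumption, not the original helper):

import numpy as np
import matplotlib.pyplot as plt

def plot_model(x, model, resolution=200):
    # paint the model's decision regions behind the point cloud
    x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, resolution),
                         np.linspace(y_min, y_max, resolution))
    zz = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contourf(xx, yy, zz, alpha=0.3)
    plt.scatter(x[:, 0], x[:, 1], s=10)
    plt.show()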
Example #12
         x_val, y_val = validacaoCompleta(x_train, y_train)
     elif(validacao[k] == validacao[1]):
         x_val, y_val = validacaoInstanciasFaceis(x_train, y_train, n_vizinhos)
     elif(validacao[k] == validacao[2]):
         x_val, y_val = validacaoInstanciasDificeis(x_train, y_train, n_vizinhos)

     # 3.3. End ################################################################################################

     # 3.4. Instantiating the classifiers ##########################################################

     ########## instantiating the Bagging+REP model ###########################################
     # setting the model's number in the table
     num_model = 0

     # instantiating the classifier
     ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                 max_samples=qtd_amostras,
                                 max_features=1.0,
                                 n_estimators=qtd_modelos)

     # training the model
     ensemble.fit(x_train, y_train)

     # pruning the ensemble
     ensemble = REP(x_val, y_val, ensemble)

     # computing the prediction
     pred = ensemble.predict(x_test)

     # computing the diversity of the ensemble
     q_statistic = MedidasDiversidade('q', x_val, y_val, ensemble)
Example #13
                y_test, pred,
                nome_datasets[h] + '-pct-' + str(pct_trainamento[i]) +
                '- Bagging com DecisionTree [' + str(j) + ']')

            # writing out the results obtained
            tabela.Adicionar_Sheet_Linha(num_model, j,
                                         [acuracia, auc, f1measure, gmean])

            # 3.2.1. End ###################################################################################

            # 3.2.2. Bagging with Main ################################################################
            # number of the model in the table
            num_model = 1

            # model
            bg = BaggingClassifier(base_estimator=Perceptron(),
                                   max_samples=pct_trainamento[i],
                                   max_features=1.0,
                                   n_estimators=qtd_modelos)
            # training the model
            bg.fit(x_train, y_train)

            # computing the prediction
            pred = bg.predict(x_test)

            # printing the results
            acuracia, auc, f1measure, gmean = printar_resultados(
                y_test, pred,
                nome_datasets[h] + '-pct-' + str(pct_trainamento[i]) +
                '- Bagging com Main [' + str(j) + ']')
Example #14
from sklearn.linear_model import Perceptron  # sklearn.linear_model.perceptron was removed in 0.24
from numbers_mass import one, two
import itertools
from generate_picture import read_image

x = [
    list(itertools.chain.from_iterable(one)),
    list(itertools.chain.from_iterable(two))
]
print(x)
y = [1, 2]

clf = Perceptron(random_state=241)
clf.fit(x, y)

if __name__ == "__main__":
    print(clf.predict([
        list(itertools.chain.from_iterable(one)),
    ]))
    print(
        clf.predict([
            list(itertools.chain.from_iterable(read_image('1.png'))),
        ]))
Example #15
def main():
    while True:
        intro = Text(
            Point(250, 300),
            "2048 TRAINER\n\nTrain model...R\nFull game train...G\n\n>>>Delays between moves<<<\nTest KNN model...E\nTest Perceptron model...P\nRandom model...N\n\n>>>No delays<<<\nTest KNN model...F\nTest Perceptron model...S\nRandom model...M\n\nSimple learning model...L\n\nPRESS Q TO QUIT"
        )
        intro.setSize(20)
        intro.setTextColor(color_rgb(255, 255, 255))

        if os.path.isfile("2048_train.csv"):
            data = pd.read_csv("2048_train.csv",
                               header=None,
                               usecols=[
                                   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                   14, 15, 16
                               ])
            direction = pd.read_csv("2048_train.csv", header=None, usecols=[0])
            splitRatio = 0.7
            datatrainingSet, datatestSet = splitData(data, splitRatio)
            directiontrainingSet, directiontestSet = splitData(
                direction, splitRatio)
            splitRatio = 0.5
            datatestSet, datadevelopementSet = splitData(
                datatestSet, splitRatio)
            directiontestSet, directiondevelopementSet = splitData(
                directiontestSet, splitRatio)
            direction = np.transpose(direction)
            isPrevData = True
        else:
            isPrevData = False

        if os.path.isfile("best_weights.txt"):
            fileContent = open("best_weights.txt", 'r')
            weightString = fileContent.readlines()
            bestWeightsX = np.array(weightString[0:17])
            bestWeightsX = bestWeightsX.astype(float)  # np.float was removed from NumPy
            bestWeightsY = np.array(weightString[17:34])
            bestWeightsY = bestWeightsY.astype(float)
            randomRadius = float(weightString[34])
            bestLearnScore = float(weightString[35])
            fileContent.close()
        else:
            bestWeightsX = np.empty(0)
            bestWeightsY = np.empty(0)
            for i in range(0, 17):
                bestWeightsX = np.append(bestWeightsX, 0)
                bestWeightsY = np.append(bestWeightsY, 0)
                randomRadius = 100
                bestLearnScore = 0

        win = GraphWin("2048", 500, 600)
        win.setBackground(color_rgb(0, 103, 105))
        intro.draw(win)

        if isPrevData:
            options = ['r', 'g', 'e', 'n', 'f', 'm', 'l', 'q', 's', 'p']
        else:
            options = ['r', 'g', 'n', 'm', 'l', 'q']
        mode = '-'
        while mode not in options:
            mode = win.getKey()

        if mode == 'q':
            win.close()
            return 0

        if mode == 'e' or mode == 'f':
            knn = KNeighborsClassifier(n_neighbors=10)
            knn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        if mode == 'p' or mode == 's':
            ppn = Perceptron(eta0=0.01, max_iter=10000)  # n_iter was removed in scikit-learn 0.21
            ppn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        intro.undraw()
        win.setBackground(color_rgb(100, 100, 100))

        score = 0
        scoreText = Text(Point(250, 550), str(score))
        scoreText.setTextColor(color_rgb(255, 255, 255))
        scoreText.setSize(30)

        board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                          [0, 0, 0, 0]])

        isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                  [0, 0, 0, 0]])

        #DO THE SAME THING FOR THE TILE NUMBERS
        tileList = []
        numberList = []

        board = spawn(board)
        board = spawn(board)

        for i in range(0, 4):
            for j in range(0, 4):
                tileList.append(
                    Rectangle(Point(i * 125 + 5, j * 125 + 5),
                              Point(i * 125 + 120, j * 125 + 120)))
                numberList.append(
                    Text(Point(i * 125 + 60, j * 125 + 60), str(board[j][i])))
                numberList[i * 4 + j].setSize(20)
                tileList[i * 4 + j].setWidth(4)
                tileList[i * 4 + j].setOutline(color_rgb(255, 255, 255))
                numberList[i * 4 + j].setTextColor(color_rgb(0, 0, 0))
                tileList[i * 4 + j].draw(win)
                numberList[i * 4 + j].draw(win)

        #THE TRAINING DATA
        if not isPrevData:
            data = np.empty((0, 16), float)
            direction = np.empty(0)

        drawBoard(board, win, tileList, numberList)

        move = '-'
        classes = ['w', 's', 'd', 'a']
        moveIter = 0
        nMoves = 0

        totalIterations = 1
        iteration = 1

        currentWeightsX = generateRandomWeights(bestWeightsX,
                                                math.floor(randomRadius))
        currentWeightsY = generateRandomWeights(bestWeightsY,
                                                math.floor(randomRadius))
        currentBestWeightsX = currentWeightsX
        currentBestWeightsY = currentWeightsY

        if mode == 'l':
            maxIterations = int(
                input("Enter number of generations to simulate:"))

        while True:

            score = calcScore(board)

            if mode != 'l':
                drawBoard(board, win, tileList, numberList)

            scoreText.undraw()
            if mode != 'l':
                scoreText.setSize(30)
                scoreText.setText(str(score))
                scoreText.draw(win)

            for i in range(0, 4):
                for j in range(0, 4):
                    isChangeBoard[i][j] = board[i][j]

            if mode == 'r' or mode == 'g':
                move = win.getKey()
                if nMoves % 30 == 0 and mode == 'r':
                    board = generateRandomGrid(board)
                nMoves += 1
            else:
                if mode == 'e' or mode == 'n' or mode == 'p':
                    time.sleep(0.5)
                if mode == 'e' or mode == 'f':
                    probs = np.ravel(
                        knn.predict_proba(gridToData(board).reshape(1, -1)))
                    ranks = [0] * len(probs)
                    for i, x in enumerate(
                            sorted(range(len(probs)), key=lambda y: probs[y])):
                        ranks[x] = i
                    move = classes[ranks[moveIter]]
                    moveIter += 1
                elif mode == 'm':
                    move = classes[random.randint(0, 3)]
                elif mode == 'l':
                    move = classes[calculateDirection(currentWeightsX, board) *
                                   2 +
                                   calculateDirection(currentWeightsY, board)]
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1
                elif mode == 'p' or mode == 's':
                    # take the scalar label out of the length-1 prediction array
                    move = ppn.predict(gridToData(board).reshape(1, -1))[0]
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1

            if move == 'w':
                board = shift(board, 0)
            elif move == 'd':
                board = shift(board, 1)
            elif move == 's':
                board = shift(board, 2)
            elif move == 'a':
                board = shift(board, 3)
            elif move == 'q':
                break

            if lose(board):
                print(score)
                if mode == 'l':
                    board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                      [0, 0, 0, 0]])

                    isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0],
                                              [0, 0, 0, 0], [0, 0, 0, 0]])

                    board = spawn(board)
                    board = spawn(board)

                    scoreText.setSize(16)
                    scoreText.setText("Score: " + str(score) +
                                      " -- iteration: " + str(iteration) +
                                      " -- r: " + str(randomRadius))

                    if score > bestLearnScore:
                        bestLearnScore = score
                        currentBestWeightsX = currentWeightsX
                        currentBestWeightsY = currentWeightsY
                        print("current best" + str(currentBestWeightsX) +
                              str(currentBestWeightsY))

                    if iteration == 1000:
                        drawBoard(board, win, tileList, numberList)
                        print("best score: " + str(bestLearnScore))
                        iteration = 1
                        randomRadius *= 0.9
                        bestWeightsX = currentBestWeightsX
                        bestWeightsY = currentBestWeightsY
                        print("best weights:" + str(bestWeightsX) +
                              str(bestWeightsY))
                        if totalIterations / 1000 >= maxIterations:
                            toWrite = np.empty(0)
                            toWrite = np.append(toWrite, bestWeightsX)
                            toWrite = np.append(toWrite, bestWeightsY)
                            toWrite = np.append(toWrite, randomRadius)
                            toWrite = np.append(toWrite, bestLearnScore)
                            np.savetxt("best_weights.txt", toWrite)
                            #WRITE WEIGHTS N STUFF
                            break

                    totalIterations += 1
                    iteration += 1

                    currentWeightsX = generateRandomWeights(
                        bestWeightsX, math.floor(randomRadius))
                    currentWeightsY = generateRandomWeights(
                        bestWeightsY, math.floor(randomRadius))
                else:
                    break

            isChanged = False
            for i in range(0, 4):
                for j in range(0, 4):
                    if isChangeBoard[i][j] != board[i][j]:
                        isChanged = True

            if isChanged:
                moveIter = 0
                if mode == 'r' or mode == 'g':
                    direction = np.append(direction, move)
                    data = np.vstack([data, gridToData2(board)])
                board = spawn(board)

        score = calcScore(board)

        scoreText.setSize(16)
        scoreText.setText(str(score) + " -- YOU LOSE (Press Q)")
        while move != 'q':
            move = win.getKey()
        win.close()

        if mode == 'r' or mode == 'g':
            # save the moves and board states collected during this session
            direction = np.ravel(direction)
            pd.DataFrame.to_csv(pd.DataFrame(
                np.hstack([
                    np.reshape(direction, [np.shape(direction)[0], 1]),
                    data
                ])),
                                "2048_train.csv",
                                index=False,
                                header=False)
Example #16
from numpy import array  # assumed import
from sklearn.linear_model import Perceptron  # assumed import

data = [
    array([  # Vector of [shape; texture; weight]
        [1, 1, -1],  # Apple
        [1, -1, -1],  # Orange
        [1, 1, -1],  # Apple
        [1, -1, -1],  # Orange
    ]), array([
        1,   # Apple
        -1,  # Orange
        1,   # Apple
        -1   # Orange
    ])
]

# Create a Perceptron instance with at most 1000 epochs
perceptron = Perceptron(max_iter=1000)

# Train the network
perceptron.fit(data[0], data[1])


def test_net_accuracy():
    # Test the network's accuracy on the previously fed data,
    # distinguishing an apple from an orange.
    data = [
        array([
            [1, 1, -1],   # Apple
            [1, -1, -1],  # Orange
            [-1, -1, -1]  # An orange that is elliptical
        ]), array([
            1,
import numpy as np
from matplotlib import pyplot

from sklearn.linear_model import Perceptron  # modern import path
from sklearn.metrics import accuracy_score

import usps
import perceptron # for the two_classes helper

data, labels           = usps.load_train()
data_test, labels_test = usps.load_test()

for k in range(10):
    labels_k = perceptron.two_classes(labels, k)

    net = Perceptron()
    net.fit(data, labels_k)
    output_train = net.predict(data)
    output_test  = net.predict(data_test)
    print(k)
    print("  Score (train)", accuracy_score(labels_k, output_train))
    labels_k_test = perceptron.two_classes(labels_test, k)
    print("  Score (test)", accuracy_score(labels_k_test, output_test))
Example #18
def main():

    # 1. Defining the variables for the experiment #####################################################################
    qtd_modelos = 100
    qtd_execucoes = 30
    qtd_amostras = 0.9
    qtd_folds = 10
    n_vizinhos = 7
    nome_datasets = ['kc1', 'kc2']
    # 1. End ############################################################################################################

    # for loop to vary over the datasets
    for h in range(len(nome_datasets)):

        # 2. Reading the datasets ##########################################################################################
        # reading the dataset
        data = pd.read_csv('dataset/'+nome_datasets[h]+'.csv')

        # obtaining the patterns and their respective labels
        df_x = np.asarray(data.iloc[:,0:-1])
        df_y = np.asarray(data.iloc[:,-1])

        # 2.1. Creating the table to save the data ####################################################
        # creating the table that will hold the model
        tabela = Tabela_excel()
        tabela.Criar_tabela(nome_tabela='arquivos_lista03/'+nome_datasets[h],
                            folhas=['OLA', 'LCA', 'KNORA-E', 'KNORA-U', 'Arquitetura'],
                            cabecalho=['acuracy', 'auc', 'fmeasure', 'gmean'],
                            largura_col=5000)
        # 2.1. End #####################################################################################
        # 2. End ############################################################################################################
        
        # running the algorithms x times
        for j in range(qtd_execucoes):

            # 3. Splitting the data into training and test sets ##############################################################
            # splitting the dataset without overlap: 90% for training and 10% for testing
            skf = StratifiedKFold(n_splits=qtd_folds)

            # taking the train and test indices
            train_index, test_index = next(iter(skf.split(df_x, df_y)))

            # obtaining the training and test sets
            x_train = df_x[train_index]
            y_train = df_y[train_index]
            x_test = df_x[test_index]
            y_test = df_y[test_index]
            # 3. End #########################################################################################################

            # 4. Generating the pool of classifiers ##########################################################################
            # instantiating the classifier
            ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                            max_samples=qtd_amostras,
                                            max_features=1.0,
                                            n_estimators=qtd_modelos)

            # training the model
            ensemble.fit(x_train, y_train)
            # 4. End  ########################################################################################################
            
            # 5. Instantiating the classifiers ##########################################################

            ################################### OLA ########################################################
            executar_modelo('OLA', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### LCA ########################################################
            executar_modelo('LCA', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### KNORAE #####################################################
            executar_modelo('KNORAE', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### KNORAU #####################################################
            executar_modelo('KNORAU', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### Arquitetura ################################################
            # importing the method
            arq = Arquitetura(n_vizinhos)
            # training the method
            arq.fit(x_train, y_train)
            # making the prediction
            pred = arq.predict(x_test)
            # printing the results
            nome = 'Arquitetura'
            acuracia, auc, f1measure, gmean = printar_resultados(y_test, pred, nome_datasets[h]+'-'+nome+'-['+str(j)+']')
            # writing out the results obtained
            tabela.Adicionar_Sheet_Linha(4, j, [acuracia, auc, f1measure, gmean])
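executar_modelo is defined elsewhere in the source project. A plausible sketch built on the deslib dynamic-selection library, reusing printar_resultados and the tabela object from the surrounding code (the deslib calls and the sheet indexing are assumptions):

from deslib.dcs.ola import OLA
from deslib.dcs.lca import LCA
from deslib.des.knora_e import KNORAE
from deslib.des.knora_u import KNORAU

def executar_modelo(nome, x_train, y_train, x_test, y_test, pool, n_vizinhos,
                    nome_datasets, h, j, tabela):
    # map the method name onto a dynamic-selection model built over the trained pool
    metodos = {'OLA': OLA, 'LCA': LCA, 'KNORAE': KNORAE, 'KNORAU': KNORAU}
    modelo = metodos[nome](pool_classifiers=pool, k=n_vizinhos)
    modelo.fit(x_train, y_train)
    pred = modelo.predict(x_test)
    acuracia, auc, f1measure, gmean = printar_resultados(
        y_test, pred, nome_datasets[h] + '-' + nome + '-[' + str(j) + ']')
    tabela.Adicionar_Sheet_Linha(list(metodos).index(nome), j,
                                 [acuracia, auc, f1measure, gmean])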
Example #19
			'NearestNeighbors':NearestNeighbors(),
			'Normalizer':Normalizer(),
			'NuSVC':NuSVC(),
			'NuSVR':NuSVR(),
			'Nystroem':Nystroem(),
			'OAS':OAS(),
			'OneClassSVM':OneClassSVM(),
			'OrthogonalMatchingPursuit':OrthogonalMatchingPursuit(),
			'OrthogonalMatchingPursuitCV':OrthogonalMatchingPursuitCV(),
			'PCA':PCA(),
			'PLSCanonical':PLSCanonical(),
			'PLSRegression':PLSRegression(),
			'PLSSVD':PLSSVD(),
			'PassiveAggressiveClassifier':PassiveAggressiveClassifier(),
			'PassiveAggressiveRegressor':PassiveAggressiveRegressor(),
			'Perceptron':Perceptron(),
			'ProjectedGradientNMF':ProjectedGradientNMF(),
			'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(),
			'RANSACRegressor':RANSACRegressor(),
			'RBFSampler':RBFSampler(),
			'RadiusNeighborsClassifier':RadiusNeighborsClassifier(),
			'RadiusNeighborsRegressor':RadiusNeighborsRegressor(),
			'RandomForestClassifier':RandomForestClassifier(),
			'RandomForestRegressor':RandomForestRegressor(),
			'RandomizedLasso':RandomizedLasso(),
			'RandomizedLogisticRegression':RandomizedLogisticRegression(),
			'RandomizedPCA':RandomizedPCA(),
			'Ridge':Ridge(),
			'RidgeCV':RidgeCV(),
			'RidgeClassifier':RidgeClassifier(),
			'RidgeClassifierCV':RidgeClassifierCV(),
Example #20
# Assumed imports for this example
import timeit
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, confusion_matrix

def task2(sneakers_data: pd.DataFrame, boots_data: pd.DataFrame,
          sneakers_labels: pd.DataFrame, boots_labels: pd.DataFrame):
    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    full_data = pd.concat([sneakers_data, boots_data])
    full_labels = pd.concat([sneakers_labels, boots_labels])

    train_times = []
    predict_times = []
    accuracies = []

    print("\tTask 2 output")
    num_splits = 4
    for train_index, test_index in KFold(n_splits=num_splits,
                                         shuffle=True).split(full_data):
        train_data = full_data.iloc[train_index]
        test_data = full_data.iloc[test_index]

        train_labels = full_labels.iloc[train_index]
        test_labels = full_labels.iloc[test_index]

        pctrn = Perceptron()

        train_start_time = timeit.default_timer()
        pctrn.fit(X=train_data, y=train_labels)
        train_end_time = timeit.default_timer()

        train_time = train_end_time - train_start_time
        train_times.append(train_time)

        print("\tPerceptron took", train_time, "seconds to train on data")

        predict_start_time = timeit.default_timer()
        prediction = pctrn.predict(test_data)
        predict_end_time = timeit.default_timer()

        predict_time = predict_end_time - predict_start_time
        predict_times.append(predict_time)

        print("\tPerceptron took", predict_time,
              "seconds to make a prediction")

        accuracy = accuracy_score(test_labels, prediction) * 100
        accuracies.append(accuracy)

        print("\tAccuracy for perceptron", accuracy, "%")

        confusion = confusion_matrix(test_labels, prediction)

        percent_true_pos = (confusion[0, 0] / len(test_labels)) * 100
        percent_false_pos = (confusion[0, 1] / len(test_labels)) * 100
        percent_true_neg = (confusion[1, 1] / len(test_labels)) * 100
        percent_false_neg = (confusion[1, 0] / len(test_labels)) * 100

        print("\tPerceptron confusion matrix true positive:", percent_true_pos,
              "%")
        print("\tPerceptron confusion matrix false positive:",
              percent_false_pos, "%")
        print("\tPerceptron confusion matrix true negative:", percent_true_neg,
              "%")
        print("\tPerceptron confusion matrix false negative:",
              percent_false_neg, "%\n")

    print("\tThe minimum train time was", np.min(train_times), "seconds")
    print("\tThe maximum train time was", np.max(train_times), "seconds")
    print("\tThe average train time was", np.mean(train_times), "seconds\n")

    print("\tThe minimum prediction time was", np.min(predict_times),
          "seconds")
    print("\tThe maximum prediction time was", np.max(predict_times),
          "seconds")
    print("\tThe average prediction time was", np.mean(predict_times),
          "seconds\n")

    print("\tMinimum Accuracy was", np.min(accuracies), "%")
    print("\tMaximum accuracy was", np.max(accuracies), "%")
    print("\tAverage accuracy was", np.mean(accuracies), "%\n")

    print(
        "\tTotal train time for all k-folds in perceptron =",
        np.sum(train_times), "seconds")
    print(
        "\tTotal prediction time for all k-folds in perceptron =",
        np.sum(predict_times), "seconds\n")
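A side note on the confusion-matrix percentages above: scikit-learn orders the matrix with rows as true labels and columns as predictions, so for binary labels ravel() unpacks it as below (a sketch; which class counts as "positive" depends on the label ordering):

from sklearn.metrics import confusion_matrix

# labels ordered [0, 1]: ravel() yields tn, fp, fn, tp
tn, fp, fn, tp = confusion_matrix([0, 1, 1, 0], [0, 1, 0, 0]).ravel()
print(tn, fp, fn, tp)  # 2 0 1 1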
Example #21
class PerceptronMask(BaseClassifier):
    """ PerceptronMask

    A mask for scikit-learn's Perceptron classifier.

    Because scikit-multiflow's framework requires a few interfaces not present
    in scikit-learn, this mask allows the former to use classifiers native to
    the latter.

    """
    def __init__(self):
        super().__init__()
        self.classifier = Perceptron(max_iter=50)  # n_iter was removed in scikit-learn 0.21

    def fit(self, X, y, classes = None, weight=None):
        """ fit

        Calls the Perceptron fit function from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        weight: Instance weight. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Calls the Perceptron partial_fit from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: list, optional
            A list with all the possible labels of the classification problem.

        weight: Instance weight. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.partial_fit(X, y, classes, weight)
        return self

    def predict(self, X):
        """ predict

        Uses the current model to predict samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        Returns
        -------
        list
            A list containing the predicted labels for all instances in X.

        """
        return self.classifier.predict(X)


    def predict_proba(self, X):
        """ predict_proba

        Predicts the probability of each sample belonging to each one of the 
        known classes.
    
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.
    
        Returns
        -------
        numpy.ndarray
            An array of shape (n_samples, n_classes), in which each outer entry is 
            associated with the X entry of the same index, and where the entry at 
            index [i] contains len(self.classes) elements, each of which represents 
            the probability that the i-th sample of X belongs to a certain label.
    
        """
        # Note: scikit-learn's Perceptron does not itself implement predict_proba;
        # this call only works if the wrapped classifier provides it.
        return self.classifier.predict_proba(X)

    def score(self, X, y):
        """ score

        Returns the predict performance for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            The features matrix.

        y: Array-like
            An array-like containing the class labels for all samples in X.

        Returns
        -------
        float
            The classifier's score.

        """
        return self.classifier.score(X, y)

    def get_info(self):
        params = self.classifier.get_params()
        penalty = params['penalty']
        penalty = 'None' if penalty is None else penalty
        fit_int = params['fit_intercept']
        fit_int = 'True' if fit_int else 'False'
        shuffle = params['shuffle']
        shuffle = 'True' if shuffle else 'False'
        return 'Perceptron: penalty: ' + penalty + \
               '  -  alpha: ' + str(round(params['alpha'], 3)) + \
               '  -  fit_intercept: ' + fit_int + \
               '  -  max_iter: ' + str(params['max_iter']) + \
               '  -  shuffle: ' + shuffle
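A minimal usage sketch for the mask above (toy arrays stand in for a data stream; assumes PerceptronMask and scikit-multiflow's BaseClassifier are importable):

import numpy as np

mask = PerceptronMask()
X, y = np.random.rand(20, 3), np.random.randint(0, 2, 20)
mask.partial_fit(X, y, classes=[0, 1])  # the first call declares every possible label
print(mask.predict(np.random.rand(5, 3)))
print(mask.score(X, y))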
Example #22
def get_paramater_grids(data_path):
    """Returns all the Models / Parameters I'm searching over"""

    pca_nfeatures = [0, 1, 2, 5, 10, 15, 20, 24]
    # 0 is a pass-through value that does not do PCA
    # 24 is safer because not every feature is guaranteed to appear in the training set.
    # This might not be the safest design, but it works for now

    # some runs may have more or fewer features based on training; 24 seems safe

    # Number of training points allowed to be on the wrong side of the hyperplane.
    # A point is fractionally over the line if it violates the margin.
    # 0 is a lower bound; roughly half the training points (170/2) seems like a reasonable upper bound.
    # The sklearn formulation is different from what was covered in the book
    svc_gamma = np.logspace(-7, 0, 8)  # kernel parameter

    paramater_grids = {}

    # SVC
    paramater_grids['SVC'] = {}
    paramater_grids['SVC']['pipeline']=\
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', SVC())
            ])
    paramater_grids['SVC']['parameters'] = \
        {'classifier__kernel': ('linear', 'rbf', 'sigmoid'),  # the linear kernel ignores gamma, but that's fine
         'classifier__C': np.logspace(-2, 4, 7),
         'classifier__gamma': svc_gamma,
         'feature__n_components': pca_nfeatures}

    # SVC with a polynomial kernel
    paramater_grids['SVC_poly'] = {}
    paramater_grids['SVC_poly']['pipeline']=\
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', SVC(kernel='poly'))
            ])
    paramater_grids['SVC_poly']['parameters'] = \
        {'classifier__degree': [2, 3, 4, 5],
         'classifier__C':  np.logspace(-2,3,6),
         'classifier__gamma': svc_gamma,
         'feature__n_components': pca_nfeatures}

    # Gaussian Mixture
    paramater_grids['GM'] = {}
    paramater_grids['GM']['pipeline']=\
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', GM(max_iter=250,n_init=25))
            ])
    paramater_grids['GM']['parameters'] = \
        {'classifier__n_components': [1, 2, 4, 8, 16, 32],
         'classifier__covariance_type': ['full', 'tied', 'diag', 'spherical'],
         'feature__n_components': pca_nfeatures}

    # Perceptron
    paramater_grids['Perceptron_PCA'] = {}
    paramater_grids['Perceptron_PCA']['pipeline'] = \
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', Perceptron())
        ])
    paramater_grids['Perceptron_PCA']['parameters'] = \
        {'feature__n_components': pca_nfeatures}

    # Perceptron
    paramater_grids['Perceptron_LDA'] = {}
    paramater_grids['Perceptron_LDA']['pipeline'] = \
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', LinearDiscriminantAnalysis()),
            ('classifier', Perceptron())
        ])
    paramater_grids['Perceptron_LDA']['parameters'] = {}

    return paramater_grids
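A hypothetical driver for the grids above; GridSearchCV is the natural consumer (the data path and the X_train/y_train arrays are assumptions):

from sklearn.model_selection import GridSearchCV

grids = get_paramater_grids('data/')  # hypothetical path
for name, grid in grids.items():
    search = GridSearchCV(grid['pipeline'], grid['parameters'], cv=5)
    search.fit(X_train, y_train)  # X_train / y_train assumed loaded elsewhere
    print(name, search.best_score_, search.best_params_)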
Example #23
#encoding=utf8
import os
from sklearn.linear_model import Perceptron  # modern import path
import pandas as pd

if os.path.exists('./step2/result.csv'):
    os.remove('./step2/result.csv')

# load the training data
train_data = pd.read_csv('./step2/train_data.csv')
# load the training labels
train_label = pd.read_csv('./step2/train_label.csv')
train_label = train_label['target']
# load the test data
test_data = pd.read_csv('./step2/test_data.csv')

# train on the data
clf = Perceptron(eta0=0.1, max_iter=500)
clf.fit(train_data, train_label)
res = clf.predict(test_data)

# save the predictions
res = {"result": res}
res = pd.DataFrame(res)
res.to_csv('./step2/result.csv', index=False)
Example #24
acc_svc = accuracy_score(y_pred, y_test)
print(acc_svc)  # 0.81218

# Linear support vector machine (LinearSVC)
from sklearn.svm import LinearSVC

linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_test)
acc_linear_svc = accuracy_score(y_pred, y_test)
print(acc_linear_svc)  # 0.77157

# Perceptron
from sklearn.linear_model import Perceptron  # modern import path

perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_test)
acc_perceptron = accuracy_score(y_pred, y_test)
print(acc_perceptron)  # 0.77665

# Decision tree
from sklearn.tree import DecisionTreeClassifier

decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_test)
acc_decisiontree = accuracy_score(y_pred, y_test)
print(acc_decisiontree)  # 0.82233

# Random forest
Example #25
 def __init__(self):
     super().__init__()
     self.classifier = Perceptron(max_iter=50)  # n_iter was removed in scikit-learn 0.21
Example #26
class PerceptronMask(StreamModel):
    """ PerceptronMask

    A mask for scikit-learn's Perceptron classifier.

    Because scikit-multiflow's framework requires a few interfaces not present
    in scikit-learn, this mask allows the former to use classifiers native to
    the latter.

    """
    def __init__(self,
                 penalty=None,
                 alpha=0.0001,
                 fit_intercept=True,
                 max_iter=1000,
                 tol=1e-3,
                 shuffle=True,
                 verbose=0,
                 eta0=1.0,
                 n_jobs=1,
                 random_state=0,
                 class_weight=None,
                 warm_start=False):
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta0 = eta0
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.class_weight = class_weight
        self.warm_start = warm_start
        super().__init__()
        self.classifier = Perceptron(penalty=self.penalty,
                                     alpha=self.alpha,
                                     fit_intercept=self.fit_intercept,
                                     max_iter=self.max_iter,
                                     tol=self.tol,
                                     shuffle=self.shuffle,
                                     verbose=self.verbose,
                                     random_state=self.random_state,
                                     eta0=self.eta0,
                                     warm_start=self.warm_start,
                                     class_weight=self.class_weight,
                                     n_jobs=self.n_jobs)

    def fit(self, X, y, classes=None, weight=None):
        """ fit

        Calls the Perceptron fit function from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        weight: Instance weight. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Calls the Perceptron partial_fit from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: list, optional
            A list with all the possible labels of the classification problem.

        weight: Instance weight. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.partial_fit(X, y, classes, weight)
        return self

    def predict(self, X):
        """ predict

        Uses the current model to predict samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        Returns
        -------
        numpy.ndarray
            A numpy.ndarray containing the predicted labels for all instances in X.

        """
        return np.asarray(self.classifier.predict(X))

    def predict_proba(self, X):
        """ predict_proba

        Predicts the probability of each sample belonging to each one of the 
        known classes.
    
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.
    
        Returns
        -------
        numpy.ndarray
            An array of shape (n_samples, n_classes), in which each outer entry is 
            associated with the X entry of the same index, and where the entry at 
            index [i] contains len(self.target_values) elements, each of which represents
            the probability that the i-th sample of X belongs to a certain label.
    
        """
        return self.classifier._predict_proba_lr(X)

    def score(self, X, y):
        """ score

        Returns the predict performance for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            The features matrix.

        y: Array-like
            An array-like containing the class labels for all samples in X.

        Returns
        -------
        float
            The classifier's score.

        """
        return self.classifier.score(X, y)

    def get_info(self):
        params = self.classifier.get_params()
        info = type(self).__name__ + ':'
        info += ' - penalty: {}'.format(params['penalty'])
        info += ' - alpha: {}'.format(params['alpha'])
        info += ' - fit_intercept: {}'.format(params['fit_intercept'])
        info += ' - max_iter: {}'.format(params['max_iter'])
        info += ' - tol: {}'.format(params['tol'])
        info += ' - shuffle: {}'.format(params['shuffle'])
        info += ' - eta0: {}'.format(params['eta0'])
        info += ' - warm_start: {}'.format(params['warm_start'])
        info += ' - class_weight: {}'.format(params['class_weight'])
        info += ' - n_jobs: {}'.format(params['n_jobs'])
        return info

    def reset(self):
        self.__init__(penalty=self.penalty,
                      alpha=self.alpha,
                      fit_intercept=self.fit_intercept,
                      max_iter=self.max_iter,
                      tol=self.tol,
                      shuffle=self.shuffle,
                      verbose=self.verbose,
                      random_state=self.random_state,
                      eta0=self.eta0,
                      warm_start=self.warm_start,
                      class_weight=self.class_weight,
                      n_jobs=self.n_jobs)
Example #27
def result():
    if request.method == 'POST':
        path = request.files.get('myFile')

        df = pd.read_csv(path, encoding="ISO-8859-1")

        filename = request.form['filename']

        str1 = request.form['feature']
        str2 = request.form['label']

        if str1 in list(df) and str2 in list(df):
            y = df[str2]
            X = df[str1]
        else:
            return render_template('nameError.html')

        x = []
        for subject in X:
            result = re.sub(r"http\S+", "", subject)
            replaced = re.sub(r'[^a-zA-Z0-9 ]+', '', result)
            x.append(replaced)
        X = pd.Series(x)

        X = X.str.lower()
        """
        texts = []
        for doc in X:
            doc = nlp(doc, disable=['parser', 'ner'])
            tokens = [tok.lemma_.lower().strip() for tok in doc if tok.lemma_ != '-PRON-']
            tokens = [tok for tok in tokens if tok not in stopwords]
            tokens = ' '.join(tokens)
            texts.append(tokens)

        X = pd.Series(texts)
        """
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.33)

        tfidfvect = TfidfVectorizer(ngram_range=(1, 1))
        X_train_tfidf = tfidfvect.fit_transform(X_train)

        start = time()
        clf1 = LinearSVC()
        clf1.fit(X_train_tfidf, y_train)
        pred_SVC = clf1.predict(tfidfvect.transform(X_test))

        a1 = accuracy_score(y_test, pred_SVC)
        end = time()
        print("accuracy SVC: {} and time: {} s".format(a1, (end - start)))

        start = time()
        clf2 = LogisticRegression(n_jobs=-1,
                                  multi_class='multinomial',
                                  solver='newton-cg')
        clf2.fit(X_train_tfidf, y_train)
        pred_LR = clf2.predict(tfidfvect.transform(X_test))
        a2 = accuracy_score(y_test, pred_LR)
        end = time()
        print("accuracy LR: {} and time: {}".format(a2, (end - start)))

        start = time()
        clf3 = RandomForestClassifier(n_jobs=-1)

        clf3.fit(X_train_tfidf, y_train)
        pred = clf3.predict(tfidfvect.transform(X_test))
        a3 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy RFC: {} and time: {}".format(a3, (end - start)))

        start = time()
        clf4 = MultinomialNB()

        clf4.fit(X_train_tfidf, y_train)
        pred = clf4.predict(tfidfvect.transform(X_test))
        a4 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy MNB: {} and time: {}".format(a4, (end - start)))

        start = time()
        clf5 = GaussianNB()

        clf5.fit(X_train_tfidf.toarray(), y_train)
        pred = clf5.predict(tfidfvect.transform(X_test).toarray())
        a5 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy GNB: {} and time: {}".format(a5, (end - start)))

        start = time()
        clf6 = LogisticRegressionCV(n_jobs=-1)
        clf6.fit(X_train_tfidf, y_train)
        pred_LR = clf6.predict(tfidfvect.transform(X_test))
        a6 = accuracy_score(y_test, pred_LR)
        end = time()
        print("accuracy LRCV: {} and time: {}".format(a6, (end - start)))

        start = time()
        clf7 = AdaBoostClassifier()
        clf7.fit(X_train_tfidf, y_train)
        pred_LR = clf7.predict(tfidfvect.transform(X_test))
        a7 = accuracy_score(y_test, pred_LR)
        end = time()
        print("accuracy ABC: {} and time: {}".format(a7, (end - start)))

        start = time()
        clf8 = BernoulliNB()

        clf8.fit(X_train_tfidf.toarray(), y_train)
        pred = clf8.predict(tfidfvect.transform(X_test).toarray())
        a8 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy BNB: {} and time: {}".format(a8, (end - start)))

        start = time()
        clf9 = Perceptron(n_jobs=-1)

        clf9.fit(X_train_tfidf.toarray(), y_train)
        pred = clf9.predict(tfidfvect.transform(X_test).toarray())
        a9 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy Per: {} and time: {}".format(a9, (end - start)))
        start = time()
        clf10 = RidgeClassifierCV()

        clf10.fit(X_train_tfidf.toarray(), y_train)
        pred = clf10.predict(tfidfvect.transform(X_test).toarray())
        a10 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy RidCV: {} and time: {}".format(a10, (end - start)))

        start = time()
        clf11 = SGDClassifier(n_jobs=-1)

        clf11.fit(X_train_tfidf.toarray(), y_train)
        pred = clf11.predict(tfidfvect.transform(X_test).toarray())
        a11 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy SGDC: {} and time: {}".format(a11, (end - start)))
        start = time()
        # clf12 is a second SGDClassifier, despite the XGBC label printed below
        clf12 = SGDClassifier(n_jobs=-1)

        clf12.fit(X_train_tfidf.toarray(), y_train)
        pred = clf12.predict(tfidfvect.transform(X_test).toarray())
        a12 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy XGBC: {} and time: {}".format(a12, (end - start)))

        acu_list = [a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12]
        max_list = max(acu_list)

        if max_list == a1:
            pickle.dump(clf1, open(filename + '_model', 'wb'))
        elif max_list == a2:
            pickle.dump(clf2, open(filename + '_model', 'wb'))
        elif max_list == a3:
            pickle.dump(clf3, open(filename + '_model', 'wb'))
        elif max_list == a4:
            pickle.dump(clf4, open(filename + '_model', 'wb'))
        elif max_list == a5:
            pickle.dump(clf5, open(filename + '_model', 'wb'))
        elif max_list == a6:
            pickle.dump(clf6, open(filename + '_model', 'wb'))
        elif max_list == a7:
            pickle.dump(clf7, open(filename + '_model', 'wb'))
        elif max_list == a8:
            pickle.dump(clf8, open(filename + '_model', 'wb'))
        elif max_list == a9:
            pickle.dump(clf9, open(filename + '_model', 'wb'))
        elif max_list == a10:
            pickle.dump(clf10, open(filename + '_model', 'wb'))
        elif max_list == a11:
            pickle.dump(clf11, open(filename + '_model', 'wb'))
        elif max_list == a12:
            pickle.dump(clf12, open(filename + '_model', 'wb'))

        pickle.dump(tfidfvect, open(filename + '_tfidfVect', 'wb'))

        return render_template("result.html",
                               ac1=a1,
                               ac2=a2,
                               ac3=a3,
                               ac4=a4,
                               ac5=a5,
                               ac6=a6,
                               ac7=a7,
                               ac8=a8,
                               ac9=a9,
                               ac10=a10,
                               ac11=a11,
                               ac12=a12)
Example #28
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=i)

    # Standardizing the training and test variables
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Training the Perceptron on the previously separated training set
    # (100 passes over the data with learning rate eta0 = 0.1)
    from sklearn.linear_model import Perceptron  # modern import path
    classifier = Perceptron(max_iter=100, eta0=0.1)
    classifier.fit(X_train, y_train)

    # Feeding the test data to the Perceptron network and obtaining the predictions
    y_pred = classifier.predict(X_test)

    # Building the confusion matrix
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(y_test, y_pred)
    '''TP = # True Positives, TN = # True Negatives,
     FP = # False Positives, FN = # False Negatives

    Precision = TP / (TP + FP)
    '''
    # Applying the evaluation method
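A sketch of the evaluation step the last comment points to, unpacking cm into the metrics named in the docstring (a hypothetical continuation, not the original code):

    # rows of cm are true labels, columns are predictions: ravel() gives TN, FP, FN, TP
    TN, FP, FN, TP = cm.ravel()
    accuracy = (TP + TN) / (TP + TN + FP + FN)
    precision = TP / (TP + FP)
    print('accuracy:', accuracy, 'precision:', precision)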