def perceptron(self):
        # Perceptron (linear classifier trained with the perceptron rule)
        perceptron = Perceptron(penalty='l2', max_iter=1000, shuffle=True)
        perceptron.fit(self.X_train, self.y_train)

        # training-set accuracy, as a percentage rounded to two decimals
        acc = round(perceptron.score(self.X_train, self.y_train) * 100, 2)
        print("train acc with Perceptron:", acc)

        self.y_pred = perceptron.predict(self.X_test)
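The method stores the held-out predictions but reports only the training accuracy; a minimal companion sketch (assuming the same object also keeps self.y_test) would score them too:

def perceptron_test_score(self):
        # hypothetical companion method: compare the predictions made in
        # perceptron() against the true test labels (assumes self.y_test)
        from sklearn.metrics import accuracy_score
        acc = round(accuracy_score(self.y_test, self.y_pred) * 100, 2)
        print("test acc with Perceptron:", acc)
        return acc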
Example #2
def demo():
    """ _test_mol

    This demo tests the MOL learner on a file stream, which reads from 
    the music.csv file.

    The test computes the performance of the MOL learner as well as 
    the time to create the structure and classify all the samples in 
    the file.

    """
    # Setup logging
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # Setup the file stream
    stream = FileStream("../data/datasets/music.csv", 0, 6)
    stream.prepare_for_use()

    # Setup the classifier, by default it uses Logistic Regression
    # classifier = MultiOutputLearner()
    # classifier = MultiOutputLearner(base_estimator=SGDClassifier(n_iter=100))
    classifier = MultiOutputLearner(base_estimator=Perceptron())

    # Setup the pipeline
    pipe = Pipeline([('classifier', classifier)])

    pretrain_size = 150
    logging.info('Pre-training on %s samples', pretrain_size)
    logging.info('Total %s samples', stream.n_samples)
    X, y = stream.next_sample(pretrain_size)
    # classifier.fit(X, y)
    classes = stream.target_values
    classes_flat = list(set([item for sublist in classes for item in sublist]))
    pipe.partial_fit(X, y, classes=classes_flat)
    count = 0
    true_labels = []
    predicts = []
    init_time = timer()
    logging.info('Evaluating...')
    while stream.has_more_samples():
        X, y = stream.next_sample()
        # p = classifier.predict(X)
        p = pipe.predict(X)
        predicts.extend(p)
        true_labels.extend(y)
        count += 1
    perf = hamming_score(true_labels, predicts)
    logging.info('Evaluation time: %s s', timer() - init_time)
    logging.info('Total samples analyzed: %s', count)
    logging.info("The classifier's static Hamming score: %0.3f", perf)
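hamming_score here is scikit-multiflow's multi-label metric. As a rough sketch of what it computes -- the per-label agreement between truth and prediction averaged over all samples, i.e. one minus the Hamming loss -- assuming equally shaped label arrays:

import numpy as np

def hamming_score_sketch(true_labels, predicts):
    # fraction of individual label slots on which truth and prediction
    # agree, averaged over every sample and label
    t = np.asarray(true_labels)
    p = np.asarray(predicts)
    return float((t == p).mean())
Example #3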
def simulation(n, runs, margin=0, p_runs=100, d=2):
    '''
    Run a given number of simulations to compare SVM and perceptron error
    rates. Each run generates a set of training and testing points, then
    fits a single SVM and a given number of perceptrons (whose error is
    averaged).
    :param n: number of training points
    :param runs: number of times to run the simulation
    :param margin: margin (gamma) passed to the point generator
    :param p_runs: number of perceptrons to average
    :param d: dimensionality of the points
    :return: pandas dataframe with one row per run:
             [n, margin, avg perceptron error, svm error]
    '''

    all_data = []
    for i in range(runs):
        # Generate labeled points and their margin; 80-20 train-test split
        train_dat, test_dat, margin = generate_labeled_points(n_train=n,
                                                              n_test=ceil(n * 0.25),
                                                              gamma=margin,
                                                              dim=d)

        # Separate train points from labels
        train_points = [x[0] for x in train_dat]
        train_labels = [x[1] for x in train_dat]

        # Separate test points from their labels
        test_points = [x[0] for x in test_dat]
        test_labels = [x[1] for x in test_dat]

        # Run k = p_runs perceptrons on the same training data and take
        # their mean error (score() returns accuracy, hence 1 - score)
        p_errors = []
        seed = np.random.RandomState()
        for k in range(p_runs):
            perceptron = Perceptron(random_state=seed)
            perceptron.fit(train_points, train_labels)
            p_errors.append(1 - perceptron.score(test_points, test_labels))
        p_error = np.mean(p_errors)

        # Train and test a single linear SVM, again converting accuracy
        # to error
        svm = SVC(kernel="linear")
        svm.fit(train_points, train_labels)
        svm_error = 1 - svm.score(test_points, test_labels)

        all_data.append([n, margin, p_error, svm_error])

    df = pd.DataFrame(
        all_data, columns=['n', 'margin', 'avg perceptron_error', 'svm_error'])
    return df
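A typical invocation (illustrative values; generate_labeled_points is assumed importable from the same module):

# 20 runs of 100 two-dimensional points each, averaging 100 perceptrons
# per run, then comparing the mean error rates
df = simulation(n=100, runs=20, p_runs=100, d=2)
print(df[['avg perceptron_error', 'svm_error']].mean())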
Example #4
    def fit(self, x, y):
        '''
        Train the two-level architecture.
        :x: training data
        :y: data labels
        '''

        # saving the training data
        self.x_train = x
        self.y_train = y

        # saving the instance hardness values
        self.H = self.kDN(x, y)

        # training level 1 ############################################
        self.levelone = KNeighborsClassifier(self.n_vizinhos)
        self.levelone.fit(x, y)

        # predicting over the training set
        y_pred = self.levelone.predict(x)

        # saving the indices of the misclassified instances
        indices = [i for i in range(len(y)) if y_pred[i] != y[i]]

        # obtaining the problem's hardness threshold
        self.limiar = self.defineThreshold(indices)
        ###############################################################

        # training level 2 ############################################
        # obtaining the hard instances
        x_dificeis, y_dificeis = self.hardInstances(x, y, self.limiar)

        # creating the ensemble
        self.ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                          max_samples=0.9,
                                          max_features=1.0,
                                          n_estimators=100)
        self.ensemble.fit(x_dificeis, y_dificeis)

        # training the level-2 model
        self.leveltwo = KNORAU(self.ensemble.estimators_, self.n_vizinhos)
        self.leveltwo.fit(x_dificeis, y_dificeis)
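self.kDN above is presumably the k-Disagreeing Neighbors instance-hardness measure; a minimal standalone sketch of its usual definition (the fraction of an instance's k nearest neighbors that carry a different label):

import numpy as np
from sklearn.neighbors import NearestNeighbors

def kDN_sketch(x, y, k=7):
    # for each instance, the fraction of its k nearest neighbors
    # (excluding itself) whose label disagrees with its own
    y = np.asarray(y)
    nn = NearestNeighbors(n_neighbors=k + 1).fit(x)
    _, idx = nn.kneighbors(x)
    return np.array([np.mean(y[idx[i, 1:]] != y[i]) for i in range(len(y))])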
Example #5
def main():
    # create the training & test sets (these files carry no header row)

    dataset_T = genfromtxt(open('Data/demoTrain.csv', 'r'),
                           delimiter=',',
                           dtype='f8')[:]

    dataset_R = genfromtxt(open('Data/demoTarget.csv', 'r'),
                           delimiter=',',
                           dtype='f8')[:]

    dataset_v = genfromtxt(open('Data/demoTest.csv', 'r'),
                           delimiter=',',
                           dtype='f8')[:]

    trueData = genfromtxt(open('Data/validate.csv', 'r'),
                          delimiter=',',
                          dtype='f8')[:]

    target = [x for x in dataset_R]
    train = [x[:] for x in dataset_T]
    validate = [x[:] for x in dataset_v]
    y = [x for x in trueData]

    test = genfromtxt(open('Data/demoTest.csv', 'r'),
                      delimiter=',',
                      dtype='f8')[:]

    per = Perceptron(max_iter=2, shuffle=True)
    per.fit(train, target)

    #val = per.decision_function(validate)

    val = per.predict(validate)
    score = per.score(validate, y)

    print(str(score) + "\n")

    for v in val:
        print(v)

    # Perceptron no longer provides fit_transform; the learned weights
    # can be inspected directly instead
    print(per.coef_)
Example #6
    def __init__(self,
                 penalty=None,
                 alpha=0.0001,
                 fit_intercept=True,
                 max_iter=1000,
                 tol=1e-3,
                 shuffle=True,
                 verbose=0,
                 eta0=1.0,
                 n_jobs=1,
                 random_state=0,
                 class_weight=None,
                 warm_start=False):
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta0 = eta0
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.class_weight = class_weight
        self.warm_start = warm_start
        super().__init__()
        self.classifier = Perceptron(penalty=self.penalty,
                                     alpha=self.alpha,
                                     fit_intercept=self.fit_intercept,
                                     max_iter=self.max_iter,
                                     tol=self.tol,
                                     shuffle=self.shuffle,
                                     verbose=self.verbose,
                                     random_state=self.random_state,
                                     eta0=self.eta0,
                                     warm_start=self.warm_start,
                                     class_weight=self.class_weight,
                                     n_jobs=self.n_jobs)
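The wrapper presumably forwards the usual estimator API to the inner Perceptron; a minimal sketch of the companion methods, assuming the surrounding base class imposes no extra contract:

    def fit(self, X, y):
        # delegate training to the wrapped scikit-learn Perceptron
        self.classifier.fit(X, y)
        return self

    def predict(self, X):
        # delegate prediction to the wrapped estimator
        return self.classifier.predict(X)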
Example #7
                y_test, pred,
                nome_datasets[h] + '-pct-' + str(pct_trainamento[i]) +
                '- Bagging com DecisionTree [' + str(j) + ']')

            # writing out the results obtained
            tabela.Adicionar_Sheet_Linha(num_model, j,
                                         [acuracia, auc, f1measure, gmean])

            # 3.2.1. End ###################################################################################

            # 3.2.2. Bagging with Main ################################################################
            # model number in the table
            num_model = 1

            # model
            bg = BaggingClassifier(base_estimator=Perceptron(),
                                   max_samples=pct_trainamento[i],
                                   max_features=1.0,
                                   n_estimators=qtd_modelos)
            # training the model
            bg.fit(x_train, y_train)

            # computing the prediction
            pred = bg.predict(x_test)

            # printing the results
            acuracia, auc, f1measure, gmean = printar_resultados(
                y_test, pred,
                nome_datasets[h] + '-pct-' + str(pct_trainamento[i]) +
                '- Bagging com Main [' + str(j) + ']')
Example #8
def main():
    while True:
        intro = Text(
            Point(250, 300),
            "2048 TRAINER\n\nTrain model...R\nFull game train...G\n\n>>>Delays between moves<<<\nTest KNN model...E\nTest Perceptron model...P\nRandom model...N\n\n>>>No delays<<<\nTest KNN model...F\nTest Perceptron model...S\nRandom model...M\n\nSimple learning model...L\n\nPRESS Q TO QUIT"
        )
        intro.setSize(20)
        intro.setTextColor(color_rgb(255, 255, 255))

        if os.path.isfile("2048_train.csv"):
            data = pd.read_csv("2048_train.csv",
                               header=None,
                               usecols=[
                                   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                   14, 15, 16
                               ])
            direction = pd.read_csv("2048_train.csv", header=None, usecols=[0])
            splitRatio = 0.7
            datatrainingSet, datatestSet = splitData(data, splitRatio)
            directiontrainingSet, directiontestSet = splitData(
                direction, splitRatio)
            splitRatio = 0.5
            datatestSet, datadevelopementSet = splitData(
                datatestSet, splitRatio)
            directiontestSet, directiondevelopementSet = splitData(
                directiontestSet, splitRatio)
            direction = np.transpose(direction)
            isPrevData = True
        else:
            isPrevData = False

        if os.path.isfile("best_weights.txt"):
            fileContent = open("best_weights.txt", 'r')
            weightString = fileContent.readlines()
            bestWeightsX = np.array(weightString[0:17])
            bestWeightsX = bestWeightsX.astype(float)
            bestWeightsY = np.array(weightString[17:34])
            bestWeightsY = bestWeightsY.astype(float)
            randomRadius = float(weightString[34])
            bestLearnScore = float(weightString[35])
            fileContent.close()
        else:
            bestWeightsX = np.empty(0)
            bestWeightsY = np.empty(0)
            for i in range(0, 17):
                bestWeightsX = np.append(bestWeightsX, 0)
                bestWeightsY = np.append(bestWeightsY, 0)
            randomRadius = 100
            bestLearnScore = 0

        win = GraphWin("2048", 500, 600)
        win.setBackground(color_rgb(0, 103, 105))
        intro.draw(win)

        if isPrevData:
            options = ['r', 'g', 'e', 'n', 'f', 'm', 'l', 'q', 's', 'p']
        else:
            options = ['r', 'g', 'n', 'm', 'l', 'q']
        mode = '-'
        while mode not in options:
            mode = win.getKey()

        if mode == 'q':
            win.close()
            return 0

        if mode == 'e' or mode == 'f':
            knn = KNeighborsClassifier(n_neighbors=10)
            knn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        if mode == 'p' or mode == 's':
            ppn = Perceptron(eta0=0.01, max_iter=10000)
            ppn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        intro.undraw()
        win.setBackground(color_rgb(100, 100, 100))

        score = 0
        scoreText = Text(Point(250, 550), str(score))
        scoreText.setTextColor(color_rgb(255, 255, 255))
        scoreText.setSize(30)

        board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                          [0, 0, 0, 0]])

        isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                  [0, 0, 0, 0]])

        # create the tile rectangles and their number labels
        tileList = []
        numberList = []

        board = spawn(board)
        board = spawn(board)

        for i in range(0, 4):
            for j in range(0, 4):
                tileList.append(
                    Rectangle(Point(i * 125 + 5, j * 125 + 5),
                              Point(i * 125 + 120, j * 125 + 120)))
                numberList.append(
                    Text(Point(i * 125 + 60, j * 125 + 60), str(board[j][i])))
                numberList[i * 4 + j].setSize(20)
                tileList[i * 4 + j].setWidth(4)
                tileList[i * 4 + j].setOutline(color_rgb(255, 255, 255))
                numberList[i * 4 + j].setTextColor(color_rgb(0, 0, 0))
                tileList[i * 4 + j].draw(win)
                numberList[i * 4 + j].draw(win)

        #THE TRAINING DATA
        if not isPrevData:
            data = np.empty((0, 16), float)
            direction = np.empty(0)

        drawBoard(board, win, tileList, numberList)

        move = '-'
        classes = ['w', 's', 'd', 'a']
        moveIter = 0
        nMoves = 0

        totalIterations = 1
        iteration = 1

        currentWeightsX = generateRandomWeights(bestWeightsX,
                                                math.floor(randomRadius))
        currentWeightsY = generateRandomWeights(bestWeightsY,
                                                math.floor(randomRadius))
        currentBestWeightsX = currentWeightsX
        currentBestWeightsY = currentWeightsY

        if mode == 'l':
            maxIterations = int(
                input("Enter number of generations to simulate:"))

        while True:

            score = calcScore(board)

            if mode != 'l':
                drawBoard(board, win, tileList, numberList)

            scoreText.undraw()
            if mode != 'l':
                scoreText.setSize(30)
                scoreText.setText(str(score))
                scoreText.draw(win)

            for i in range(0, 4):
                for j in range(0, 4):
                    isChangeBoard[i][j] = board[i][j]

            if mode == 'r' or mode == 'g':
                move = win.getKey()
                if nMoves % 30 == 0 and mode == 'r':
                    board = generateRandomGrid(board)
                nMoves += 1
            else:
                if mode == 'e' or mode == 'n' or mode == 'p':
                    time.sleep(0.5)
                if mode == 'e' or mode == 'f':
                    probs = np.ravel(
                        knn.predict_proba(gridToData(board).reshape(1, -1)))
                    ranks = [0] * len(probs)
                    for i, x in enumerate(
                            sorted(range(len(probs)), key=lambda y: probs[y])):
                        ranks[x] = i
                    move = classes[ranks[moveIter]]
                    moveIter += 1
                elif mode == 'm':
                    move = classes[random.randint(0, 3)]
                elif mode == 'l':
                    move = classes[calculateDirection(currentWeightsX, board) *
                                   2 +
                                   calculateDirection(currentWeightsY, board)]
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1
                elif mode == 'p' or mode == 's':
                    # predict() returns an array; take the single label so
                    # the string comparisons below work
                    move = ppn.predict(gridToData(board).reshape(1, -1))[0]
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1

            if move == 'w':
                board = shift(board, 0)
            elif move == 'd':
                board = shift(board, 1)
            elif move == 's':
                board = shift(board, 2)
            elif move == 'a':
                board = shift(board, 3)
            elif move == 'q':
                break

            if lose(board):
                print(score)
                if mode == 'l':
                    board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                      [0, 0, 0, 0]])

                    isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0],
                                              [0, 0, 0, 0], [0, 0, 0, 0]])

                    board = spawn(board)
                    board = spawn(board)

                    scoreText.setSize(16)
                    scoreText.setText("Score: " + str(score) +
                                      " -- iteration: " + str(iteration) +
                                      " -- r: " + str(randomRadius))

                    if score > bestLearnScore:
                        bestLearnScore = score
                        currentBestWeightsX = currentWeightsX
                        currentBestWeightsY = currentWeightsY
                        print("current best" + str(currentBestWeightsX) +
                              str(currentBestWeightsY))

                    if iteration == 1000:
                        drawBoard(board, win, tileList, numberList)
                        print("best score: " + str(bestLearnScore))
                        iteration = 1
                        randomRadius *= 0.9
                        bestWeightsX = currentBestWeightsX
                        bestWeightsY = currentBestWeightsY
                        print("best weights:" + str(bestWeightsX) +
                              str(bestWeightsY))
                        if totalIterations / 1000 >= maxIterations:
                            toWrite = np.empty(0)
                            toWrite = np.append(toWrite, bestWeightsX)
                            toWrite = np.append(toWrite, bestWeightsY)
                            toWrite = np.append(toWrite, randomRadius)
                            toWrite = np.append(toWrite, bestLearnScore)
                            np.savetxt("best_weights.txt", toWrite)
                            # write out the best weights and training state
                            break

                    totalIterations += 1
                    iteration += 1

                    currentWeightsX = generateRandomWeights(
                        bestWeightsX, math.floor(randomRadius))
                    currentWeightsY = generateRandomWeights(
                        bestWeightsY, math.floor(randomRadius))
                else:
                    break

            isChanged = False
            for i in range(0, 4):
                for j in range(0, 4):
                    if isChangeBoard[i][j] != board[i][j]:
                        isChanged = True

            if isChanged:
                moveIter = 0
                if mode == 'r' or mode == 'g':
                    direction = np.append(direction, move)
                    data = np.vstack([data, gridToData2(board)])
                board = spawn(board)

        score = calcScore(board)

        scoreText.setSize(16)
        scoreText.setText(str(score) + " -- YOU LOSE (Press Q)")
        while move != 'q':
            move = win.getKey()
        win.close()

        if mode == 'r' or mode == 'g':
            pd.DataFrame(
                np.hstack([
                    np.reshape(directiontrainingSet,
                               [np.shape(directiontrainingSet)[0], 1]),
                    datatrainingSet
                ])).to_csv("2048_train.csv", index=False, header=False)
Example #9
        x_val, y_val = validacaoCompleta(x_train, y_train)
    elif(validacao[k] == validacao[1]):
        x_val, y_val = validacaoInstanciasFaceis(x_train, y_train, n_vizinhos)
    elif(validacao[k] == validacao[2]):
        x_val, y_val = validacaoInstanciasDificeis(x_train, y_train, n_vizinhos)

    # 3.3. End ################################################################################################

    # 3.4. Instantiating the classifiers ############################################################

    ########## instantiating the Bagging+REP model #########################################
    # defining the model number in the table
    num_model = 0

    # instantiating the classifier
    ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                 max_samples=qtd_amostras,
                                 max_features=1.0,
                                 n_estimators=qtd_modelos)

    # training the model
    ensemble.fit(x_train, y_train)

    # performing the pruning
    ensemble = REP(x_val, y_val, ensemble)

    # computing the prediction
    pred = ensemble.predict(x_test)

    # computing the ensemble's diversity
    q_statistic = MedidasDiversidade('q', x_val, y_val, ensemble)
Example #10
import numpy as np
from matplotlib import pyplot

from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

import usps
import perceptron  # for the two_classes function

data, labels           = usps.load_train()
data_test, labels_test = usps.load_test()

for k in range(10):
    labels_k = perceptron.two_classes(labels, k)

    net = Perceptron()
    net.fit(data, labels_k)
    output_train = net.predict(data)
    output_test  = net.predict(data_test)
    print(k)
    print("  Score (train)", accuracy_score(labels_k, output_train))
    labels_k_test = perceptron.two_classes(labels_test, k)
    print("  Score (test)", accuracy_score(labels_k_test, output_test))
Example #11
from sklearn.linear_model import Perceptron
from numbers_mass import one, two
import itertools
from generate_picture import read_image

x = [
    list(itertools.chain.from_iterable(one)),
    list(itertools.chain.from_iterable(two))
]
print(x)
y = [1, 2]

clf = Perceptron(random_state=241)
clf.fit(x, y)

if __name__ == "__main__":
    print(clf.predict([
        list(itertools.chain.from_iterable(one)),
    ]))
    print(
        clf.predict([
            list(itertools.chain.from_iterable(read_image('1.png'))),
        ]))
Example #12
data = [
    array([  # [shape; texture; weight] vectors
        [1, 1, -1],  # Apple
        [1, -1, -1],  # Orange
        [1, 1, -1],  # Apple
        [1, -1, -1],  # Orange
    ]), array([
        1,   # apple
        -1,  # orange
        1,   # apple
        -1   # orange (matches the identical second sample)
    ])
]

# Create a Perceptron instance with a maximum of 1000 epochs
perceptron = Perceptron(max_iter=1000)

# Train the network
perceptron.fit(data[0], data[1])
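Once trained, the perceptron can be queried with a new [shape; texture; weight] vector; an illustrative check:

# an elliptical, rough, light fruit: given the training pattern above,
# the perceptron should classify it as an orange (-1)
print(perceptron.predict([[-1, -1, -1]]))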


def test_net_accuracy():
    # Test the network's accuracy on data similar to what it was
    # trained with, distinguishing an apple from an orange.
    data = [
        array([
            [1, 1, -1],   # Apple
            [1, -1, -1],  # Orange
            [-1, -1, -1]  # An orange that is elliptical
        ]), array([
            1,
Example #13
def main():
    
    # 1. Defining the experiment variables ##############################################################################
    qtd_modelos = 100
    qtd_execucoes = 30
    qtd_amostras = 0.9
    qtd_folds = 10
    n_vizinhos = 7
    nome_datasets = ['kc1', 'kc2']
    # 1. End ############################################################################################################

    # loop over the datasets
    for h in range(len(nome_datasets)):

        # 2. Reading the datasets ##########################################################################################
        # reading the dataset
        data = pd.read_csv('dataset/'+nome_datasets[h]+'.csv')

        # obtaining the patterns and their respective labels
        df_x = np.asarray(data.iloc[:,0:-1])
        df_y = np.asarray(data.iloc[:,-1])


        # 2.1. Creating the table to save the results #################################################
        # creating the table that will hold the model's results
        tabela = Tabela_excel()
        tabela.Criar_tabela(nome_tabela='arquivos_lista03/'+nome_datasets[h],
                            folhas=['OLA', 'LCA', 'KNORA-E', 'KNORA-U', 'Arquitetura'],
                            cabecalho=['acuracy', 'auc', 'fmeasure', 'gmean'],
                            largura_col=5000)
        # 2.1. End #####################################################################################
        # 2. End ############################################################################################################
        
        # running the algorithms x times
        for j in range(qtd_execucoes):

            # 3. Splitting the data into training and test sets ##############################################################
            # splitting the dataset without overlap into 90% training and 10% test
            skf = StratifiedKFold(n_splits=qtd_folds)

            # taking the training and test indices
            train_index, test_index = next(iter(skf.split(df_x, df_y)))

            # obtaining the training and test sets
            x_train = df_x[train_index]
            y_train = df_y[train_index]
            x_test = df_x[test_index]
            y_test = df_y[test_index]
            # 3. End #########################################################################################################


            # 4. Generating the pool of classifiers ##########################################################################
            # instantiating the classifier
            ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                         max_samples=qtd_amostras,
                                         max_features=1.0,
                                         n_estimators=qtd_modelos)

            # training the model
            ensemble.fit(x_train, y_train)
            # 4. End  ########################################################################################################
            
            # 5. Instantiating the classifiers ############################################################

            ################################### OLA ########################################################
            executar_modelo('OLA', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### LCA ########################################################
            executar_modelo('LCA', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### KNORAE #####################################################
            executar_modelo('KNORAE', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### KNORAU #####################################################
            executar_modelo('KNORAU', x_train, y_train, x_test, y_test, ensemble.estimators_, n_vizinhos, nome_datasets, h, j, tabela)
            ################################################################################################

            ################################### Arquitetura ################################################
            # importing the method
            arq = Arquitetura(n_vizinhos)
            # training the method
            arq.fit(x_train, y_train)
            # computing the prediction
            pred = arq.predict(x_test)
            # printing the results
            nome = 'Arquitetura'
            acuracia, auc, f1measure, gmean = printar_resultados(y_test, pred, nome_datasets[h]+'-'+nome+'-['+str(j)+']')
            # writing out the results obtained
            tabela.Adicionar_Sheet_Linha(4, j, [acuracia, auc, f1measure, gmean])
Example #14
    def __init__(self):
        super().__init__()
        self.classifier = Perceptron(max_iter=50)
Example #15
def test_perceptron(x: list, y: list, learning_rate: float, max_iter: int) -> None:
    # eta0 is the perceptron's learning rate (alpha is the regularization
    # strength, which is not what the parameter name here suggests)
    perceptron = Perceptron(max_iter=max_iter, eta0=learning_rate)
    perceptron.fit(x, y)
    plot_model(np.array(x), perceptron)
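An illustrative call on a toy linearly separable set (assuming plot_model is available in scope):

# four 2-D points separable on the first coordinate
X_toy = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
y_toy = [0, 0, 1, 1]
test_perceptron(X_toy, y_toy, learning_rate=0.1, max_iter=1000)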
Example #16
#encoding=utf8
import os
from sklearn.linear_model import Perceptron
import pandas as pd

if os.path.exists('./step2/result.csv'):
    os.remove('./step2/result.csv')

# load the training data
train_data = pd.read_csv('./step2/train_data.csv')
# load the training labels
train_label = pd.read_csv('./step2/train_label.csv')
train_label = train_label['target']
# load the test data
test_data = pd.read_csv('./step2/test_data.csv')

# train the model
clf = Perceptron(eta0=0.1, max_iter=500)
clf.fit(train_data, train_label)
res = clf.predict(test_data)

# save the predictions
res = {"result": res}
res = pd.DataFrame(res)
res.to_csv('./step2/result.csv', index=False)
Example #17
def get_paramater_grids(data_path):
    """Returns all the models / parameters I'm searching over"""

    pca_nfeatures = [0, 1, 2, 5, 10, 15, 20, 24]
    # 0 is a pass-through value that does not apply PCA
    # 24 is safer because some features might only appear in the training
    # set; this might not be the safest design, but it works for now

    # some runs may see more or fewer features after training; 24 seems safe

    # C controls how many training points may sit on the wrong side of the
    # hyperplane (a point fractionally crosses the line when it violates the
    # margin). 0 is a lower bound, and roughly half the number of training
    # points (170 / 2) seems a reasonable upper bound. Note that sklearn's
    # formulation differs from the one covered in the book.
    svc_gamma = np.logspace(-7, 0, 8)  # kernel parameter

    paramater_grids = {}

    # SVC
    paramater_grids['SVC'] = {}
    paramater_grids['SVC']['pipeline']=\
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', SVC())
            ])
    paramater_grids['SVC']['parameters'] = \
        {'classifier__kernel':('linear','rbf','sigmoid'), # the linear kernel ignores gamma, but that's fine
         'classifier__C':  np.logspace(-2,4,7),
         'classifier__gamma': svc_gamma,
         'feature__n_components': pca_nfeatures}

    # SVC with a polynomial kernal
    paramater_grids['SVC_poly'] = {}
    paramater_grids['SVC_poly']['pipeline']=\
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', SVC(kernel='poly'))
            ])
    paramater_grids['SVC_poly']['parameters'] = \
        {'classifier__degree': [2, 3, 4, 5],
         'classifier__C':  np.logspace(-2,3,6),
         'classifier__gamma': svc_gamma,
         'feature__n_components': pca_nfeatures}

    # Gaussian Mixture
    paramater_grids['GM'] = {}
    paramater_grids['GM']['pipeline']=\
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', GM(max_iter=250,n_init=25))
            ])
    paramater_grids['GM']['parameters'] = \
        {'classifier__n_components': [1, 2, 4, 8, 16, 32],
         'classifier__covariance_type': ['full', 'tied', 'diag', 'spherical'],
         'feature__n_components': pca_nfeatures}

    # Perceptron
    paramater_grids['Perceptron_PCA'] = {}
    paramater_grids['Perceptron_PCA']['pipeline'] = \
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', PCA()),
            ('classifier', Perceptron())
        ])
    paramater_grids['Perceptron_PCA']['parameters'] = \
        {'feature__n_components': pca_nfeatures}

    # Perceptron
    paramater_grids['Perceptron_LDA'] = {}
    paramater_grids['Perceptron_LDA']['pipeline'] = \
        Pipeline([
            ('cleaner', hdpp.DataCleaner(data_path + 'meta_data.csv').CleaningPipeline),
            ('feature', LinearDiscriminantAnalysis()),
            ('classifier', Perceptron())
        ])
    paramater_grids['Perceptron_LDA']['parameters'] = {}

    return paramater_grids
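These grids are presumably consumed by a grid-search loop along the following lines (a sketch; the data and cross-validation settings are assumptions):

from sklearn.model_selection import GridSearchCV

def search_all(grids, X, y):
    # fit one grid search per model family and collect the best results
    results = {}
    for name, spec in grids.items():
        gs = GridSearchCV(spec['pipeline'], spec['parameters'], cv=5)
        gs.fit(X, y)
        results[name] = (gs.best_score_, gs.best_params_)
    return results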
Example #18
acc_svc = accuracy_score(y_pred, y_test)
print(acc_svc)  # 0.81218

# Linear support vector classifier (LinearSVC)
from sklearn.svm import LinearSVC

linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_test)
acc_linear_svc = accuracy_score(y_pred, y_test)
print(acc_linear_svc)  # 0.77157

# Perceptron
from sklearn.linear_model import Perceptron

perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_test)
acc_perceptron = accuracy_score(y_pred, y_test)
print(acc_perceptron)  # 0.77665

# Decision tree
from sklearn.tree import DecisionTreeClassifier

decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_test)
acc_decisiontree = accuracy_score(y_pred, y_test)
print(acc_decisiontree)  # 0.82233

# Random forest
Example #19
def task2(sneakers_data: pd.DataFrame, boots_data: pd.DataFrame,
          sneakers_labels: pd.DataFrame, boots_labels: pd.DataFrame):
    full_data = pd.concat([sneakers_data, boots_data])
    full_labels = pd.concat([sneakers_labels, boots_labels])

    train_times = []
    predict_times = []
    accuracies = []

    print("\tTask 2 output")
    num_splits = 4
    for train_index, test_index in KFold(n_splits=num_splits,
                                         shuffle=True).split(full_data):
        train_data = full_data.iloc[train_index]
        test_data = full_data.iloc[test_index]

        train_labels = full_labels.iloc[train_index]
        test_labels = full_labels.iloc[test_index]

        pctrn = Perceptron()

        train_start_time = timeit.default_timer()
        pctrn.fit(X=train_data, y=train_labels)
        train_end_time = timeit.default_timer()

        train_time = train_end_time - train_start_time
        train_times.append(train_time)

        print("\tPerceptron took", train_time, "seconds to train on data")

        predict_start_time = timeit.default_timer()
        prediction = pctrn.predict(test_data)
        predict_end_time = timeit.default_timer()

        predict_time = predict_end_time - predict_start_time
        predict_times.append(predict_time)

        print("\tPerceptron took", predict_time,
              "seconds to make a prediction")

        accuracy = accuracy_score(test_labels, prediction) * 100
        accuracies.append(accuracy)

        print("\tAccuracy for perceptron", accuracy, "%")

        confusion = confusion_matrix(test_labels, prediction)

        # sklearn's convention: cm[0, 0] = TN, cm[0, 1] = FP,
        # cm[1, 0] = FN, cm[1, 1] = TP
        percent_true_neg = (confusion[0, 0] / len(test_labels)) * 100
        percent_false_pos = (confusion[0, 1] / len(test_labels)) * 100
        percent_true_pos = (confusion[1, 1] / len(test_labels)) * 100
        percent_false_neg = (confusion[1, 0] / len(test_labels)) * 100

        print("\tPerceptron confusion matrix true positive:", percent_true_pos,
              "%")
        print("\tPerceptron confusion matrix false positive:",
              percent_false_pos, "%")
        print("\tPerceptron confusion matrix true negative:", percent_true_neg,
              "%")
        print("\tPerceptron confusion matrix false negative:",
              percent_false_neg, "%\n")

    print("\tThe minimum train time was", np.min(train_times), "seconds")
    print("\tThe maximum train time was", np.max(train_times), "seconds")
    print("\tThe average train time was", np.mean(train_times), "seconds\n")

    print("\tThe minimum prediction time was", np.min(predict_times),
          "seconds")
    print("\tThe maximum prediction time was", np.max(predict_times),
          "seconds")
    print("\tThe average prediction time was", np.mean(predict_times),
          "seconds\n")

    print("\tMinimum Accuracy was", np.min(accuracies), "%")
    print("\tMaximum accuracy was", np.max(accuracies), "%")
    print("\tAverage accuracy was", np.mean(accuracies), "%\n")

    print("\tTotal train time for all k-folds in perceptron:",
          np.sum(train_times), "seconds")
    print("\tTotal prediction time for all k-folds in perceptron:",
          np.sum(predict_times), "seconds\n")
Example #20
			'NearestNeighbors':NearestNeighbors(),
			'Normalizer':Normalizer(),
			'NuSVC':NuSVC(),
			'NuSVR':NuSVR(),
			'Nystroem':Nystroem(),
			'OAS':OAS(),
			'OneClassSVM':OneClassSVM(),
			'OrthogonalMatchingPursuit':OrthogonalMatchingPursuit(),
			'OrthogonalMatchingPursuitCV':OrthogonalMatchingPursuitCV(),
			'PCA':PCA(),
			'PLSCanonical':PLSCanonical(),
			'PLSRegression':PLSRegression(),
			'PLSSVD':PLSSVD(),
			'PassiveAggressiveClassifier':PassiveAggressiveClassifier(),
			'PassiveAggressiveRegressor':PassiveAggressiveRegressor(),
			'Perceptron':Perceptron(),
			'ProjectedGradientNMF':ProjectedGradientNMF(),
			'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(),
			'RANSACRegressor':RANSACRegressor(),
			'RBFSampler':RBFSampler(),
			'RadiusNeighborsClassifier':RadiusNeighborsClassifier(),
			'RadiusNeighborsRegressor':RadiusNeighborsRegressor(),
			'RandomForestClassifier':RandomForestClassifier(),
			'RandomForestRegressor':RandomForestRegressor(),
			'RandomizedLasso':RandomizedLasso(),
			'RandomizedLogisticRegression':RandomizedLogisticRegression(),
			'RandomizedPCA':RandomizedPCA(),
			'Ridge':Ridge(),
			'RidgeCV':RidgeCV(),
			'RidgeClassifier':RidgeClassifier(),
			'RidgeClassifierCV':RidgeClassifierCV(),
Example #21
def result():
    if request.method == 'POST':
        path = request.files.get('myFile')

        df = pd.read_csv(path, encoding="ISO-8859-1")

        filename = request.form['filename']

        str1 = request.form['feature']
        str2 = request.form['label']

        if str1 in list(df) and str2 in list(df):
            y = df[str2]
            X = df[str1]
        else:
            return render_template('nameError.html')

        x = []
        for subject in X:
            result = re.sub(r"http\S+", "", subject)
            replaced = re.sub(r'[^a-zA-Z0-9 ]+', '', result)
            x.append(replaced)
        X = pd.Series(x)

        X = X.str.lower()
        """
        texts = []
        for doc in X:
            doc = nlp(doc, disable=['parser', 'ner'])
            tokens = [tok.lemma_.lower().strip() for tok in doc if tok.lemma_ != '-PRON-']
            tokens = [tok for tok in tokens if tok not in stopwords]
            tokens = ' '.join(tokens)
            texts.append(tokens)

        X = pd.Series(texts)
        """
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.33)

        tfidfvect = TfidfVectorizer(ngram_range=(1, 1))
        X_train_tfidf = tfidfvect.fit_transform(X_train)

        start = time()
        clf1 = LinearSVC()
        clf1.fit(X_train_tfidf, y_train)
        pred_SVC = clf1.predict(tfidfvect.transform(X_test))

        a1 = accuracy_score(y_test, pred_SVC)
        end = time()
        print("accuracy SVC: {} and time: {} s".format(a1, (end - start)))

        start = time()
        clf2 = LogisticRegression(n_jobs=-1,
                                  multi_class='multinomial',
                                  solver='newton-cg')
        clf2.fit(X_train_tfidf, y_train)
        pred_LR = clf2.predict(tfidfvect.transform(X_test))
        a2 = accuracy_score(y_test, pred_LR)
        end = time()
        print("accuracy LR: {} and time: {}".format(a2, (end - start)))

        start = time()
        clf3 = RandomForestClassifier(n_jobs=-1)

        clf3.fit(X_train_tfidf, y_train)
        pred = clf3.predict(tfidfvect.transform(X_test))
        a3 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy RFC: {} and time: {}".format(a3, (end - start)))

        start = time()
        clf4 = MultinomialNB()

        clf4.fit(X_train_tfidf, y_train)
        pred = clf4.predict(tfidfvect.transform(X_test))
        a4 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy MNB: {} and time: {}".format(a4, (end - start)))

        start = time()
        clf5 = GaussianNB()

        clf5.fit(X_train_tfidf.toarray(), y_train)
        pred = clf5.predict(tfidfvect.transform(X_test).toarray())
        a5 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy GNB: {} and time: {}".format(a5, (end - start)))

        start = time()
        clf6 = LogisticRegressionCV(n_jobs=-1)
        clf6.fit(X_train_tfidf, y_train)
        pred_LR = clf6.predict(tfidfvect.transform(X_test))
        a6 = accuracy_score(y_test, pred_LR)
        end = time()
        print("accuracy LRCV: {} and time: {}".format(a6, (end - start)))

        start = time()
        clf7 = AdaBoostClassifier()
        clf7.fit(X_train_tfidf, y_train)
        pred_LR = clf7.predict(tfidfvect.transform(X_test))
        a7 = accuracy_score(y_test, pred_LR)
        end = time()
        print("accuracy ABC: {} and time: {}".format(a7, (end - start)))

        start = time()
        clf8 = BernoulliNB()

        clf8.fit(X_train_tfidf.toarray(), y_train)
        pred = clf8.predict(tfidfvect.transform(X_test).toarray())
        a8 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy BNB: {} and time: {}".format(a8, (end - start)))

        start = time()
        clf9 = Perceptron(n_jobs=-1)

        clf9.fit(X_train_tfidf.toarray(), y_train)
        pred = clf9.predict(tfidfvect.transform(X_test).toarray())
        a9 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy Per: {} and time: {}".format(a9, (end - start)))
        start = time()
        clf10 = RidgeClassifierCV()

        clf10.fit(X_train_tfidf.toarray(), y_train)
        pred = clf10.predict(tfidfvect.transform(X_test).toarray())
        a10 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy RidCV: {} and time: {}".format(a10, (end - start)))

        start = time()
        clf11 = SGDClassifier(n_jobs=-1)

        clf11.fit(X_train_tfidf.toarray(), y_train)
        pred = clf11.predict(tfidfvect.transform(X_test).toarray())
        a11 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy SGDC: {} and time: {}".format(a11, (end - start)))
        start = time()
        # labeled XGBC in the original output, but this is in fact a
        # second SGDClassifier run
        clf12 = SGDClassifier(n_jobs=-1)

        clf12.fit(X_train_tfidf.toarray(), y_train)
        pred = clf12.predict(tfidfvect.transform(X_test).toarray())
        a12 = accuracy_score(y_test, pred)
        end = time()
        print("accuracy SGDC-2: {} and time: {}".format(a12, (end - start)))

        acu_list = [a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12]
        max_list = max(acu_list)

        if max_list == a1:
            pickle.dump(clf1, open(filename + '_model', 'wb'))
        elif max_list == a2:
            pickle.dump(clf2, open(filename + '_model', 'wb'))
        elif max_list == a3:
            pickle.dump(clf3, open(filename + '_model', 'wb'))
        elif max_list == a4:
            pickle.dump(clf4, open(filename + '_model', 'wb'))
        elif max_list == a5:
            pickle.dump(clf5, open(filename + '_model', 'wb'))
        elif max_list == a6:
            pickle.dump(clf6, open(filename + '_model', 'wb'))
        elif max_list == a7:
            pickle.dump(clf7, open(filename + '_model', 'wb'))
        elif max_list == a8:
            pickle.dump(clf8, open(filename + '_model', 'wb'))
        elif max_list == a9:
            pickle.dump(clf9, open(filename + '_model', 'wb'))
        elif max_list == a10:
            pickle.dump(clf10, open(filename + '_model', 'wb'))
        elif max_list == a11:
            pickle.dump(clf11, open(filename + '_model', 'wb'))
        elif max_list == a12:
            pickle.dump(clf12, open(filename + '_model', 'wb'))

        pickle.dump(tfidfvect, open(filename + '_tfidfVect', 'wb'))

        return render_template("result.html",
                               ac1=a1,
                               ac2=a2,
                               ac3=a3,
                               ac4=a4,
                               ac5=a5,
                               ac6=a6,
                               ac7=a7,
                               ac8=a8,
                               ac9=a9,
                               ac10=a10,
                               ac11=a11,
                               ac12=a12)
Example #22
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=i)

    # Normalizing the training and test variables
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Training the Perceptron on the previously separated training set:
    # up to 100 passes over the data with learning rate eta0 = 0.1
    from sklearn.linear_model import Perceptron
    classifier = Perceptron(max_iter=100, eta0=0.1)
    classifier.fit(X_train, y_train)

    # Feeding the test data to the Perceptron and collecting the predictions
    y_pred = classifier.predict(X_test)

    # Building the confusion matrix
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(y_test, y_pred)
    '''TP = # True Positives, TN = # True Negatives,
    FP = # False Positives, FN = # False Negatives

    Precision = TP / (TP + FP)
    '''
    # Applying the evaluation method
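The usual metrics then follow directly from the 2x2 matrix; a minimal sketch using sklearn's layout (cm[0, 0] = TN, cm[1, 1] = TP):

def metrics_from_cm(cm):
    # binary confusion matrix, sklearn convention: [[TN, FP], [FN, TP]]
    tn, fp, fn, tp = cm.ravel()
    accuracy = (tp + tn) / cm.sum()
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return accuracy, precision, recall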