def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """

    # declaring variables used for MLPClassifier
    hidden_layers = 6
    solver_mode = 'adam'
    activation_mode = 'tanh'
    max_iter = 200

    cf = MLPClassifier(hidden_layer_sizes=(hidden_layers, ),
                       solver=solver_mode,
                       activation=activation_mode,
                       max_iter=max_iter)

    # training the classifier
    cf.fit(input2, target2[:, 1])

    # printing the confusion matrix (true vs. predicted pose labels)
    print(confusion_matrix(target2[:, 1], cf.predict(input2)))

    # plotting the hidden layer weights
    plot_hidden_layer_weights(cf.coefs_[0])

def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    # the pose label is the 2nd column of target2
    pose2 = target2[:, 1]

    mlp = MLPClassifier(activation='tanh', hidden_layer_sizes=(6,))
    print("===========fit started===========")
    mlp.fit(input2, pose2)
    print("===========fit finished===========")
    print("classes_: ", mlp.classes_)
    print("n_layers_: ", mlp.n_layers_)
    plot_hidden_layer_weights(mlp.coefs_[0])

    print("===========predict started===========")
    prediction = mlp.predict(input2)
    print("===========predict finished===========")
    cnf_matrix = confusion_matrix(pose2, prediction)
    print(cnf_matrix)
    return
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    ## TODO - done

    hidden_units = 6
    nn = MLPClassifier(activation=ACTIVATION,
                       solver='adam',
                       hidden_layer_sizes=(hidden_units, ),
                       max_iter=200)
    pose = target2[:, 1]
    nn.fit(input2, pose)

    # using index 0 because of just one hidden layer
    hidden_layer_weights = nn.coefs_[0]

    y_pred = nn.predict(input2)
    matrix = confusion_matrix(pose, y_pred)

    print("The Confusion Matrix we obtained: \n" + str(matrix))

    plot_hidden_layer_weights(hidden_layer_weights)
def ex_2_1(input2, target2):
    '''
    • Write code to train a feed-forward neural network with 1 hidden layer containing 6 hidden units
      for pose recognition. Use dataset2 for training after normalization, 'adam' as the training solver and
      train for 200 iterations.
    • Calculate the confusion matrix.
    • Plot the weights between each input neuron and the hidden neurons to visualize what the network
      has learnt in the first layer.
      Note: Use scikit-learn's confusion_matrix function to calculate the confusion matrix.
      Note: You can use the coefs_ attribute of the model to read the weights. It is a list of length
      n_layers - 1 where the ith element in the list represents the weight matrix corresponding to layer i.
      Note: Use plot_hidden_layer_weights in nn_classification_plot.py to plot the hidden weights.
    '''

    # dataset2 = normalize(input2) already done by main
    x_train = input2
    y_train = target2[:, 1]
    # print(y_train)
    nn = MLPClassifier(solver='adam',
                       activation='tanh',
                       max_iter=200,
                       hidden_layer_sizes=(6, ))
    nn.fit(x_train, y_train)
    cm = confusion_matrix(y_train, nn.predict(x_train))
    plot_hidden_layer_weights(nn.coefs_[0])
    print(cm)
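
# The coefs_ layout described in the docstring above can be checked directly.
# A minimal standalone sketch with toy data standing in for dataset2 (the
# 960-feature, 4-pose dimensions are illustrative assumptions, not the real ones):
import numpy as np
from sklearn.neural_network import MLPClassifier

X_demo = np.random.rand(100, 960)             # 100 samples, 960 features
y_demo = np.random.randint(0, 4, size=100)    # 4 pose classes

nn_demo = MLPClassifier(hidden_layer_sizes=(6,), activation='tanh',
                        solver='adam', max_iter=200)
nn_demo.fit(X_demo, y_demo)

# coefs_ has n_layers_ - 1 entries: input->hidden and hidden->output.
print(len(nn_demo.coefs_))        # 2
print(nn_demo.coefs_[0].shape)    # (960, 6): one weight column per hidden unit
print(nn_demo.coefs_[1].shape)    # (6, 4): hidden units to the 4 pose classes
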
def ex_2_2(input1, target1, input2, target2):
    ## TODO
    scores = []
    scores_train = []
    classifiers = []
    for i in range(10):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   solver="adam",
                                   max_iter=1000,
                                   activation="tanh",
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        scores.append(classifier.score(input2, target2[:, 0]))
        classifiers.append(classifier)
        scores_train.append(classifier.score(input1, target1[:, 0]))

    best_classifier = classifiers[np.argmax(scores)]
    conf_mat = confusion_matrix(target2[:, 0], best_classifier.predict(input2))
    print(conf_mat)

    plot_histogram_of_acc(scores_train, scores)

    # collect and plot the images the best classifier got wrong
    predicted_target = best_classifier.predict(input2)
    misclassified_images = []
    for i in range(len(target2[:, 0])):
        if target2[:, 0][i] != predicted_target[i]:
            misclassified_images.append(input2[i])

    for i in range(len(misclassified_images)):
        plot_image(misclassified_images[i])

def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    classifiers = []
    train_acc = np.zeros(10)
    test_acc = np.zeros(10)
    for i in range(10):
        # fixed seeds make the ten runs reproducible
        nn = MLPClassifier(hidden_layer_sizes=(20,), activation='tanh', max_iter=1000, random_state=i)
        classifiers.append(nn)
        nn.fit(input1, target1[:, 0])
        train_acc[i] = nn.score(input1, target1[:, 0])
        test_acc[i] = nn.score(input2, target2[:, 0])
    # the best network is the one with the highest test accuracy
    i_best = np.argmax(test_acc)
    y_pred = classifiers[i_best].predict(input2)
    C = confusion_matrix(target2[:, 0], y_pred)
    return train_acc, test_acc, y_pred, C
def trainModel():
    sss = []
    train_list = [["comp.speech/train/s1.wav", 0], ["comp.speech/train/s2.wav", 1], ["comp.speech/train/s3.wav", 2],
                  ["comp.speech/train/s4.wav", 3], ["comp.speech/train/s5.wav", 4], ["comp.speech/train/s6.wav", 5],
                  ["comp.speech/train/s7.wav", 6], ["comp.speech/train/s8.wav", 7]]

    for wav_name in train_list:
        add_wav_to_db(wav_name[0], wav_name[1], sss)

    data = []
    ans = []
    for bucket in sss:
        for v in bucket:
            data.append(v[0])
            ans.append(v[1])

    clfNeural = MLPClassifier()
    clfNeural.fit(data, ans)

    clfForest = DecisionTreeClassifier(max_depth=250)
    clfForest.fit(data, ans)

    joblib.dump(clfNeural, 'model.pkl')
    joblib.dump(clfForest, 'forest.pkl')
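
# The dumped models can be restored with joblib.load. A minimal sketch;
# extract_features is a hypothetical stand-in for the per-frame feature
# extraction that add_wav_to_db performs (it is not shown in this snippet):
import joblib

clfNeural = joblib.load('model.pkl')
clfForest = joblib.load('forest.pkl')

features = extract_features("comp.speech/train/s1.wav")  # hypothetical helper
print(clfNeural.predict(features))
print(clfForest.predict(features))
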
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    ## TODO
    hidden_units = 20

    test_face = target2[:, 0]
    train_face = target1[:, 0]

    test_accuracy = np.zeros(10)
    train_accuracy = np.zeros(10)

    best_network = None
    max_accuracy = 0

    for i in range(0, 10):
        # build a fresh classifier per seed; reusing a single instance would
        # let later fits overwrite the weights of a stored "best" network
        nn = MLPClassifier(activation=ACTIVATION,
                           solver="adam",
                           hidden_layer_sizes=(hidden_units, ),
                           max_iter=1000,
                           random_state=i)

        nn.fit(input1, train_face)
        train_accuracy[i] = nn.score(input1, train_face)
        test_accuracy[i] = nn.score(input2, test_face)

        if test_accuracy[i] > max_accuracy:
            best_network = nn
            max_accuracy = test_accuracy[i]

    plot_histogram_of_acc(train_accuracy, test_accuracy)

    # Use the best network to calculate the confusion matrix for the test set.
    y_pred = best_network.predict(input2)
    matrix = confusion_matrix(test_face, y_pred)

    print("The Confusion Matrix we obtained: \n" + str(matrix))

    # Plot a few misclassified images.
    misclassified = np.flatnonzero(test_face != y_pred)
    for _ in range(4):
        plot_random_images(input2[misclassified])
def ex_2_1(input2, target2):
    ## TODO
    classifier = MLPClassifier(hidden_layer_sizes=(6, ),
                               solver="adam",
                               max_iter=200,
                               activation="tanh")

    classifier.fit(input2, target2[:, 1])
    con_mat = confusion_matrix(target2[:, 1], classifier.predict(input2))
    print(con_mat)
    plot_hidden_layer_weights(classifier.coefs_[0])
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """

    #declaring variables used for MLPClassifier
    hidden_layers = 20
    solver_mode = 'adam'
    activation_mode = 'tanh'
    max_iter = 1000

    max_accuracy = 0.0

    train_accuracy = []
    test_accuracy = []
    cfn = []

    for m in range(10):
        cf = MLPClassifier(hidden_layer_sizes=(hidden_layers, ),
                           activation=activation_mode,
                           solver=solver_mode,
                           random_state=m,
                           max_iter=max_iter)
        cf.fit(input1, target1[:, 0])

        train_accuracy.append(cf.score(input1, target1[:, 0]))

        current_test_accuracy = cf.score(input2, target2[:, 0])
        test_accuracy.append(current_test_accuracy)

        # keep the confusion matrix of the best network seen so far
        if current_test_accuracy > max_accuracy:
            cfn = confusion_matrix(target2[:, 0], cf.predict(input2))
            max_accuracy = current_test_accuracy

    print(cfn)

    # plot the accuracy histogram once, over all ten runs
    plot_histogram_of_acc(train_accuracy, test_accuracy)

def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    ## TODO
    pose = target2[:, 1]
    nn = MLPClassifier(hidden_layer_sizes=(6,), activation='tanh', max_iter=200)
    nn.fit(input2, pose)
    y_pred = nn.predict(input2)
    C = confusion_matrix(pose, y_pred, labels=None, sample_weight=None)
    plot_hidden_layer_weights(nn.coefs_[0])
    return C
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    n = 10

    train_acc = np.zeros(n)
    test_acc = np.zeros(n)
    pred_test = np.zeros((n, input2.shape[0]))
    coefs = np.zeros((n, input2.shape[1], 20))

    # target1[:, 0] labels the 20 persons in the dataset

    for i in range(n):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   activation='tanh',
                                   solver='adam',
                                   max_iter=5000,
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        pred_test[i] = classifier.predict(input2)
        coefs[i] = classifier.coefs_[0]
        train_acc[i] = classifier.score(input1, target1[:, 0])
        test_acc[i] = classifier.score(input2, target2[:, 0])

    # select the best network by test accuracy instead of hard-coding a seed
    i_best = np.argmax(test_acc)
    misclassified = np.flatnonzero(pred_test[i_best] != target2[:, 0])
    print(misclassified)
    # plot the misclassified test images in pairs
    for j in range(0, len(misclassified) - 1, 2):
        plot_random_images(np.row_stack((input2[misclassified[j], :],
                                         input2[misclassified[j + 1], :])))

    plot_histogram_of_acc(train_acc, test_acc)

    confmat = confusion_matrix(target2[:, 0], pred_test[i_best])
    print(confmat)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    train = input1
    test = input2
    target_train = target1[:, 1]
    target_test = target2[:, 1]


    ## TODO
    n_hidden_neurons = 20

    accu_list_train = np.zeros((10,1))
    accu_list_test = np.zeros((10, 1))

    # Find the best seed
    for seed in range(10):
        nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                           hidden_layer_sizes=(n_hidden_neurons,), random_state=seed)
        nn.fit(train, target_train)
        accu_list_train[seed] = nn.score(train, target_train)
        accu_list_test[seed] = nn.score(test, target_test)

    print(accu_list_train)
    print(accu_list_test)

    # Retrain with the best seed (judged by test accuracy)
    best_seed = np.argmax(accu_list_test)
    best_nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                            hidden_layer_sizes=(n_hidden_neurons,), random_state=best_seed)
    best_nn.fit(train, target_train)

    # Evaluate the confusion matrix with the best NN
    predictions = best_nn.predict(test)
    C = confusion_matrix(target_test, predictions)
    print(C)

    # Plot results
    plot_histogram_of_acc(accu_list_train, accu_list_test)

    # Indices of the misclassified test images
    misclassified = np.nonzero(target_test - predictions)[0]
    print(misclassified)
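
# To actually look at those misclassifications, the indices can be fed back
# into the plotting helper. A sketch continuing from the function above,
# assuming plot_image from nn_classification_plot.py takes one flattened
# image row, as in the other solutions:
for idx in misclassified[:5]:
    print("true pose:", target_test[idx], "predicted:", predictions[idx])
    plot_image(test[idx])
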
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    ## TODO
    n_hidden_neurons = 6
    nn = MLPClassifier(activation='tanh', solver='adam', max_iter=200, hidden_layer_sizes=(n_hidden_neurons,))
    # pose is the 2nd column (index 1) of target2, as in the other solutions
    target = target2[:, 1]
    ## Train the network
    nn.fit(input2, target)
    predictions = nn.predict(input2)
    C = confusion_matrix(target, predictions)
    hidden_layer_weights = nn.coefs_[0]
    plot_hidden_layer_weights(hidden_layer_weights)
    print(C)
    def train(self, labeledDoc):
        """
        Entrena el modelo final de clasificacion
        :param labeledDoc: objeto labeledDoc
        :return: True si todo correcto, Raise exception si fallo
        """
        if self.save_loc is None:
            raise UnboundLocalError("Should have set the save path <setSaveLocation>")

        if self.dependenceModel is None:
            raise UnboundLocalError("Should have set the TextProcessing.Doc2Vec model <setDependenceModel>")

        tags_id = {}
        Y = []
        X = []
        for doc in labeledDoc:
            for tag in doc.tags[1:]:
                if tag not in tags_id:
                    tags_id[tag] = len(tags_id)

        labeledDoc.reloadDoc()
        for doc in labeledDoc:
            tags = doc.tags
            text = doc.words
            auxY = np.zeros(len(tags_id))
            for tag in tags[1:]:
                auxY[tags_id[tag]] = 1.

            Y.append(auxY)
            vecX = self.dependenceModel.predict(text)[0]
            X.append(vecX)


        Y = np.array(Y)
        X = np.array(X)

        # 'algorithm' was renamed to 'solver' in scikit-learn 0.18
        clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1)
        clf.fit(X, Y)
        print(clf.predict(X))

        joblib.dump(clf, self.save_loc)
        with open(self.save_loc+"_tags_id", "w") as fout:
            fout.write(json.dumps(tags_id))
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """

    classifier = MLPClassifier(hidden_layer_sizes=(6, ),
                               activation='tanh',
                               solver='adam',
                               max_iter=200)
    classifier.fit(input2, target2[:, 1])
    pred2 = classifier.predict(input2)
    confmat = confusion_matrix(target2[:, 1], pred2)
    coefs = classifier.coefs_
    print(confmat)
    plot_hidden_layer_weights(coefs[0])
class MLPClassifierImpl():

    def __init__(self, hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10):
        self._hyperparams = {
            'hidden_layer_sizes': hidden_layer_sizes,
            'activation': activation,
            'solver': solver,
            'alpha': alpha,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'learning_rate_init': learning_rate_init,
            'power_t': power_t,
            'max_iter': max_iter,
            'shuffle': shuffle,
            'random_state': random_state,
            'tol': tol,
            'verbose': verbose,
            'warm_start': warm_start,
            'momentum': momentum,
            'nesterovs_momentum': nesterovs_momentum,
            'early_stopping': early_stopping,
            'validation_fraction': validation_fraction,
            'beta_1': beta_1,
            'beta_2': beta_2,
            'epsilon': epsilon,
            'n_iter_no_change': n_iter_no_change}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if (y is not None):
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)

    def predict_proba(self, X):
        return self._sklearn_model.predict_proba(X)
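
# A minimal usage sketch for the wrapper above, assuming SKLModel is bound to
# scikit-learn's MLPClassifier (the snippet does not show that import):
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier as SKLModel

X_iris, y_iris = load_iris(return_X_y=True)

impl = MLPClassifierImpl(hidden_layer_sizes=(10,), max_iter=1000, random_state=0)
impl.fit(X_iris, y_iris)                     # builds the underlying SKLModel and fits it
print(impl.predict(X_iris[:5]))              # class labels for the first five samples
print(impl.predict_proba(X_iris[:5]).shape)  # (5, 3): one probability per class
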
    def runTest(self, trainingFilename, startIndex, endIndex):

        a = Atomizer('learn')
        e = FeaturesExtractor()

        p = InputDataProcessor(a, e, (0.2, 0.8))
        r = InputDataReader(p)
        (X, y) = r.read_features(trainingFilename)

        n = MLPClassifier(solver='lbfgs',
                          alpha=1e-5,
                          hidden_layer_sizes=(5, ),
                          random_state=1)

        n.fit(X, y)

        a = Atomizer('test')
        e = FeaturesExtractor()

        t = Tester(a, e, n, 0.99)

        for i in range(startIndex, endIndex):
            testFilename = "suspicious-document{:05d}".format(i)
            test_file = r.get_file("dataSets/part{}/{}".format(
                1, testFilename))
            b = t.is_plagiarised(test_file)
            if not b:
                continue
            print('system answer: ' + str(b[0]))

            print('actual state: ' + str(not not test_file['metadata']))
            with open("wyniki.csv", 'a') as csv_file:
                wr = csv.writer(csv_file)
                row = [
                    trainingFilename, testFilename,
                    str(b[0]),
                    str(not not test_file['metadata'])
                ]
                wr.writerows([row])
def neural_net_2(train, test, val, train_out, test_out, val_out, BigSigma_inv):
    clf = MLPClassifier(solver='sgd',
                        alpha=1e-5,
                        hidden_layer_sizes=(100, 1),
                        activation='logistic',
                        batch_size=BATCH_HUMAN,
                        shuffle=True,
                        max_iter=5000)

    scaler = StandardScaler()
    scaler.fit(train)
    train1 = scaler.transform(train)
    # apply the same transformation to the test and validation data
    test = scaler.transform(test)
    val = scaler.transform(val)
    train_out = train_out.astype(float)
    clf.fit(X=train1, y=train_out)
    predict_test = clf.predict(test)
    predict_val = clf.predict(val)
    print("TEST ERMS ACCURACY", mean_squared_error(test_out, predict_test),
          acc_manual(test_out, predict_test))
    print("VAL ERMS ACCURACY", mean_squared_error(val_out, predict_val),
          acc_manual(val_out, predict_val))
    def compute(self):
        # Iterate Leave-One-Out Index over all vectors
        actual_matrix = self.get_actual_data_matrix()
        for params_list_index in range(len(self._params_list)):
            params = self._params_list[params_list_index]
            current_params_result = self._params_result_list[params_list_index]
            for loo_index in range(self.get_vector_count()):
                # Prepare data and labels for the current leave-one-out split
                train_data = [[0] * self.get_actual_feature_count()
                              for _ in range(self.get_vector_count() - 1)]
                train_labels = [0] * (self.get_vector_count() - 1)
                test_data = [[0] * self.get_actual_feature_count()]
                test_labels = [self._labels[loo_index]]
                y1 = 0
                for y in range(self.get_vector_count()):
                    if y != loo_index:
                        for x in range(self.get_actual_feature_count()):
                            train_data[y1][x] = actual_matrix[y][x]
                        train_labels[y1] = self._labels[y]
                        y1 = y1 + 1
                for x in range(self.get_actual_feature_count()):
                    test_data[0][x] = actual_matrix[loo_index][x]

                #clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
                clf = MLPClassifier(**params)
                clf.fit(train_data, train_labels)
                res = clf.predict(test_data)
                current_params_result.predicted_labels[loo_index] = res[0]
                #print(repr(self.get_labels()[loo_index])+"\t"+repr(res[0]))

            self._commit_params_computation(params_list_index)
        self._complete_computation()
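
# The same leave-one-out protocol can be written with scikit-learn's own
# splitter. A minimal standalone sketch of the idea on toy data (not a
# drop-in replacement for the class above):
import numpy as np
from sklearn.model_selection import LeaveOneOut
from sklearn.neural_network import MLPClassifier

X_loo = np.random.rand(30, 8)             # toy feature matrix
y_loo = np.random.randint(0, 2, size=30)  # toy binary labels

predicted = np.empty_like(y_loo)
for train_idx, test_idx in LeaveOneOut().split(X_loo):
    clf_loo = MLPClassifier(hidden_layer_sizes=(5,), max_iter=500)
    clf_loo.fit(X_loo[train_idx], y_loo[train_idx])
    predicted[test_idx] = clf_loo.predict(X_loo[test_idx])

print("LOO accuracy:", np.mean(predicted == y_loo))
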
def ex_2_1(input2, target2):

    target2 = np.transpose(target2)
    target2 = target2[1]
    # the exercise asks for 6 hidden units
    nn = MLPClassifier(hidden_layer_sizes=(6, ),
                       activation='tanh',
                       solver='adam',
                       max_iter=200)

    model = nn.fit(input2, target2)

    y_predict = model.predict(input2)

    # confusion_matrix expects (y_true, y_pred)
    C = confusion_matrix(target2, y_predict)
    print(C)
    hidden_layer_weights = model.coefs_

    plot_hidden_layer_weights(hidden_layer_weights[0])
def ex_2_2(input1, target1, input2, target2):

    target1 = np.transpose(target1)
    target1 = target1[0]
    target2 = np.transpose(target2)
    target2 = target2[0]

    acc_train = np.zeros((10, ))
    acc_test = np.zeros((10, ))
    best_acc = -1
    best_model = None
    for i in range(10):
        nn = MLPClassifier(random_state=i,
                           hidden_layer_sizes=(20, ),
                           activation='tanh',
                           solver='adam',
                           max_iter=1000)

        model = nn.fit(input1, target1)
        acc_train[i] = model.score(input1, target1)
        acc_test[i] = model.score(input2, target2)
        if acc_test[i] > best_acc:
            best_acc = acc_test[i]
            best_model = model
            y_predict = model.predict(input2)
            C = confusion_matrix(target2, y_predict)

    # plot up to 20 images the best network misclassified
    k = 0
    for i, a in enumerate(target2):
        if a != y_predict[i] and k < 20:
            plot_image(input2[i])
            k = k + 1

    # weights of the best network, not the last one trained
    hidden_layer_weights = best_model.coefs_
    plot_hidden_layer_weights(hidden_layer_weights[0])

    plot_histogram_of_acc(acc_train, acc_test)
    print(C)

    a = Atomizer('learn')
    e = FeaturesExtractor()

    p = InputDataProcessor(a, e, (0.2, 0.8))
    r = InputDataReader(p)


    r.read(part, start, end)
    print_time_interval("feature extraction")
    (X, y) = r.read_features('part{}_{}_{}.csv'.format(part, start, end))
    print_time_interval("reading serialized features")
    n = MLPClassifier(solver=solver, hidden_layer_sizes=(hidden, hidden), verbose=True, activation='tanh', tol=0.0)
    print(n)
    # the freshly built classifier is replaced by a previously saved network
    n = pickle.load(open("network.bin", "rb"))
    n.fit(X, y)
    print_time_interval("network learning")
    save(n)
    #a = Atomizer('test')
    #e = FeaturesExtractor()

    #t = Tester(a, e, n, 0.8)
 
    #test_file = r.get_file("dataSets/part{}/suspicious-document{:05d}".format(8, 500 * (8 - 1) + 1))
    #b = t.is_plagiarised(test_file)
    #print('odpowiedz systemu: ' + str(b[0]))

    #print('stan rzeczywisty: ' + str(not not test_file['metadata']))
    #print_time_interval()
import numpy as np
import joblib
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier

# getting MNIST of size 70k images
# (fetch_mldata("MNIST original") was removed from scikit-learn;
#  fetch_openml is its replacement)
dataset = fetch_openml('mnist_784', as_frame=False)
X = np.array(dataset.data)  # Our Features
y = np.array(dataset.target)  # Our labels

X = X.astype('float32')

#splitting Dataset into Training and Testing dataset
#First 60k instances are for Training and last 10k are for testing
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

#Normalizing Our Features in range 0 and 1
X_train = X_train / 255
X_test = X_test / 255

#creating Neural Network
# Neural Network has one hidden layer with 512 units
# Neural NetWork is of size 784-512-10

mlp = MLPClassifier(hidden_layer_sizes=(512,), max_iter=500, verbose=True)

# fitting our model (MLPClassifier.fit has no `epoch` argument;
# the number of passes is capped by max_iter)
mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))  #output : 0.99
print("Test set score: %f" % mlp.score(X_test, y_test))  #output :0.98

#saving our model
joblib.dump(mlp, "model.pkl")
df = encode_data(df)
df = delete_columns(df)
df, label = seperate_label(df)
df, scaler = scale_columns(df)

pickle.dump(scaler, open('./scaler.model', 'wb'))

x_train, x_test, y_train, y_test = train_test_split(df, label, test_size=.5)

# classifier=tree.DecisionTreeClassifier()
# classifier.fit(x_train,y_train)
# predictions=classifier.predict(x_test)

classifier = MLPClassifier()
classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)

print("Accuracy:", accuracy_score(y_test, predictions))

pickle.dump(classifier, open("model.model", 'wb'))

print(
    "Training completed. \nModel dumped successfully..\n  -----------------------"
)

###############Evaluating#################

data = pd.read_csv("ITData_eval-unlabeled.csv")
data.columns = header
df2 = data.drop(['Satisfaction'], axis=1)
        if mode == 'train':
            print("training")
            obj = ExerciseDataProvider(".")
            X = obj.x[:, 0:125]
            y = obj.t
            Xt = obj.xt[:, 0:125]
            yt = obj.tt
            print("input vec shape: ", X.shape)
            # 'algorithm' and 'spectral_mode' are not parameters of the stock
            # scikit-learn MLPClassifier; this snippet targets a modified fork
            clf_t = MLPClassifier(algorithm='l-bfgs',
                                  alpha=1e-5,
                                  hidden_layer_sizes=(X.shape[-1], 19),
                                  random_state=1,
                                  spectral_mode='fft')
            clf_t.fit(X, y)

            with open('/afs/inf.ed.ac.uk/user/s12/s1235260/model_spec3.pkl', 'wb') as m:
                p.dump((clf_t, Xt, yt), m)

        else:
            with open('/afs/inf.ed.ac.uk/user/s12/s1235260/model_spec3.pkl', 'rb') as m:
                clf, Xt, yt = p.load(m)
            y2 = clf.predict(Xt)
            print(clf.coefs_[0].shape)
            print(y2, yt)
            print(len(y2), len(yt))
            acc = sum(y2 == yt) / float(len(y2))
            print(acc)
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
data = iris.data
labels = iris.target

# We add max_iter=1000 because the default is max_iter=200 and
# that is not enough for full convergence
mlp = MLPClassifier(random_state=1, max_iter=1000)
mlp.fit(data, labels)

pred = mlp.predict(data)

print()
print('Accuracy: %.2f' % accuracy_score(labels, pred))
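
# Whether the iteration budget sufficed can be checked programmatically
# instead of watching for warnings. A minimal sketch reusing the iris
# data and labels from above:
import warnings
from sklearn.exceptions import ConvergenceWarning

mlp200 = MLPClassifier(random_state=1, max_iter=200)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", ConvergenceWarning)
    mlp200.fit(data, labels)

# n_iter_ equal to max_iter means the optimizer was cut off early
print("iterations used:", mlp200.n_iter_)
print("converged:", not any(issubclass(w.category, ConvergenceWarning) for w in caught))
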
def classify_mlp(data_path):
    result_path = '%s/mlp_results.txt' % os.path.abspath(
        os.path.join(os.path.dirname(data_path),
                     os.path.join(os.pardir, os.pardir)))
    if os.path.exists(result_path):
        if data_path in open(result_path).read():
            return True
    print(data_path)

    fname = "{}/train_labels.csv".format(data_path)
    if not os.path.exists(fname):
        return True
    tr_labels = np.loadtxt(fname)

    fname = "{}/train_embeddings.csv".format(data_path)
    tr_embeddings = np.loadtxt(fname)

    fname = "{}/val_labels.csv".format(data_path)
    val_labels = np.loadtxt(fname)

    fname = "{}/val_embeddings.csv".format(data_path)
    val_embeddings = np.loadtxt(fname)

    fname = "{}/test_labels.csv".format(data_path)
    te_labels = np.loadtxt(fname)

    fname = "{}/test_embeddings.csv".format(data_path)
    te_embeddings = np.loadtxt(fname)

    clf = MLPClassifier(random_state=2,
                        max_iter=200000000,
                        hidden_layer_sizes=(64, ))
    clf.fit(tr_embeddings, tr_labels)

    tr_score = clf.score(tr_embeddings, tr_labels)
    val_score = clf.score(val_embeddings, val_labels)
    te_score = clf.score(te_embeddings, te_labels)

    tr_predictions = clf.predict(tr_embeddings)
    val_predictions = clf.predict(val_embeddings)
    te_predictions = clf.predict(te_embeddings)

    # f1_score expects (y_true, y_pred)
    tr_fscore = f1_score(tr_labels, tr_predictions, average="weighted")
    val_fscore = f1_score(val_labels, val_predictions, average="weighted")
    te_fscore = f1_score(te_labels, te_predictions, average="weighted")
    print("tr_score %s" % tr_score)
    print("val_score %s" % val_score)
    print("te_score %s" % te_score)
    with open(result_path, mode='a') as f:
        f.write(
            'Data Path: %s\tTrain Accuracy:%s\tVal Accuracy:%s\tTest Accuracy:%s\tTrain FScore:%s\tVal FScore:%s\tTest FScore:%s\n'
            % (data_path, tr_score, val_score, te_score, tr_fscore, val_fscore,
               te_fscore))

    conf_mat = confusion_matrix(te_labels, te_predictions)
    labels = sorted(list(set(list(te_labels))))
    plot_confusion_matrix(conf_mat,
                          classes=labels,
                          normalize=True,
                          title='Normalized confusion matrix',
                          output=data_path,
                          path_name='mlp_confusion_matrix',
                          alg='mlp')
# X and y are assumed to hold the MNIST features and labels loaded as in the previous example
X = X.astype('float32')

#splitting Dataset into Training and Testing dataset
#First 60k instances are for Training and last 10k are for testing
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

#Normalizing Our Features in range 0 and 1
X_train = X_train / 255
X_test = X_test / 255

#creating Neural Network
# Neural Network has one hidden layer with 240 units
# Neural NetWork is of size 784-240-10

mlp = MLPClassifier(hidden_layer_sizes=(240,), max_iter=500, verbose=True)

#fitting our model
mlp.fit(X_train, y_train)
# Final output observed:
# Iteration 33, loss = 0.00299869

print("Training set score: %f" % mlp.score(X_train, y_train))  #output : 0.99
print("Test set score: %f" % mlp.score(X_test, y_test))  #output :0.98

#saving our model
joblib.dump(mlp, "model.pkl")
print(type(data))
print(type(row))
print(type(col))
fea_data_set=csr_matrix((data,(row,col)),shape=(row_index,max_col+1))
svd=TruncatedSVD(30)
# fit_transform fits the SVD and projects the data in one step
x_new=svd.fit_transform(fea_data_set)
# pca=PCA(n_components=30)
# pca.fit(fea_data_set)
# x_new=pca.transform(fea_data_set)
xtrain,xtest,ytrain,ytest=train_test_split(x_new,label,test_size=0.2)
lg.fit(xtrain,ytrain)
nb.fit(xtrain,ytrain)
forest.fit(xtrain,ytrain)
SVM.fit(xtrain,ytrain)
mlp.fit(xtrain,ytrain)
print("------------")
print(lg.score(xtest,ytest))
print(np.mean((lg.predict(xtest)-ytest)**2))
print(lg.score(xtrain,ytrain))
print(np.mean((lg.predict(xtrain)-ytrain)**2))
print("------------")
print(nb.score(xtest,ytest))
print(np.mean((nb.predict(xtest)-ytest)**2))
print(forest.score(xtest,ytest))
print(np.mean((forest.predict(xtest)-ytest)**2))
print(SVM.score(xtest,ytest))
print(np.mean((SVM.predict(xtest)-ytest)**2))
print(mlp.score(xtest,ytest))
print(np.mean((mlp.predict(xtest)-ytest)**2))
# Trained 4 models; results with test sets of 80%, 70%, 50% and 30% respectively
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# fit only to the training data
scaler.fit(X)

# now apply the transformations to the data:
x_train_nn = scaler.transform(X)
x_test_nn = scaler.transform(X_test)

nn = MLPClassifier(solver='lbfgs',
                   alpha=1e-5,
                   hidden_layer_sizes=(5, 2),
                   random_state=1)
print(nn.fit(x_train_nn, y))
print('Neural network model:')
nn_pred_test = nn.predict(x_test_nn)
#compute confusion matrix
from sklearn import metrics
#pred_obj = np.where(predictions==predictions[0],'N','Y')
#print(pred_obj)

cnf_matrix = metrics.confusion_matrix(y_test, nn_pred_test)
print(cnf_matrix)

# compute roc curve
import matplotlib.pyplot as plt
# use the scaled test data here as well
y_pred_proba = nn.predict_proba(x_test_nn)[:, 1]
y_binary = np.where(y == 'N', 0, 1)
fpr, tpr, _ = metrics.roc_curve(y_binary, y_pred_proba)
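
# The curve itself is never drawn in the snippet above. A minimal sketch to
# finish the plot, continuing with the fpr and tpr arrays just computed:
plt.plot(fpr, tpr, label='MLP (AUC = %.3f)' % metrics.auc(fpr, tpr))
plt.plot([0, 1], [0, 1], linestyle='--', label='chance')  # diagonal baseline
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()
plt.show()
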
def classify(data_path, path=None, counter=None, alg='svm'):
    out = os.path.join(data_path, '%s_%s_%s' % (alg, path, 'confusion.png'))
    if os.path.exists(out):
        return True
    fname = "{}/labels.csv".format(data_path)
    paths = pd.read_csv(fname, header=None).as_matrix()[:, 1]
    paths = map(os.path.basename, paths)  # Get the filename.
    # Remove the extension.
    paths = map(lambda x: x.split(".")[0], paths)
    paths = np.array(map(lambda path: os.path.splitext(path)[0], paths))

    fname = "{}/reps.csv".format(data_path)
    rawEmbeddings = pd.read_csv(fname, header=None).as_matrix()
    # print(rawEmbeddings.shape, paths.shape)
    folds = cross_validation.KFold(n=len(rawEmbeddings),
                                   random_state=1,
                                   n_folds=10,
                                   shuffle=True)
    scores = []
    fscores_weighted, fscores_macro, fscores_micro = [], [], []
    for idx, (train, test) in enumerate(folds):
        print idx, alg
        if alg == 'knn':
            clf = neighbors.KNeighborsClassifier(1)
        elif alg == 'svm':
            clf = svm.SVC(kernel='linear', C=1, max_iter=200000000)
            # clf = svm.LinearSVC()
            # clf = svm.SVC(kernel="poly", degree=5, C=1, verbose=10)
        elif alg == 'nn':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2,
                                max_iter=200000000,
                                hidden_layer_sizes=(96, 64, 32))
        elif alg == 'nnd':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2, max_iter=200000000)
        elif alg == 'poly':
            clf = svm.SVC(kernel="poly", max_iter=200000000)
        elif alg == 'rf':
            clf = RandomForestClassifier()
        else:
            raise ValueError("unknown algorithm: %s" % alg)
        clf.fit(rawEmbeddings[train], paths[train])
        gc.collect()
        score = clf.score(rawEmbeddings[test], paths[test])
        # print score, alg
        scores.append(score)
        prediction = clf.predict(rawEmbeddings[test])
        fscore_weighted = f1_score(paths[test], prediction, average="weighted")
        fscores_weighted.append(fscore_weighted)

        fscore_macro = f1_score(paths[test], prediction, average="macro")
        fscores_macro.append(fscore_macro)

        fscore_micro = f1_score(paths[test], prediction, average="micro")
        fscores_micro.append(fscore_micro)
    accuracy_dir = os.path.abspath(
        os.path.join(data_path, 'accuracies_%s.txt' % alg))

    with open(accuracy_dir, "wb") as file:
        for i in scores:
            file.writelines("%s,%s\n" % (str(i), str(counter)))
    # print "KNN Avg. score %s" % (reduce(operator.add, scores) / len(folds))
    # print "MLP Avg. score %s" % (reduce(operator.add, scores3) / len(folds))
    print "Avg. score %s" % (reduce(operator.add, scores) /
                             len(folds)), data_path
    result_path = "{}/{}_{}.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, scores) / len(folds))), str(counter), alg))
    fscores_weighted_result_path = "{}/{}_{}_fscores_weighted.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(fscores_weighted_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_weighted) / len(folds))),
                                      str(counter), alg))

    fscores_macro_result_path = "{}/{}_{}_fscores_macro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(fscores_macro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_macro) / len(folds))), str(counter),
                                      alg))

    fscores_micro_result_path = "{}/{}_{}_fscores_micro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(fscores_micro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_micro) / len(folds))), str(counter),
                                      alg))
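
# For reference, the ten-fold accuracy estimate above can be reproduced far
# more compactly with the modern model_selection API. A minimal standalone
# sketch with toy data standing in for the embeddings (Python 3 / current
# scikit-learn, unlike the Python 2 snippet above):
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neural_network import MLPClassifier

rng = np.random.RandomState(1)
embeddings = rng.rand(200, 128)         # stand-in for reps.csv
identities = rng.randint(0, 10, 200)    # stand-in for the identity labels

folds = KFold(n_splits=10, shuffle=True, random_state=1)
clf = MLPClassifier(random_state=2, max_iter=2000, hidden_layer_sizes=(96, 64, 32))
scores = cross_val_score(clf, embeddings, identities, cv=folds)
print("Avg. score %s" % scores.mean())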