Example No. 1
def perceptron_accuracy(weights, ap=False):
    right = 0
    for i in range(len(data)):
        if not ap:
            v = p.predict(data[i], weights=weights)
        else:
            v = p.predict(data[i], weights=weights, ap=True)
        # The antiperceptron predicts the opposite class, so a mismatch
        # with the stored label counts as a correct classification.
        if not ap:
            if v == data[i][-1]:
                right += 1
        elif v != data[i][-1]:
            right += 1

    return (right / len(data)) * 100
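A minimal usage sketch, assuming a module-level data array with the class label in each row's last column and already-trained weight vectors (the names trained_weights and ap_weights are hypothetical):

print(perceptron_accuracy(trained_weights))        # perceptron accuracy, in percent
print(perceptron_accuracy(ap_weights, ap=True))    # antiperceptron accuracy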
Example No. 2
def test_predict():
    xx = np.array([[3, 1], [1, 3]])
    w = np.array([3, -3])
    b = 0
    predictions = perceptron.predict(xx, w, b)
    assert predictions[0] == 1
    assert predictions[1] == 0
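The asserted values follow directly from the sign of the pre-activation: with w = (3, -3) and b = 0, the first row scores 3*3 + 1*(-3) = 6 > 0 (class 1) and the second scores 1*3 + 3*(-3) = -6 < 0 (class 0), assuming predict thresholds w·x + b at zero.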
Example No. 3
def test_train():
    w, b = np.array([.0, .0]), .0
    w, b, costs = trainer.train(allx(), ally(), w, b, 1000)
    assert almost_eq(costs[-1], 0.2)

    predictions = perceptron.predict(np.array([[3, 1], [1, 3]]), w, b)
    assert predictions[0] == 1
    assert predictions[1] == 0
Example No. 4
def accuracy(data, weights, K):
    correct = 0
    for i in range(len(data)):
        label = data[i]['label']
        prediction = predict(data, weights, i, K)
        if int(label) == prediction:
            correct += 1
    return correct / len(data)
Example No. 5
def evaluate_circuit(n=208, eval_perceptron=True):
    test_idxs = np.random.choice(data.shape[0], n, replace=False)
    correct_circuit = 0
    correct_perceptron = 0

    for i in test_idxs:
        trial_result = trial(weights, p_weights, ap_weights, i, plot=False, num=1)
        if trial_result[0] == data[i][-1]:
            correct_circuit += 1

        if eval_perceptron:
            # This uses multiple noisy training rows over n epochs; should we
            # instead train the perceptron on a single noisy row n times?
            perceptron_training_data = data[test_idxs, :-1]
            perceptron_noisy_data = perceptron_training_data + np.random.rand(n, 60)
            classes = data[test_idxs, -1].reshape(n, 1)
            perceptron_noisy_data = np.append(perceptron_noisy_data, classes, axis=1)

            # Train the perceptron on noisy data for as many epochs as it took
            # the circuit to decide, then score it on the clean row.
            perceptron = p.gradient_descent(perceptron_noisy_data, 0.1, trial_result[1])

            if p.predict(data[i], perceptron) == data[i][-1]:
                correct_perceptron += 1

    accuracy_circuit = (correct_circuit / n) * 100
    accuracy_perceptron = (correct_perceptron / n) * 100
    print("circuit accuracy: ", accuracy_circuit)
    print("perceptron accuracy:", accuracy_perceptron)
Example No. 6
# In the format of your classifier

TP_perceptron = []
FP_perceptron = []
TP_KNNWeight = []
FP_KNNWeight = []
TP_KNN = []
FP_KNN = []
TP_NN = []
FP_NN = []
Xtr, ytr, Xte, yte = split(X, y)

for k in range(len(np.unique(yte))):
    labels_k = perceptron.two_classes(ytr, k)
    weights, errors = perceptron.train(Xtr, labels_k, with_errors=True)
    pred = np.array([perceptron.predict(weights, x) for x in Xte])
    TP_perceptron.append(true_positive_perceptron(pred, yte, k))
    FP_perceptron.append(false_negative_perceptron(pred, yte, k))
    # For each class we have a TP and FP

S = [0.1, 0.2, 0.5, 1, 2, 5]
K = [2, 3, 4, 5, 10, 15]
z1 = predict(Xte, Xtr, ytr)

TP_NN.append(true_positive(z1, yte))
FP_NN.append(false_negative(z1, yte))
s = 0.1
k = 10

Z1 = []
Z2 = []
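This is one-vs-rest training: perceptron.two_classes recodes class k against all the others, so each class ends up with its own weight vector and its own entry in the TP/FP lists. The S, K, Z1, and Z2 variables at the end are presumably staging for a parameter sweep that lies outside this excerpt.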
Example No. 7
from perceptron import train, predict

# We can use any set of Xs and Ys to fit the perceptron,
# but training converges only if the data are linearly separable
# (a hyperplane can split the two classes exactly)

# Data given for the assignment
X = [[0.25, 0.353], [0.25, 0.471], [0.5, 0.353], [0.5, 0.647], [0.75, 0.705],
     [0.75, 0.882], [1, 0.705], [1, 1]]
Y = [0, 1, 0, 1, 0, 1, 0, 1]

W, b = train(X, Y, learning_rate=0.1)

# Checking if the converged values are correct
print('Prediction is correct?', predict(X, W, b) == Y)
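For reference, a minimal sketch of the train/predict pair these calls assume (hypothetical; the real perceptron module is not shown here, and the epoch cap is an arbitrary choice):

def train(X, Y, learning_rate=0.1, epochs=100):
    # Classic perceptron rule: nudge W and b toward each misclassified example.
    W, b = [0.0] * len(X[0]), 0.0
    for _ in range(epochs):
        for x, y in zip(X, Y):
            pred = 1 if sum(w * xi for w, xi in zip(W, x)) + b > 0 else 0
            error = y - pred  # 0 when correct, otherwise -1 or +1
            W = [w + learning_rate * error * xi for w, xi in zip(W, x)]
            b += learning_rate * error
    return W, b

def predict(X, W, b):
    # Threshold the pre-activation of every row at zero.
    return [1 if sum(w * xi for w, xi in zip(W, x)) + b > 0 else 0 for x in X]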
Example No. 8
    print("Loaded data.")

    X_train, Y_train = reviews_to_features(training_set)
    print("Featurized training data.")

    weights, losses = perceptron.train(X_train,
                                       Y_train,
                                       iterations=ITERATIONS,
                                       eta=ETA)
    print("Done training.")

    X_test, Y_test = reviews_to_features(dev_set)
    print("Featurized test data.")

    test_scores = perceptron.score(X_test.T, weights)
    test_sentiments = perceptron.predict(test_scores)
    (accuracy, recall, precision, f1, false_positive_rate,
     false_negative_rate) = perceptron.test(Y_test, test_sentiments)
    print(
        f"Predicted scores w/ threshold = 0.5, iterations = {ITERATIONS}, eta = {ETA}:"
    )
    print(f"  - accuracy      : {accuracy}")
    print(f"  - recall        : {recall}")
    print(f"  - precision     : {precision}")
    print(f"  - f1            : {f1}")
    print(f"  - fpr           : {false_positive_rate}")
    print(f"  - fnr           : {false_negative_rate}")

    if ENABLE_ROC:
        fpr = []
        fnr = []
Example No. 9
x.fillna(0, inplace=True)
y = np.squeeze(a4a.iloc[:, 0])

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=0)

decay_grid = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
a4a_evaluation = dict()
weight_matrix = dict()
for decay_rate in decay_grid:
    w = perceptron(x_train, y_train, decay_rate=decay_rate)
    weight_matrix[decay_rate] = w

    y_pred = predict(x_test, w)
    accuracy, misclassification_rate = measure_accuracy(y_pred, y_test)

    a4a_evaluation[decay_rate] = [
        round(accuracy, 2),
        round(misclassification_rate, 2)
    ]

a4a_evaluation_df = pd.DataFrame(a4a_evaluation,
                                 index=['Accuracy', 'Misclassification Rate'])

labels = list(a4a_evaluation.keys())
acc = list(a4a_evaluation_df.loc['Accuracy'].values)
err = list(a4a_evaluation_df.loc['Misclassification Rate'].values)

x = np.arange(len(labels))
Example No. 10
n = 10
d = 2

data0 = np.random.randn(n, d) + 3
data1 = np.random.randn(n, d) - 3
data = np.concatenate([data0, data1])

labels = np.concatenate([np.zeros(n),
                         np.ones(n)])  # Two classes labeled 0 and 1
labels = perceptron.two_classes(labels, 0)  # Two classes relabeled -1 and 1

weights, errors = perceptron.train(data, labels, with_errors=True)
print(weights)

for i in range(data.shape[0]):
    print(i, labels[i], perceptron.predict(weights, data[i]), data[i])

pyplot.scatter(data0[:, 0], data0[:, 1], marker="x", color="r", s=100)
pyplot.scatter(data1[:, 0], data1[:, 1], marker="*", color="b", s=100)
# The learned boundary is weights[0]*x + weights[1]*y + weights[2] = 0, so the
# two axis intercepts below give the slope and offset of the line to draw.
x0 = 0
y0 = -weights[2] / weights[1]
x1 = -weights[2] / weights[0]
y1 = 0
a = (y1 - y0) / (x1 - x0)
b = y0
pyplot.plot([-10, +10], [-10 * a + b, +10 * a + b], color="g")
pyplot.xlim(-6, 6)
pyplot.ylim(-6, 6)
pyplot.show()

pyplot.plot(errors)
pyplot.show()
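A plausible reconstruction of perceptron.two_classes as used here and in the other examples (an assumption; the module itself is not shown): it maps the chosen class to +1 and every other label to -1, which is the labeling one-vs-rest perceptron training needs.

import numpy as np

def two_classes(labels, k):
    # +1 for class k, -1 for every other label (hypothetical sketch)
    return np.where(labels == k, 1, -1)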
Example No. 11
    label_list = label_array.tolist()
    return size, label_list


# calculate accuracy
def accuracy(predictions, targets, size):
    correct_num = 0
    for i in range(size):
        if predictions[i] == targets[i]:
            correct_num += 1
    return correct_num / size


if __name__ == "__main__":
    train_size, train_label = read_label("train_label")
    train_size, rows, cols, train_data = read_data("train_data")
    test_size, test_label = read_label("test_label")
    test_size, rows, cols, test_data = read_data("test_data")
    # learning rate = 0.1
    print("Learning rate = 0.1")
    weights = perceptron.train_perceptron(train_data, train_label, train_size,
                                          rows, cols, 0.1)
    print(
        "Training accuracy",
        accuracy(perceptron.predict(weights, train_size, train_data),
                 train_label, train_size))
    print(
        "Testing accuracy",
        accuracy(perceptron.predict(weights, test_size, test_data), test_label,
                 test_size))
Example No. 12
def main():
    # Prove accuracy by comparing to the perceptron, running over all examples.
    # For now a single row (dataset[162], below) stands in for `for row in dataset:`.
    v = np.array([[0, 0, 0, 0]]).T
    tau = 1
    dt = 0.05
    steps = 0
    v_hist = np.array([[0, 0, 0, 0]]).T
    # Repeat until the decision network makes a classification via the output
    # switch units; a decision is reached when unit 3 or 4 exceeds 1.1.
    while (v[3][0] <= 1.1) and (v[2][0] <= 1.1) and (steps < 10):
        steps += 1
        row = dataset[162]
        # np.random.rand is non-negative, so the noisy row stays non-negative;
        # append 0 so the label column is left untouched.
        noise = np.random.rand(1, 60)
        noise = np.append(noise, 0)

        #Add noise to the row
        noisyRow = np.add(noise, row)

        #Let the perceptron and antiperceptron predict it - unthresholded
        p_prediction = p.predict(noisyRow, p_weights)
        ap_prediction = p.predict(noisyRow, ap_weights)

        #feed those values into the decision network
        activations = weights @ v
        print(activations)
        activations[0][0] = p_prediction + activations[0][0]
        activations[1][0] = ap_prediction + activations[1][0]

        activations = sigmoid(l, activations, bias)
        v_hist = np.concatenate((v_hist, v), axis=1)
        # Euler-Maruyama update: deterministic leak toward the activations
        # (scaled by dt) plus fresh unit Gaussian noise scaled by sqrt(dt),
        # kept at the (4, 1) shape of the state vector v.
        dv = tau * ((-v + activations) * dt +
                    np.sqrt(dt) * np.random.normal(0, 1, (4, 1)))
        v = v + dv

        if (v[3][0] > 1.1) or (v[2][0] > 1.1):
            break

    plt.figure()
    plt.plot(v_hist[0, :], dashes=[2, 2])
    plt.plot(v_hist[1, :], dashes=[1, 1])
    plt.plot(v_hist[2, :], dashes=[2, 2])
    plt.plot(v_hist[3, :], dashes=[3, 3])
    #plt.plot(v2_v1_diff, dashes = [5,5])
    plt.legend(["v1", "v2", "v3", "v4"], loc=0)
    plt.ylabel("activation")
    plt.xlabel("steps")
    plt.grid(True)
    plt.show()

    #plt.figure()
    #smoothed_v4 = signal.savgol_filter(v_hist[3,:], 901, 4)
    #smoothed_v3 = signal.savgol_filter(v_hist[2,:], 901, 4)
    #plt.plot(smoothed_v4)
    #plt.plot(smoothed_v3)
    #plt.legend(["smooth v4", "smooth v3"], loc = 0)
    #plt.grid('on')
    #plt.show()

    print(steps)
    print(steps * 60)  # total inputs sampled: 60 features per step
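The loop above integrates a stochastic leaky-integrator network with the Euler-Maruyama scheme: dv = tau * ((-v + f(W v)) * dt + sqrt(dt) * xi), xi ~ N(0, 1). The deterministic drift is scaled by dt while the noise is scaled by sqrt(dt), which keeps the noise variance proportional to the step size.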
Example No. 13
def file_main():
    data_set = [['ACD', 0.0231, 1.157, 0.919, 93.061, 0.0917],
                ['ACD', 0.0296, 1.1183, 0.9356, 80.9492, 0.0681],
                ['ACD', 0.0471, 1.3537, 1.0208, 108.7305, 0.091],
                ['ACD', 0.0165, 1.2621, 1.1879, 116.3081, 0.1154],
                ['ACD', 0.0236, 1.117, 0.8673, 77.9446, 0.066],
                ['ACD', 0.008, 1.413, 1.0474, 102.6556, 0.07],
                ['ACD', 0.0267, 1.4068, 1.1244, 107.5716, 0.0734],
                ['ACD', 0.0838, 1.1258, 1.0406, 100.2574, 0.0474],
                ['ACD', 0.0225, 1.2126, 0.9824, 98.885, 0.0928],
                ['ACD', 0.0639, 2.1101, 1.2162, 137.5727, 0.159],
                ['ACD', 0.0021, 0.8333, 0.7004, 68.5042, 0.0464],
                ['ACD', 0.0208, 1.5963, 1.0204, 142.5501, 0.1329],
                ['HM', 0.461, 2.1225, 1.5204, 133.2334, 0.0623],
                ['HM', 0.2118, 1.5373, 1.2326, 99.011, 0.0808],
                ['HM', 0.2308, 2.3465, 1.3419, 106.459, 0.0548],
                ['HM', 0.5372, 2.171, 1.8759, 135.6919, 0.0602],
                ['HM', 0.318, 2.1527, 1.1671, 130.0122, 0.0651],
                ['HM', 0.2434, 2.3092, 1.6817, 179.5259, 0.1192],
                ['HM', 0.4191, 1.5634, 0.8894, 117.2704, 0.0265],
                ['HM', 0.5952, 2.6538, 1.5957, 152.4041, 0.0752],
                ['HM', 0.3963, 2.0715, 1.2956, 124.8764, 0.094],
                ['HM', 0.1638, 1.8827, 1.0938, 105.0277, 0.0384],
                ['HM', 0.2752, 3.0803, 1.6789, 146.2936, 0.0803],
                ['HM', 0.4227, 1.6529, 0.8303, 84.3475, 0.0399]]
    # # decision tree training set
    # data_set = []
    # # Arthur Conan Doyle
    # data_set.append(process("lost_world.txt", "ACD"))
    # data_set.append(process("sherlock.txt", "ACD"))
    # data_set.append(process("study_in_scarlet.txt", "ACD"))
    # data_set.append(process("baskervilles.txt", "ACD"))
    # data_set.append(process("sign_four.txt", "ACD"))
    # data_set.append(process("return.txt", "ACD"))
    # data_set.append(process("memoirs.txt", "ACD"))
    # data_set.append(process("valley.txt", "ACD"))
    # data_set.append(process("tales_terror.txt", "ACD"))
    # data_set.append(process("white_company.txt", "ACD"))
    # data_set.append(process("last_bow.txt", "ACD"))
    # data_set.append(process("boer_war.txt", "ACD"))
    #
    # # Herman Melville
    # data_set.append(process("moby_dick.txt", "HM"))
    # data_set.append(process("bartleby.txt", "HM"))
    # data_set.append(process("confidence_man.txt", "HM"))
    # data_set.append(process("pierre.txt", "HM"))
    # data_set.append(process("white_jacket.txt", "HM"))
    # data_set.append(process("typee.txt", "HM"))
    # data_set.append(process("battle_pieces.txt", "HM"))
    # data_set.append(process("redburn.txt", "HM"))
    # data_set.append(process("omoo.txt", "HM"))
    # data_set.append(process("israel_potter.txt", "HM"))
    # data_set.append(process("my_chimney.txt", "HM"))
    # data_set.append(process("mardi.txt", "HM"))
    # Decision tree test data
    shuffle(data_set)
    # Read test data and process it
    file = open("test_set_file.txt", "r")
    test_data = []
    get_test_data(file, test_data)
    processed_test_data = []
    for data_point in test_data:
        processed_test_data.append(
            process_string([data_point.author], data_point.data))
    correct_count = 0
    total_count = 0
    # Decision tree classifier usage
    decision_tree = dt.DecisionTree(data_set, ["HM", "ACD"], 4)
    processed_test_data.extend(data_set)
    shuffle(processed_test_data)
    for data_point in processed_test_data:
        total_count += 1
        node = decision_tree.tree
        while node.FINAL_LABEL == "":
            if data_point[node.att_index] <= node.threshold:
                node = node.left
            elif data_point[node.att_index] > node.threshold:
                node = node.right
        if node.FINAL_LABEL == data_point[0]:
            correct_count += 1
    # Perceptron classifier
    processed_test_data.extend(copy.deepcopy(data_set))
    for data_point in data_set:
        if "ACD" in data_point[0]:
            data_point[0] = 1
        elif "HM" in data_point[0]:
            data_point[0] = 0
    for data_point in processed_test_data:
        if "ACD" in data_point[0]:
            data_point[0] = 1
        elif "HM" in data_point[0]:
            data_point[0] = 0
    weights = pt.train_perceptron(processed_test_data, 0.01, 20000)
    for data_point in processed_test_data:
        total_count += 1
        prediction = pt.predict(data_point, weights)
        if data_point[0] == int(prediction):
            correct_count += 1
    print("Perceptron weights: " + str(weights))
    print("Total correct: " + str((correct_count / total_count) * 100))
    print("done")
Example No. 14
def runTrials(trial, trainingDigits, testingDigits, testingLabelsData, flattenedTestDigits, percent):

    testIndices = []
    guesses = []
    #Start timing bayes
    bayesStart = time.process_time()
    learningDigits = classifier.pickData(trainingDigits, percent)

    formattedDigits = classifier.getFormattedTraining(learningDigits)
            
    condProbCounters = classifier.getCondProbs(learningDigits)
    for i in range(0, len(testingDigits)):
        testIndices.append(i)
        guesses.append(classifier.naiveBayes(i, testingDigits, formattedDigits, percent, condProbCounters))
    
    #End timing bayes
    bayesEnd = time.process_time()
    bayesTrainingTime = bayesEnd - bayesStart


    numCorrect = 0
    for tup in guesses:
        digitIndex, guess = tup
        if int(guess) == int(testingLabelsData[digitIndex]):
            numCorrect += 1
    accBayes = (numCorrect / len(testingDigits)) * 100
    print("Accuracy for Naive Bayes classifier trial: " + str(trial) + " is: " + str(accBayes))

    #Begin perceptron training and prediction:
    numPixels = classifier.digitRowLen * classifier.digitColLen
    WVectors = perceptron.initializeWeightVecs(numPixels)

    
    #Start training time for perceptron
    startTime = time.process_time()
    for iteration in range(0, maxIter):
        perceptron.train(WVectors, trainingDigits, percent, iteration)

    #End training time
    endTime = time.process_time()
    perceptronTrainingTime = endTime - startTime 

    numCorrect = 0
    for k in range(0, len(flattenedTestDigits)):
        testDigit = flattenedTestDigits[k]
        guess = perceptron.predict(WVectors, testDigit)
        if guess == int(testingLabelsData[k]):
            numCorrect += 1
    acc = (numCorrect / len(testingDigits)) * 100
    print("Accuracy for Perceptron trial: " + str(trial) + " is: " + str(acc))

        
    percentFolderPath = "{0:d} percent/".format(int(percent*100))


    with open("trainingData/" + percentFolderPath + "output{0:d}.txt".format(trial), "w+") as f:
        with redirect_stdout(f):
            print("%s %s %s %s" % ("Bayes Acc: ", str(accBayes), "Bayes training time: ", str(bayesTrainingTime)))
            print("%s %s %s %s" % ("Percep Acc: ", str(acc), "Percep training time: ", str(perceptronTrainingTime)))
Example No. 15
# instantiate the perceptron
# (note: this rebinds the module name `perceptron` to the instance)
perceptron = perceptron.Perceptron()
perceptron.input_length = 4  # set input length

# prepare dataset
iris_dataset = generate_iris_dataset()
ts_input_iris = np.array([specs[0] for specs in iris_dataset])  # input data
ts_output_iris = np.array([specs[1]
                           for specs in iris_dataset])  # expected output data

# train
perceptron.train(ts_input_iris, ts_output_iris)

# test the whole dataset


def test_all():
    for ts_input, expected in zip(ts_input_iris, ts_output_iris):
        output = perceptron.predict(ts_input)
        result = 'OK' if expected == output else 'FAIL'
        iris_type_name = "Iris-setosa" if output == 1 else "Iris-versicolor"
        print(f'Input:{ts_input} Output: {output} = {iris_type_name} [{result}]')


# you can also test a single set of iris measurements
p_input = [5.9, 3.0, 4.2, 1.0]
if perceptron.predict(p_input) == 1:
    print("It is an Iris setosa")
else:
    print("It is an Iris versicolor")
Example No. 16
def main():
    data_set = [['ACD', 0.0231, 1.157, 0.919, 93.061, 0.0917],
                ['ACD', 0.0296, 1.1183, 0.9356, 80.9492, 0.0681],
                ['ACD', 0.0471, 1.3537, 1.0208, 108.7305, 0.091],
                ['ACD', 0.0165, 1.2621, 1.1879, 116.3081, 0.1154],
                ['ACD', 0.0236, 1.117, 0.8673, 77.9446, 0.066],
                ['ACD', 0.008, 1.413, 1.0474, 102.6556, 0.07],
                ['ACD', 0.0267, 1.4068, 1.1244, 107.5716, 0.0734],
                ['ACD', 0.0838, 1.1258, 1.0406, 100.2574, 0.0474],
                ['ACD', 0.0225, 1.2126, 0.9824, 98.885, 0.0928],
                ['ACD', 0.0639, 2.1101, 1.2162, 137.5727, 0.159],
                ['ACD', 0.0021, 0.8333, 0.7004, 68.5042, 0.0464],
                ['ACD', 0.0208, 1.5963, 1.0204, 142.5501, 0.1329],
                ['HM', 0.461, 2.1225, 1.5204, 133.2334, 0.0623],
                ['HM', 0.2118, 1.5373, 1.2326, 99.011, 0.0808],
                ['HM', 0.2308, 2.3465, 1.3419, 106.459, 0.0548],
                ['HM', 0.5372, 2.171, 1.8759, 135.6919, 0.0602],
                ['HM', 0.318, 2.1527, 1.1671, 130.0122, 0.0651],
                ['HM', 0.2434, 2.3092, 1.6817, 179.5259, 0.1192],
                ['HM', 0.4191, 1.5634, 0.8894, 117.2704, 0.0265],
                ['HM', 0.5952, 2.6538, 1.5957, 152.4041, 0.0752],
                ['HM', 0.3963, 2.0715, 1.2956, 124.8764, 0.094],
                ['HM', 0.1638, 1.8827, 1.0938, 105.0277, 0.0384],
                ['HM', 0.2752, 3.0803, 1.6789, 146.2936, 0.0803],
                ['HM', 0.4227, 1.6529, 0.8303, 84.3475, 0.0399]]

    if len(sys.argv) > 1:
        if sys.argv[1] != "train" and sys.argv[1] != "predict":
            print("Unknown argument, please enter 'predict' or 'train'")
            sys.exit(1)

        elif sys.argv[1] == "train":
            # Train your model
            model = input(
                "Which model would you like to train? Perceptron (p) or Decision Tree (d): "
            )
            if model != "p" and model != "d":
                print("Sorry! Wrong argument")

            elif model == "p":
                perceptron_data = copy.deepcopy(data_set)
                for data_point in perceptron_data:
                    if "ACD" in data_point[0]:
                        data_point[0] = 1
                    elif "HM" in data_point[0]:
                        data_point[0] = 0
                shuffle(perceptron_data)
                weights = pt.train_perceptron(perceptron_data, 0.01, 20000)
                predict = input(
                    "A perceptron has been trained. Would you like to make a prediction?(y/n) "
                )
                if predict == "y":
                    filename = input(
                        "Please enter the name of the file containing text for author identification: "
                    )
                    data_value = fp.process(filename, "NA")
                    prediction = pt.predict(data_value, weights)
                    if int(prediction) == 1:
                        print("Author is Arthur Conan Doyle.")
                    elif int(prediction) == 0:
                        print("Author is Herman Melville.")
            elif model == "d":
                max_depth = int(
                    input(
                        "Please enter the maximum depth of the decision tree: "
                    ))
                entropy_cutoff = float(
                    input(
                        "Please enter the entropy cutoff of the decision tree(ideal is 0.0): "
                    ))
                print("Training a decision tree on training data...")
                shuffle(data_set)  # random.shuffle shuffles in place and returns None
                tree = dt.DecisionTree(data_set, ["ACD", "HM"],
                                       max_depth, entropy_cutoff)

                predict = input(
                    "The decision tree has been trained. Would you like to make a prediction?(y/n) "
                )
                if predict == "y":
                    filename = input(
                        "Please enter the name of the file containing text for author identification: "
                    )
                    data_value = fp.process(filename, "NA")

                    node = tree
                    while node.FINAL_LABEL == "":
                        if data_value[node.att_index] <= node.threshold:
                            node = node.left
                        elif data_value[node.att_index] > node.threshold:
                            node = node.right
                    if node.FINAL_LABEL == "ACD":
                        print("The author is Arthur Conan Doyle")
                    else:
                        print("The author is Herman Melville")

        elif sys.argv[1] == "predict":
            filename = sys.argv[2]
            print("Predicting using an existing model: ")
            model_file = open("model_perceptron.txt", "r")
            line = model_file.readline().split(",")
            weights = []
            for weight in line:
                weights.append(float(weight))
            data_value = fp.process(filename, "NA")
            prediction = pt.predict(data_value, weights)
            if int(prediction) == 1:
                print("Author is Arthur Conan Doyle.")
            elif int(prediction) == 0:
                print("Author is Herman Melville.")

    else:
        print("Please enter argument 'train' or 'predict'. ")
        sys.exit(1)
Example No. 17
def test_all():
    for ts_input, expected in zip(ts_input_iris, ts_output_iris):
        output = perceptron.predict(ts_input)
        result = 'OK' if expected == output else 'FAIL'
        iris_type_name = "Iris-setosa" if output == 1 else "Iris-versicolor"
        print(f'Input:{ts_input} Output: {output} = {iris_type_name} [{result}]')
Example No. 18
def accuracy(truth, output):
    n = truth.shape[0]
    accur = 0.
    for i in range(n):
        if truth[i] == output[i]:
            accur += 1
    return accur / n


data, labels = usps.load_train()
data_test, labels_test = usps.load_test()

for k in range(10):
    labels_k = perceptron.two_classes(labels, k)
    weights, errors = perceptron.train(data, labels_k, with_errors=True)

    print(k)
    output = np.array([perceptron.predict(weights, x) for x in data])
    print("  Score (train)", accuracy(labels_k, output))
    output = np.array([perceptron.predict(weights, x) for x in data_test])
    print("  Score (test)",
          accuracy(perceptron.two_classes(labels_test, k), output))

    pyplot.clf()
    pyplot.imshow(weights[:-1].reshape((16, 16)), cmap="gray")
    pyplot.colorbar()
    pyplot.savefig("usps_" + str(k) + "-weights.png")

    pyplot.clf()  # fresh figure so the weight image doesn't bleed into the error plot
    pyplot.plot(errors)
    pyplot.savefig("usps_" + str(k) + "-errors.png")