Esempio n. 1
0
    updated_weight_vector = weight_vector + modified_feature_vector
    return updated_weight_vector

def get_predicted_value(weight_vector, feature_vector):
    """Classify a feature vector using the sign of its dot product with the weights.

    Returns 1 when ``np.dot(weight_vector, feature_vector)`` is greater than or
    equal to zero, and -1 otherwise.
    """
    activation = np.dot(weight_vector, feature_vector)
    # A dot product of exactly zero is treated as the positive class.
    if activation >= 0:
        return 1
    return -1

if __name__ == '__main__':
    # Script entry point: train a perceptron on the training set, print the
    # training statistics, then report the error of the resulting weight
    # vector on that same training data.
    # The import lives inside the guard, so it only happens when this module
    # is executed directly as a script.
    import create_feature_vectors

    # Train on the training set to get weight vector
    print("Running with last trained weight vector")
    (feature_vector_list_training,
     is_spam_list_training,
     vocabulary_list) = create_feature_vectors.run('./output_data/training_set')
    (weight_vector,
     total_number_of_misclassifications,
     number_of_runs) = perceptron_train(
                                feature_vector_list_training, is_spam_list_training,
                                display_intermediate_steps=True)

    print('Total number of misclassifications: ' + str(total_number_of_misclassifications))
    print('Number of runs: ' + str(number_of_runs))

    # Verify error on the training set
    error = perceptron_test(weight_vector, feature_vector_list_training, is_spam_list_training)
    # Convert explicitly, as the prints above do: concatenating a non-string
    # (e.g. a numeric error value) to a str raises TypeError.
    print('Error on training set: ' + str(error))

    # Verify error on the validation set
    (feature_vector_list_validation,
Esempio n. 2
0
    os.makedirs(output_directory)
else:
    os.makedirs(output_directory)

# Run Script for ps1 (Perceptron Implementation)
import split_training_data
import create_feature_vectors
import perceptron

# Part 1: Split the training data into a training set and a validation set.
# NOTE(review): the later steps read from ./output_data/training_set and
# ./output_data/validation_set — presumably those are written by this call; confirm
# in split_training_data.run.
split_training_data.run()

# Part 2: Transform each email in the training set into a feature vector.
# The call returns three values: the feature vectors, the matching spam labels, and the
# vocabulary list.
(feature_vector_list_training,
 is_spam_list_training,
 vocabulary_list) = create_feature_vectors.run('./output_data/training_set')

# Part 3/4: Train on the training set and keep the last weight vector, then measure the
# error of that weight vector (here on the training set; the validation feature vectors
# are also built below).
print('\n=========================================================================================')
print('Problem 4:')
(weight_vector,
 total_number_of_misclassifications,
 number_of_runs) = perceptron.perceptron_train(feature_vector_list_training, is_spam_list_training)

# Build the validation feature vectors, passing in the vocabulary obtained from the
# training set; the third return value is discarded.
# NOTE(review): presumably the vocabulary is passed so validation vectors share the
# training feature layout — confirm in create_feature_vectors.run.
(feature_vector_list_validation,
 is_spam_list_validation,
 _) = create_feature_vectors.run('./output_data/validation_set', vocabulary_list)

# Error of the trained weight vector on the data it was trained on.
training_set_error = perceptron.perceptron_test(
                            weight_vector, feature_vector_list_training, is_spam_list_training)