def get_predicted_value(weight_vector, feature_vector):
    """Classify a feature vector by the sign of its dot product with the weights.

    Args:
        weight_vector: Weights passed as the first argument to ``np.dot``
            (presumably a 1-D vector -- confirm against the training code).
        feature_vector: Features passed as the second argument to ``np.dot``
            (presumably the same length as ``weight_vector``).

    Returns:
        ``1`` when the dot product is greater than or equal to zero,
        otherwise ``-1``.
    """
    activation = np.dot(weight_vector, feature_vector)
    # The comparison is deliberately ">= 0": a dot product of exactly zero is
    # labelled +1, and anything that fails the comparison falls through to -1.
    if activation >= 0:
        return 1
    return -1
# Run Script for ps1 (Perceptron Implementation)
# Drives the pipeline end to end: split the data, build feature vectors,
# train the perceptron, then measure its error.
# NOTE(review): these imports sit mid-script, after the output directory is
# prepared; presumably deliberate -- confirm before hoisting them to the top.
import split_training_data
import create_feature_vectors
import perceptron

# Part 1: Split the training data into a training set and a validation set.
split_training_data.run()

# Part 2: Transform each email in the training set into a feature vector.
# The call also returns the per-email spam labels and the vocabulary list,
# which is reused below when building the validation features.
(feature_vector_list_training,
 is_spam_list_training,
 vocabulary_list) = create_feature_vectors.run('./output_data/training_set')

# Part 3/4: Train on the training set and keep the last weight vector, then
# test the percent error obtained with that weight vector.
print('\n=========================================================================================')
print('Problem 4:')
# Training returns the weight vector together with the total number of
# misclassifications and the number of runs.
(weight_vector,
 total_number_of_misclassifications,
 number_of_runs) = perceptron.perceptron_train(feature_vector_list_training, is_spam_list_training)
# Build the validation features, passing in the training vocabulary list;
# the third returned value is discarded.
(feature_vector_list_validation,
 is_spam_list_validation,
 _) = create_feature_vectors.run('./output_data/validation_set', vocabulary_list)
# Error of the trained weight vector on the data it was trained on.
training_set_error = perceptron.perceptron_test(
    weight_vector, feature_vector_list_training, is_spam_list_training)