예제 #1
0
def predict_email_spam(_filename):
    """Classify one email file from ``../data/`` and print the result.

    The file contents are passed through ``process_email`` and
    ``email_features``; the transposed feature array is then handed to
    ``Classification.predict`` and the first element of the prediction is
    printed.

    Args:
        _filename: Name of the email file, relative to ``../data/``.
    """
    # A context manager guarantees the file handle is closed, even when
    # reading raises; the bare ``open(...).read()`` leaked the handle.
    with open('../data/' + _filename) as _email_file:
        _file_contents = _email_file.read()
    _word_indices = process_email(_file_contents)
    _features = email_features(_word_indices)
    _p = Classification.predict(_features.T)
    # Only the first element of the prediction is reported.
    print('\nProcessed %s\n\nSpam Classification: %d\n' % (_filename, _p[0]))
    print('(1 indicates spam, 0 indicates not spam)\n\n')
예제 #2
0
    print('\nProcessed %s\n\nSpam Classification: %d\n' % (_filename, _p[0]))
    print('(1 indicates spam, 0 indicates not spam)\n\n')


if __name__ == '__main__':
    # ==================== Part 1: Email Preprocessing ====================
    # To use an SVM to classify emails into Spam v.s. Non-Spam, you first need
    # to convert each email into a vector of features. In this part, you will
    # implement the preprocessing steps for each email. You should
    # complete the code in processEmail.m to produce a word indices vector
    # for a given email.

    print('\nPreprocessing sample email (emailSample1.txt)\n')
    # Extract Features
    # Read inside a context manager so the file handle is always closed
    # (the bare ``open(...).read()`` left it open).
    with open('../data/emailSample1.txt') as sample_file:
        file_contents = sample_file.read()
    word_indices = process_email(file_contents)
    # Print Stats
    print('Word Indices:\n')
    # Print the indices right-aligned in 4 columns; after every tenth
    # value, print('\n') ends the row (and leaves a blank line).
    print_index = 0
    for print_value in word_indices:
        print_index += 1
        print("%4d" % print_value, end=' ')
        if print_index % 10 == 0:
            print('\n')
    print('\n')
    print('Program paused. Press enter to continue.\n')
    # pause_func()

    # ==================== Part 2: Feature Extraction ====================
    # Now, you will convert each email into a vector of features in R^n.
    # You should complete the code in emailFeatures.m to produce a feature
예제 #3
0
from processEmail import process_email
from vocabulary import map_words

# Pre-process a sample email string that mixes HTML tags, upper-case text,
# a dollar amount, irregular whitespace and a URL.
words = process_email(
    "<html>HELLO world pythoner! It's $100, how are \n you're \t doing? http://ya.ru</html>"
)

# For each word in the email, find its index in the vocabulary
words_indices = map_words(words)

# Train classifier
# TODO: not implemented yet

# Predict
# TODO: not implemented yet

# Analyze
# TODO: not implemented yet
def classify_email(filename):
    """Print the prediction of ``model`` for the email stored in *filename*.

    The file is loaded with ``read_file``, run through ``process_email``
    (called with ``'vocab.txt'``) and ``email_features``, and the result is
    reshaped to a single row before being given to ``model.predict``.
    ``model`` and the helpers are expected to be defined outside this
    function.
    """
    raw_text = read_file(filename)
    indices = process_email(raw_text, 'vocab.txt')
    # reshape(1, -1): present the features as one row for predict().
    feature_row = email_features(indices).reshape(1, -1)
    prediction = model.predict(feature_row)
    report = '\nProcessed {}\n\nSpam Classification: {}\n'.format(
        filename, prediction)
    print(report)
    print('(1 indicates spam, 0 indicates not spam)\n\n')
def pause():
    """Block until a line is read from standard input; the line is discarded."""
    _ = input("")


"""## Part 1: Email Pre-processing
  To use an SVM to classify emails into Spam v.s. Non-Spam, you first need
  to convert each email into a vector of features. In this part, you will
  implement the pre-processing steps for each email. You should
  complete the code in processEmail.py to produce a word indices vector
  for a given email."""

print('\nPre-processing sample email (emailSample1.txt)\n')

# Extract features: read the sample email, then pass its text together with
# 'vocab.txt' to process_email to obtain the word indices.
file_contents = read_file('emailSample1.txt')
word_indices = process_email(file_contents, 'vocab.txt')

# Print stats: dump the word indices exactly as returned by process_email.
print('Word Indices: \n')
print(word_indices)
print('\n\n')

# Wait for the user before moving on to the next part.
print('Program paused. Press enter to continue.\n')
pause()
"""
## Part 2: Feature Extraction
  Convert each email into a vector of features in R^n.
"""

# Announce the feature-extraction step.
print('\nExtracting features from sample email (emailSample1.txt)\n')
def classify_email(filename):
    """Print the prediction of ``model`` for the email stored in *filename*.

    The file is loaded with ``read_file``, run through ``process_email``
    (called with ``'vocab.txt'``) and ``email_features``, and the result is
    reshaped to a single row before being given to ``model.predict``.
    ``model`` and the helpers are expected to be defined outside this
    function.
    """
    raw_text = read_file(filename)
    indices = process_email(raw_text, 'vocab.txt')
    # reshape(1, -1): present the features as one row for predict().
    feature_row = email_features(indices).reshape(1, -1)
    prediction = model.predict(feature_row)
    report = '\nProcessed {}\n\nSpam Classification: {}\n'.format(
        filename, prediction)
    print(report)
    print('(1 indicates spam, 0 indicates not spam)\n\n')
def pause():
    """Block until a line is read from standard input; the line is discarded."""
    _ = input("")


"""## Part 1: Email Pre-processing
  To use an SVM to classify emails into Spam v.s. Non-Spam, you first need
  to convert each email into a vector of features. In this part, you will
  implement the pre-processing steps for each email. You should
  complete the code in processEmail.py to produce a word indices vector
  for a given email."""

print('\nPre-processing sample email (emailSample1.txt)\n')

# Extract features: read the sample email, then pass its text together with
# 'vocab.txt' to process_email to obtain the word indices.
file_contents = read_file('emailSample1.txt')
word_indices = process_email(file_contents, 'vocab.txt')

# Print stats: dump the word indices exactly as returned by process_email.
print('Word Indices: \n')
print(word_indices)
print('\n\n')

# Wait for the user before moving on to the next part.
print('Program paused. Press enter to continue.\n')
pause()
"""
## Part 2: Feature Extraction
  Convert each email into a vector of features in R^n.
"""

# Announce the feature-extraction step.
print('\nExtracting features from sample email (emailSample1.txt)\n')