Beispiel #1
0
    vectorizer = joblib.load(path + '\\model\\tf_idf.sav')

    #setting up log files
    dt = datetime.datetime.now()
    log_file = path + '\\logs\\' + str(dt.year) + str(dt.month) + str(
        dt.day) + '-' + str(randint(0, 10000)) + '.txt'
    logger = logging.getLogger(log_file)
    logger.setLevel(logging.INFO)
    logging.info('Starting...')

    #fetching email contents
    #storing email subject along with body
    #this is to classify those emails which don't have text but
    #just an image embedded in the body
    for uid in data:
        dict = mail.parse_email(uid)
        content = [dict['Subject'] + '\n' + dict['Body']]

        #extracting features
        #getting prediction from classifier
        features = vectorizer.transform(content)
        pred = clf.predict(features)

        #moving to spam folder (if predicted as spam)
        if pred[0] == 1:
            mov = mail.move_email(data[1], 'Inbox', '[Gmail]/Spam')
            if mov == 'OK':
                message = 'Following email moved to spam:\nuid: ' + str(
                    uid
                ) + '\nSubject: ' + str(dict['Subject']) + '\nContent: ' + str(
                    dict['Body']) + '\n----------------------------------'