Ejemplo n.º 1
0
 def __init__(self, folder):
     """Load every email of *folder* and sort it into hams or spams.

     Each (file name, content) pair yielded by ``Corpus(folder).emails()``
     is wrapped in an ``Email``; it is stored in ``self.hams`` when
     ``self.is_ham(file name)`` is truthy and in ``self.spams`` otherwise.
     """
     self.folder = folder
     self.spams = []
     self.hams = []
     for name, text in Corpus(folder).emails():
         # Pick the destination list first, then build the Email once.
         bucket = self.hams if self.is_ham(name) else self.spams
         bucket.append(Email(name, text))
Ejemplo n.º 2
0
 def test(self, directory):
     """Classify every email in *directory* and write the verdicts to disk.

     When ``self.trained`` is truthy, each email is compared (through
     ``Email.compare_emails``) with every known spam and every known ham;
     it is labelled 'SPAM' when its mean score against the spams is
     strictly greater than its mean score against the hams, else 'OK'.
     An empty ``self.spams`` or ``self.hams`` contributes a mean of 0
     instead of raising ZeroDivisionError.

     Otherwise, an email is labelled 'SPAM' when more than three entries
     of ``self.blacklist`` occur in its body, else 'OK'.

     The resulting ``{file name: label}`` mapping is handed to
     ``write_classification_to_file`` with the path
     ``<directory>/!prediction.txt``.
     """
     corp = Corpus(directory)
     result = {}
     if self.trained:
         for fname, body in corp.emails():
             examined = Email(fname, body)
             spam_sum = sum(
                 Email.compare_emails(examined, spam) for spam in self.spams)
             ham_sum = sum(
                 Email.compare_emails(examined, ham) for ham in self.hams)
             # Guard the divisions: a reference set may be empty.
             spam_mean = spam_sum / len(self.spams) if self.spams else 0
             ham_mean = ham_sum / len(self.hams) if self.hams else 0
             result[fname] = 'SPAM' if spam_mean > ham_mean else 'OK'
     else:
         for fname, body in corp.emails():
             hits = sum(1 for word in self.blacklist if word in body)
             result[fname] = 'SPAM' if hits > 3 else 'OK'
     # Both strategies produce the same kind of mapping, so write it once.
     write_classification_to_file(
         os.path.join(directory, '!prediction.txt'), result)
Ejemplo n.º 3
0
#coding: utf-8
from my_email import Email

# Call send_mail with and without the optional third argument.
e = Email()
e.send_mail('윤준연', '*****@*****.**')
e.send_mail('윤준연', '*****@*****.**', 'emails.xlsx')
# Raw string: '\e' is not a valid escape sequence, so the plain literal
# triggers an invalid-escape warning; the runtime value is unchanged.
e.send_mail('윤준연', '*****@*****.**', r'd:\emails.xlsx')
Ejemplo n.º 4
0
Archivo: auto.py Proyecto: Celox16/TIL
from my_email import Email
from my_news import News
from my_excel import Excel

# Helper objects: mail sender, news finder and spreadsheet writer.
m_email = Email()
m_news = News()
m_excel = Excel()

# Fetch the news entries for the search keyword.
news_list = m_news.find_news('fascampus')

# Fill in the envelope of the message.
m_email.from_email = '*****@*****.**'
m_email.to_email = '*****@*****.**'
m_email.subject = 'Dear. '

# Append one line per news entry to the message body.
for headline in news_list:
    m_email.contents += headline + '\n'

m_email.send_mail()

# Store the same news entries in a spreadsheet.
m_excel.excel_file = 'result.xlsx'
m_excel.save_to_excel(news_list)
Ejemplo n.º 5
0
from my_email import Email


class Corpus:
    """Iterate over the email files stored in one folder.

    The directory listing is taken once, when the object is created;
    entries added to the folder afterwards are not seen by ``emails()``.
    """

    def __init__(self, folder):
        """Remember *folder* and record the names of its current entries."""
        self.folder = folder
        self.files = os.listdir(folder)

    def emails(self):
        """Yield ``(file name, file content)`` for each email in the folder.

        Entries whose name starts with '!' and sub-directories are skipped.
        Files are decoded as UTF-8; a file that cannot be opened or decoded
        raises the usual ``OSError`` / ``UnicodeDecodeError``.
        """
        for file in self.files:
            # Names starting with '!' are not treated as emails.
            if file.startswith('!'):
                continue
            filePath = os.path.join(self.folder, file)
            # os.listdir also returns directories, which cannot be opened
            # as text files.
            if os.path.isdir(filePath):
                continue
            with open(filePath, 'rt', encoding='utf-8') as f:
                yield file, f.read()


if __name__ == "__main__":
    # Built with os.path.join so the sample folder also resolves on
    # platforms where the backslash is not the path separator.
    path = os.path.join("data", "2")
    corpus = Corpus(path)
    count = 0
    # Go through all emails and print the filename and the message body
    for fname, body in corpus.emails():
        print(fname)
        print(body)
        print('-------------------------')
        count += 1
        # NOTE(review): the Email object is not used afterwards; presumably
        # built only to check that construction succeeds - confirm.
        email = Email(fname, body)
    print('Finished: ', count, 'files processed.')
Ejemplo n.º 6
0
def _read_whole_file(path):
    """Return the full text content of the file at *path*."""
    with open(path, "r") as handle:
        return handle.read()


def _detected_language(detection):
    """Reduce a detector result to a single language label.

    When the detector returns a list, the label is taken from
    ``detection[0][0]``; any other value is used as the label itself.
    """
    if isinstance(detection, list):
        return detection[0][0]
    return detection


def _average(total, count):
    """Return ``total / count``, or 0.0 when *count* is zero."""
    return total / count if count else 0.0


def _write_language_counts(handle, counts):
    """Write one tab-indented "- en <language> : <count>" line per entry."""
    for language in counts:
        handle.write("\t- en " + language + " : " + str(counts[language]) +
                     "\n")


def detectLanguagesMails(repertory, out, LFRA, LENG, n=1, Type=False):
    """Detect the language of every mail in *repertory* and write reports.

    An n-gram detector is trained on the text of the files *LFRA*
    ("french") and *LENG* ("english"). For each entry matched by
    ``glob.glob("*")`` inside *repertory*, a ``<out>/<name>.detect`` report
    is written with the language of the whole body (also compared with a
    stop-word based detection), of the subject, of every paragraph and of
    every sentence. ``<out>/global.txt`` receives the overall counts and a
    one-line summary per mail.

    Args:
        repertory: directory holding the mails; the process working
            directory is changed to it and is NOT restored afterwards.
        out: directory receiving the report files; it must already exist.
        LFRA: path of the French learning text.
        LENG: path of the English learning text.
        n: passed through to ``addDocument`` and ``detect``.
        Type: passed through to ``addDocument`` and ``detect``.

    NOTE(review): ``global.txt`` is opened before the ``os.chdir`` call and
    the per-mail reports after it, so a relative *out* resolves against two
    different directories - confirm callers pass an absolute path.
    """
    separator = (
        "\n\n+++++++++++++++++++++++++++++++++++++++++++++++++\n+++++++++++++++++++++++++++++++++++++++++++++++++\n"
    )

    learning_FRA = _read_whole_file(LFRA)
    learning_ENG = _read_whole_file(LENG)

    detector = LangDetectorByNGrams()
    detector.addDocument(learning_FRA, "french", n, Type)
    detector.addDocument(learning_ENG, "english", n, Type)

    SW_detect = LangDectectorStopWords()

    # "conflicts" counts mails whose n-gram and stop-word results differ;
    # it lives in the same mapping as the per-language counts and is
    # therefore listed with them in global.txt.
    language_mails = {"conflicts": 0}
    language_paragraphes = {}
    language_sentences = {}
    global_lines = []
    nb_mails = 0

    with open(out + "/global.txt", 'w') as GLOBAL_F:
        os.chdir(repertory)
        for mails in glob.glob("*"):
            # '.' does not match a line break, so the pattern keeps the
            # file name up to its first line break (normally all of it).
            mail_name = re.search('(.*)', mails).group(1)
            name_file = out + "/" + mail_name + ".detect"
            print(name_file)
            with open(name_file, 'w') as FILE:
                e = Email(mails)
                body = e.get_body()

                # Whole-mail language, cross-checked with stop words.
                detectN = detector.detect(body, n, Type)
                language = _detected_language(detectN)
                language_mails[language] = language_mails.get(language, 0) + 1
                summary = mail_name + "\t" + language
                language_by_SW = SW_detect.stopWords_detect(body)
                FILE.write("Le mail \"" + mail_name +
                           "\" est globalement en : " + language)
                if language != language_by_SW:
                    language_mails["conflicts"] += 1
                    FILE.write(" (conflict)")
                    summary += " (conflict)"
                global_lines.append(summary + "\n")
                FILE.write("\n" + str(detectN) + "\n")
                FILE.write("Language by Stop Words : " + language_by_SW +
                           "\n")
                FILE.write(separator)

                # Subject language.
                subject = e.get_subject()
                language = _detected_language(
                    detector.detect(subject, n, Type))
                FILE.write("Le sujet du mail est en : " + language + "\n")
                FILE.write("Subject : " + subject + "\n")
                FILE.write(separator)

                # Per-paragraph languages.
                paragraphs = cuttingText.getSubsections(body)
                for i, paragraph in enumerate(paragraphs, 1):
                    language = _detected_language(
                        detector.detect(paragraph, n, Type))
                    language_paragraphes[language] = (
                        language_paragraphes.get(language, 0) + 1)
                    FILE.write(
                        "Le paragraphe " + str(i) + " est en : " + language +
                        "\n\t" + paragraph +
                        "\n\n==========================================\n")
                FILE.write(separator)

                # Per-sentence languages.
                sentences = cuttingText.getSentences(body)
                for j, sentence in enumerate(sentences, 1):
                    language = _detected_language(
                        detector.detect(sentence, n, Type))
                    language_sentences[language] = (
                        language_sentences.get(language, 0) + 1)
                    FILE.write(
                        "La phrase " + str(j) + " est en : " + language +
                        "\n\t" + sentence +
                        "\n==========================================\n")
            nb_mails += 1

        GLOBAL_F.write("Nombre de mails total : " + str(nb_mails) + "\n")
        _write_language_counts(GLOBAL_F, language_mails)

        # _average avoids a ZeroDivisionError when no mail was found.
        total_paragraphs = sum(language_paragraphes.values())
        GLOBAL_F.write("Nombre de paragraphes total : " +
                       str(total_paragraphs) + " (moyenne : " +
                       str(_average(total_paragraphs, nb_mails)) + ")\n")
        _write_language_counts(GLOBAL_F, language_paragraphes)

        total_sentences = sum(language_sentences.values())
        GLOBAL_F.write("Nombre de phrases total : " +
                       str(total_sentences) + " (moyenne : " +
                       str(_average(total_sentences, nb_mails)) + ")\n")
        _write_language_counts(GLOBAL_F, language_sentences)

        GLOBAL_F.write("\n" + "".join(global_lines))