-
Notifications
You must be signed in to change notification settings - Fork 0
/
Data.py
55 lines (42 loc) · 1.56 KB
/
Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import re
from typing import List, Dict
from Mail import *
class DataPart:
    """Container for the mails of one dataset folder.

    Mails are kept in two separate lists: ``hams`` and ``spams``.
    """

    hams: List[Mail]
    spams: List[Mail]

    def __init__(self):
        # Every instance gets its own, initially empty, pair of lists.
        self.spams = list()
        self.hams = list()

    def __str__(self):
        """Return both lists rendered one after the other in a single string."""
        return f"hams: {self.hams}spams: {self.spams}"
class Data:
    """Mails of a dataset directory, grouped by sub-folder.

    ``parts`` maps each sub-folder name found under the dataset root to the
    ``DataPart`` filled with the mails read from that sub-folder.
    """

    parts: "Dict[str, DataPart]"

    def __init__(self):
        self.parts = dict()

    @staticmethod
    def _parse_words(line: str) -> List[int]:
        """Turn a line of whitespace-separated integer tokens into a list.

        Splitting on arbitrary whitespace tolerates leading, trailing and
        repeated spaces as well as the line terminator. A line without any
        token yields ``[0]``.

        Raises:
            ValueError: if a token is not a valid integer literal.
        """
        tokens = line.split()
        if not tokens:
            return [0]
        # Build a real list: a lazy ``map`` object could only be iterated once.
        return [int(token) for token in tokens]

    def read_mail(self, path: str) -> "Mail":
        """Read a single mail file and wrap its content in a ``Mail``.

        Only the first three lines of the file are consumed: the subject
        line (an optional ``Subject:`` prefix is dropped), a line that is
        discarded, and the body line. Subject and body are both parsed as
        lists of integers.

        Args:
            path: location of the mail file; also passed on to ``Mail``.

        Returns:
            ``Mail(path, subject_words, body_words)``.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if the subject or body holds a non-integer token.
        """
        header = "Subject:"
        with open(path, encoding="utf8") as file:
            subject = file.readline()
            file.readline()  # the second line is read and thrown away
            body = file.readline()

        # Drop the header even when nothing (not even a space) follows it.
        if subject.startswith(header):
            subject = subject[len(header):]

        return Mail(path, self._parse_words(subject), self._parse_words(body))

    def read_data(self, root_data_path: str) -> None:
        """Load every sub-folder of ``root_data_path`` into ``self.parts``.

        Each directory directly under the root becomes one ``DataPart``
        stored under the directory's name. Files whose name contains
        ``"legit"`` are appended to ``hams``; all other files to ``spams``.
        Entries of the root that are not directories, and entries of a
        sub-folder that are not regular files, are skipped.

        Args:
            root_data_path: directory containing one sub-directory per part.
        """
        for folder in os.listdir(root_data_path):
            folder_path = root_data_path + "/" + folder
            if not os.path.isdir(folder_path):
                # Stray files next to the part folders are not mail folders.
                continue

            part = DataPart()
            with os.scandir(folder_path) as entries:
                for entry in entries:
                    if not entry.is_file():
                        continue
                    mail = self.read_mail(folder_path + "/" + entry.name)
                    if "legit" in entry.name:
                        part.hams.append(mail)
                    else:
                        part.spams.append(mail)
            self.parts[folder] = part