/
processing_input.py
176 lines (147 loc) · 9.1 KB
/
processing_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import datetime
import re
from Schedule import Schedule
def research(regular, string):
    """Return the first case-insensitive match of a pattern in a string.

    The search is performed once with ``re.I | re.U``. Previously the
    existence check ran without flags while the returned match used them,
    so text that matched only case-insensitively was reported as missing.

    :param regular: regular expression source (a ``str`` pattern).
    :param string: text to search in.
    :return: the matched substring, or ``None`` when nothing matches
        (a diagnostic message is printed in that case).
    """
    match = re.search(regular, string, re.I | re.U)
    if match is None:
        print("There is no any {} pattern in string:\n{}".format(regular, string))
        return None
    return match.group()
def process_header(header):
    """Extract group, academic year, semester and first-week date from the header.

    Example of the header text this function parses (week table truncated):

        13.03.2018
        ФІТ
        Розклад занять на ІІ семестр 2017-2018 н.р.
        Напр.(спец.) МІТ
        Група ІР
        =========================================================
        |Тиждень|  ПН   |  ВТ   |  СР   |  ЧТ   |  ПТ   |  СБ   |
        =========================================================
        | 26.02 |.......|.......|.......|лЛл....|.лЛл...|.......|
        | 05.03 |ПППП...|.ЛЛПл..|.ЛПл...|лЛл....|.ЛЛ....|.......|
        ...

    :param header: text of the first (general information) section.
    :return: tuple ``(group, year, semester, start_date)`` where ``group`` and
        ``year`` are the matched strings, ``semester`` is ``1`` or ``2`` and
        ``start_date`` is a ``datetime.date`` built from the first "dd.mm "
        occurrence (the first week cell in the example above).
    :raises TypeError: if the year or the first-week date cannot be found
        (``research`` returns ``None`` for a missing pattern).
    """
    # Markers that denote the first (autumn) semester: Cyrillic "І" (U+0406),
    # its Latin look-alike "I", and the word form "осінній".
    first_semester_markers = ('\u0406', 'I', 'осінній')

    group = research(r'(?<=Група )\w{2,3}(-\d{2})*', header)  # text following "Група "
    year = research(r'\d{4}-\d{4} н.р.', header)  # "dddd-dddd н.р."
    # The word right before " семестр". The previous expression
    # `x is 'І' or 'осінній'` was always truthy, so every schedule was
    # treated as semester 1; any marker not listed above now yields 2.
    semester_marker = research(r'\w+(?= семестр)', header)
    semester = 1 if semester_marker in first_semester_markers else 2
    first_monday = research(r'\d{2}.\d{2} ', header)[:-1]  # "dd.mm" without the trailing space

    # The first calendar year of "dddd-dddd" is used for semester 1, the second
    # one for semester 2; day and month come from the first "dd.mm" match.
    calendar_year = int(year[0:4]) if semester == 1 else int(year[5:9])
    start_date = datetime.date(calendar_year, int(first_monday[3:]), int(first_monday[:2]))

    print('\nGroup: {}\nYear: {}\nSemester: {}\nFirst week date: {}'
          .format(group, year, semester, start_date))
    return group, year, semester, start_date
def subgroup_number(study_course):
    """Split a subgroup number off a course name.

    Examples: "Теорія алгоритмів 1" -> ("1", "Теорія алгоритмів"),
    "Технології програмування –2 підгр" -> ("2", "Технології програмування").

    :param study_course: course name, possibly carrying a subgroup marker.
    :return: tuple ``(subgroup, name)``. ``subgroup`` is the first digit found
        in the name as a one-character string, or ``None`` when the name has
        no digit, in which case the name is returned untouched.
    """
    digit_match = re.search(r'\d', study_course)
    if digit_match is None:
        return None, study_course
    subgroup = digit_match.group()

    # Drop a standalone digit (plus surrounding whitespace); digits that are
    # part of a longer number or sit next to a parenthesis are preserved.
    study_course = re.sub(r'(?<!\d|\()\s*\d\s*(?!\d|\))', '', study_course)
    # Drop a dash separator that is not part of a numeric range. Hyphen,
    # en dash (U+2013) and em dash (U+2014) are all accepted.
    # NOTE(review): this also removes the hyphen of hyphenated course names.
    study_course = re.sub(r'(?<!\d)\s*[-\u2013\u2014]\s*(?!\d)', '', study_course)
    # Longest alternative first: with "підгр" tried before "підгрупа" the
    # full word used to leave a dangling "упа".
    study_course = re.sub(r'підгрупа|підгр\.?|група', '', study_course)
    return subgroup, study_course
def room_date(course, year):
    """Expand the "|room (dates)" part of a course entry into (room, date) pairs.

    Each "|"-separated segment after the title holds a room ("ауд.ddd")
    followed by one or more parenthesised dates: a single day "(dd.mm)" or a
    range "(dd.mm-dd.mm)". A range is expanded in 7-day steps, both ends
    included.

    :param course: full text of one course entry; everything before the first
        "|" is ignored.
    :param year: calendar year applied to every date.
    :return: list of ``(room, datetime.date)`` tuples in order of appearance.
    :raises IndexError: if a non-blank segment contains no "ауд.ddd" room.
    """
    week = datetime.timedelta(days=7)
    result = []
    for segment in course.split('|')[1:]:
        # A room list wrapped over several lines ("...)|\n|ауд...") yields
        # whitespace-only segments; they carry no data and used to crash the
        # room lookup below.
        if not segment.strip():
            continue
        room = re.findall(r'ауд\.\d{3}', segment)[0]
        room_dates = re.findall(r'(?<=\().{5,11}(?=\))', segment)
        print(room, room_dates)
        for dt in room_dates:
            if len(dt) == 5:
                # Single day "dd.mm".
                result.append((room, datetime.date(year, int(dt[3:]), int(dt[:2]))))
            else:
                # Range "dd.mm-dd.mm": one entry per week.
                current = datetime.date(year, int(dt[3:5]), int(dt[:2]))
                last = datetime.date(year, int(dt[9:]), int(dt[6:8]))
                while current <= last:
                    result.append((room, current))
                    current += week
    return result
def process_lessons(schedule, day_desc_list):
    """Parse the per-day sections and register every lesson on ``schedule``.

    Example of one element of ``day_desc_list``:

        Понеділок
        1 пара - 9:00
        * Іноземна мова (П) [ас. Бабаніна]
        |ауд.218 (05.03-14.05)|ауд.212 (21.05-04.06)
        2 пара - 10:30
        * Іноземна мова (П) [ас. Бабаніна]
        |ауд.218 (05.03)|ауд.213 (12.03-30.04)|ауд.218 (07.05)|ауд.213 (14.05)|
        |ауд.205 (21.05)|ауд.204 (28.05-04.06)

    :param schedule: object exposing ``start.year`` and
        ``add_lesson(course, room, lesson_type, lesson_number, teacher, date, subgroup)``.
    :param day_desc_list: list of day section texts as shown above.
    :return: the same ``schedule`` object after all lessons were added.
    """
    year = schedule.start.year
    print('\nProcessing data...')

    # Loop through DAYS of the week.
    for day_description in day_desc_list:
        # Split on a newline followed by an "N пара"-like pattern:
        # [day name, lesson 1, lesson 2, ...]; element 0 is skipped below.
        lessons_of_the_day = re.split(r'\n(?=\d пара)', day_description)

        # Loop through LESSONS of the day.
        for lesson_description in lessons_of_the_day[1:]:
            # Split on "\n* " (the bullet preceding a course name):
            # [lesson number line, course 1, course 2, ...].
            courses_at_lesson = re.split(r'\n\* ', lesson_description)
            # The lesson number is the digit followed by a space and a word.
            lesson_number = int(research(r'\d(?= \w+)', courses_at_lesson[0]))

            # Loop through COURSES held at this lesson.
            for course_description in courses_at_lesson[1:]:
                # Course name: everything before "(x)", e.g. "Іноземна мова (П)".
                study_course = research(r'.+(?=\(\w\))', course_description)
                # Remove a parenthesised speciality, e.g. "Теорія алгоритмів (МІТ)".
                study_course = re.sub(r'\(.+\)', '', study_course)

                subgroup = None
                # A digit in the name is taken as the subgroup number and removed,
                # e.g. "Теорія алгоритмів 1", "Технології програмування –2 підгр".
                if re.search(r'\d', study_course):
                    subgroup = research(r'\d', study_course)
                    study_course = re.sub(r'\s*\d\s*', '', study_course)
                    # Hyphen or en dash (U+2013) with whitespace on BOTH sides; the
                    # old pattern `\s*-|–\s*` only covered one side per dash.
                    # NOTE(review): this also removes the hyphen of hyphenated names.
                    study_course = re.sub(r'\s*[-\u2013]\s*', '', study_course)
                    # Longest alternative first so "підгрупа" does not leave "упа".
                    study_course = re.sub(r'підгрупа|підгр\.?|група', '', study_course)
                # Normalise so one course is not stored under names that differ
                # only in leading/trailing whitespace.
                study_course = study_course.strip()

                lesson_type = research(r'(?<=\()\w(?=\))', course_description)  # "(x)"
                teacher = research(r'(?<=\[).+(?=\])', course_description)  # "[name]"

                # One schedule entry per (room, date) pair of this course.
                for room, date in room_date(course_description, year):
                    schedule.add_lesson(study_course, room, lesson_type, lesson_number,
                                        teacher, date, subgroup)
    return schedule
def process_data(data):
    """Build a Schedule from the raw text of a timetable file.

    The text is cut into sections at every run of two or more dashes. The
    first section is handed to ``process_header`` and its result is used to
    construct the ``Schedule``; the remaining sections are handed to
    ``process_lessons``.

    :param data: full text of the timetable file.
    :return: the object returned by ``process_lessons``.
    """
    header_section, *day_sections = re.split('-{2,}\n*', data)
    schedule = Schedule(process_header(header_section))
    return process_lessons(schedule, day_sections)
if __name__ == "__main__":
    # Manual smoke run of room_date() on hand-written course entries.
    # Sample with one single date and one date range (kept for reference, not run).
    sample_single_and_range = """Теорія алгоритмів(л) [проф. Білощицький]
|ауд.309 (22.05)|auд.305 (29.05-05.06)""".replace("auд", "ауд")
    # Sample with several parenthesised dates after one room.
    sample_repeated_dates = """Теорія систем та системний аналіз (П) [проф. Степанов М.М.]
|ауд.104 (18.10) (01.11) (15.11) (29.11-06.12)"""
    print(sample_repeated_dates)
    expanded = room_date(sample_repeated_dates, 2019)
    print(expanded)