Esempio n. 1
0
    def _get_text(tag: bs4.element.Tag) -> str:
        """Return the text of ``tag`` after passing it through
        ``clean_text_up``.

        ``clean_text_up`` is defined elsewhere; it is expected to collapse
        duplicate spaces (confirm against its definition).

        Every example is assumed to contain text, so no empty-tag handling
        is done here.
        """
        # ``get_text`` is used on purpose: going through 'findall' would
        # drop punctuation marks from the result.
        return clean_text_up(tag.get_text())
Esempio n. 2
0
def is_content_entry(tag: bs4.element.Tag) -> bool:
    """Tell whether ``tag`` is a ``<span>`` that carries entry content.

    A tag qualifies only when all of the following hold:

    * it is a ``bs4.element.Tag`` named ``span``;
    * none of its CSS classes starts with ``bg``;
    * at least one of its CSS classes is ``W`` or ``C``;
    * its text is not empty and not made up solely of spaces.

    A span without a ``class`` attribute cannot satisfy the ``W``/``C``
    requirement, so it yields ``False`` instead of raising ``KeyError``.

    Args:
        tag: node to inspect; non-``Tag`` objects (e.g. plain strings found
            among a tag's children) are accepted and rejected.

    Returns:
        ``True`` if the tag is a content entry, ``False`` otherwise.
    """
    if not isinstance(tag, bs4.element.Tag) or tag.name != 'span':
        return False

    # ``class`` is optional in HTML; treat a missing attribute as "no classes".
    classes = tag.attrs.get('class', [])
    if any(cls.startswith('bg') for cls in classes):
        return False
    if not any(marker in classes for marker in ('W', 'C')):
        return False

    # Spans holding nothing but spaces are layout filler, not content.
    return not re.fullmatch(' *', tag.get_text())
Esempio n. 3
0
 async def get_lessons_from_soup(self, element: bs4.element.Tag,
                                 groups_list: List[str]):
     """Build lesson rows from one timetable cell.

     The cell's ``title`` attribute is split on spaces and must contain:

     * a group name wrapped in question marks (``?name?``);
     * a token ``<x>||<count>`` - number of consecutive groups the cell
       applies to (``0`` is treated as ``1``);
     * a token ``<dd.mm>~<lesson number>``.

     Args:
         element: the cell tag; ``title``, ``colspan`` and ``rowspan``
             attributes and the cell text are read.
         groups_list: group names; the cell's group is located in it with
             ``list.index`` and rows are emitted for it and the groups
             that follow it.

     Returns:
         ``None`` when the cell has to be skipped (no ``title``, no or
         unknown group, a missing/non-numeric title token, or empty
         text). Otherwise a list with one row per affected group:
         ``[day_week, lesson_num, week, group, subgroup, text,
         lesson_kind, teacher_id]``.

     Raises:
         KeyError: ``colspan`` or ``rowspan`` attribute is missing.
         ValueError: the group is not in ``groups_list`` or the day/month
             do not form a valid date in the current year.
     """
     title = element.get('title')
     if title is None:
         return None
     tokens = title.split(" ")

     # The group name is the first "?...?" fragment of the title.
     found = re.search(r'\?.+?\?', title)
     if found is None:
         return None
     group = found.group(0).replace("?", '')
     if not group:
         # Nothing to look up; avoid a pointless query with an empty name.
         return None
     group_db = await select_group(group)
     if not group_db:
         return None

     # Both tokens are mandatory; a cell lacking either one is skipped,
     # as is a cell whose numeric fields do not parse.
     count_token = next((s for s in tokens if "||" in s), None)
     date_token = next((s for s in tokens if "~" in s), None)
     if count_token is None or date_token is None:
         return None
     try:
         quantity = int(count_token.split("||")[1])
         date_part, lesson_part = date_token.split("~")[:2]
         lesson_num = int(lesson_part)
         day_part, month_part = date_part.split(".")[:2]
         # int() already accepts leading zeros ("05" -> 5).
         day = int(day_part)
         month = int(month_part)
     except ValueError:
         return None
     if quantity == 0:
         quantity = 1

     # NOTE(review): the year is taken from the current date, so cells
     # belonging to another calendar year get the wrong weekday - confirm.
     # NOTE(review): '%A' is locale-dependent; the Week lookup presumably
     # expects English lowercase names - confirm.
     weekday_name = datetime(datetime.now().year, month,
                             day).strftime('%A').lower()
     day_week = Week[weekday_name]

     raw_text = element.get_text()
     teacher_id = await self.is_prepod_in_db(raw_text)
     if teacher_id:
         # Drop the last line of the cell (presumably the teacher's
         # name - confirm) and glue the remaining lines together.
         text = "".join(
             raw_text.replace("\t", "").strip().split("\n")[:-1])
     else:
         text = raw_text
     # Collapse every whitespace run to a single space.
     text = " ".join(text.split())
     if not text:
         return None

     subgroup: int = 0
     lesson_kind: LessonKind = LessonKind.lec
     colspan = int(element['colspan'])
     group_idx = groups_list.index(group)
     if colspan > group_db.subgroups:
         # The cell spans several groups: count how many consecutive
         # groups, starting with this one, fit entirely within colspan.
         covered = 0
         quantity = 0
         for name in groups_list[group_idx:]:
             if covered > colspan:
                 break
             info = await select_group(name)
             if not info:
                 # Width of an unknown group cannot be measured;
                 # stop counting here instead of failing.
                 break
             covered += info.subgroups
             if covered <= colspan:
                 quantity += 1
     elif 0 < colspan < group_db.subgroups:
         # Narrower than the whole group: a lesson for a single subgroup.
         lesson_kind = LessonKind.lab
         subgroup = 1 if 'pr' in title else 2
     else:
         lesson_kind = LessonKind.lec if 'l1' in title else LessonKind.prac

     if element['rowspan'] == '#':
         week = UnderAboveWeek.under if 'tp' in title else UnderAboveWeek.above
     else:
         week = UnderAboveWeek.all

     # Never run past the end of groups_list, even if the title
     # announces more groups than are available.
     last = min(group_idx + quantity, len(groups_list))
     return [
         [day_week, lesson_num, week, groups_list[pos], subgroup, text,
          lesson_kind, teacher_id]
         for pos in range(group_idx, last)
     ]