コード例 #1
0
    def count(cls, where: str = "", where_args: tuple = None, group_by: str = "") -> int:
        """Return how many scrapable objects stored in the database match the given filters

        The query is delegated to DbManager.count_from, targeting the table of this class.

        Args:
            where: where clause, with ? placeholders for the where_args. Defaults to "".
            where_args: values bound to the placeholders of the where clause. Defaults to None.
            group_by: group by clause. Defaults to "".

        Returns:
            number of scrapable objects
        """
        # the table name is exposed by instances, so a throwaway one is created to read it
        table_name = cls().table
        return DbManager.count_from(table_name=table_name, where=where, where_args=where_args, group_by=group_by)
コード例 #2
0
    def scrape(cls, delete: bool = False):
        """Scrapes the timetable slots from the "aulario" page and stores them in the database

        The page is downloaded from the address returned by read_md("aulario") and every HTML table
        found in it is parsed. The first column of a table holds the rooms, the remaining columns are
        the time slots. The position of the table in the page is stored as giorno.
        Within a table, cells sharing the same first 20 characters of text and the same room are
        merged into a single slot: ora_inizio is the first column where the cell appears, ora_fine the last.

        Args:
            delete: whether the table contents should be deleted first. Defaults to False.
        """
        from io import StringIO  # local import: only needed to hand the downloaded markup to pandas

        timetable_slots = []
        # NOTE(review): no timeout is set, so a stalled server blocks this call indefinitely - consider adding one
        response = requests.get(read_md("aulario")).text
        # passing literal HTML to read_html is deprecated since pandas 2.1; a file-like object is always accepted
        tables = pd.read_html(StringIO(response))

        for day, table in enumerate(tables):
            rooms = table.iloc[:, 0]
            schedule = table.iloc[:, 1:]
            subjects = {}
            for column, cells in schedule.items():
                # Computed once per column instead of rebinding the loop variable for every row.
                # A label ending in "1" is rewritten as its first three characters followed by "30"
                # (presumably the half-hour twin of a duplicated header, e.g. "08:00.1" - confirm).
                slot_time = column[:3] + "30" if column[-1] == "1" else column
                # rooms and cells are paired by position, so the result does not depend on the index labels
                for room, cell in zip(rooms, cells):
                    if pd.isnull(cell):
                        continue  # empty cell: nothing scheduled in this room at this time
                    key = cell[:20] + room
                    if key not in subjects:
                        subjects[key] = cls(nome=cell.replace('[]', '').replace('[', '(').replace(']', ')'),
                                            giorno=day,
                                            ora_inizio=slot_time,
                                            ora_fine=slot_time,
                                            aula=room)
                    else:
                        # already seen in an earlier column: only move the end of the slot forward
                        subjects[key].ora_fine = slot_time
            timetable_slots.extend(subjects.values())

        # the old rows are removed only after the download and the parsing have succeeded
        if delete:
            cls.delete_all()

        offset = DbManager.count_from(table_name=cls().table)  # number of rows already present
        # generate the ID of each timetable slot from its position in the list, after the existing rows
        for slot_id, timetable_slot in enumerate(timetable_slots, start=offset):
            timetable_slot.ID = slot_id
        cls.bulk_save(timetable_slots)
        logger.info("Aulario loaded.")