def count(cls, where: str = "", where_args: tuple = None, group_by: str = "") -> int:
    """Return how many rows of this scrapable type match the given filters.

    The query is delegated to ``DbManager.count_from`` against the table
    named by ``cls().table``.

    Args:
        where: where clause, with ``?`` placeholders for ``where_args``. Defaults to "".
        where_args: values bound to the placeholders in ``where``. Defaults to None.
        group_by: group by clause. Defaults to "".

    Returns:
        number of scrapable objects, as reported by ``DbManager.count_from``
    """
    # The table name lives on an instance, so a throwaway one is created to read it.
    table_name = cls().table
    return DbManager.count_from(
        table_name=table_name,
        where=where,
        where_args=where_args,
        group_by=group_by,
    )
def scrape(cls, delete: bool = False):
    """Scrape the "aulario" page and store its timetable slots in the database.

    The page URL comes from ``read_md("aulario")``. Every HTML table found in
    the page is processed separately and its position in the page is stored as
    ``giorno``. In each table the first column holds the room and every other
    column is a time slot. Non-empty cells of the same table that share the
    same first 20 characters and the same room are merged into one slot: the
    first column where they appear sets ``ora_inizio`` and the last one sets
    ``ora_fine``.

    Args:
        delete: whether the table contents should be deleted first. Defaults to False.

    Raises:
        requests.exceptions.RequestException: if the page cannot be downloaded,
            including when the server does not answer within the timeout.
    """
    # Local import: the rest of the file may not import io at module level.
    from io import StringIO

    # Without a timeout, requests waits forever on an unresponsive server.
    # 30 seconds is an arbitrary but generous bound for a single page.
    html = requests.get(read_md("aulario"), timeout=30).text
    # pandas deprecates passing literal HTML to read_html; a file-like object
    # is accepted by every pandas version.
    tables = pd.read_html(StringIO(html))

    timetable_slots = []
    for day, table in enumerate(tables):
        rooms = table.iloc[:, 0]
        schedule = table.iloc[:, 1:]
        subjects = {}  # (subject prefix + room) -> slot being built for this table
        for column in schedule:
            # A label ending in "1" is rewritten to "<first 3 chars>30".
            # NOTE(review): presumably this is pandas' ".1" suffix on the
            # duplicated hour header, marking the half-hour column - confirm
            # against the actual page layout.
            slot_time = column[:3] + "30" if column[-1] == "1" else column
            for i, cell in enumerate(table[column]):
                if pd.isnull(cell):
                    continue
                room = rooms[i]
                key = cell[:20] + room
                if key not in subjects:
                    subjects[key] = cls(
                        nome=cell.replace('[]', '').replace('[', '(').replace(']', ')'),
                        giorno=day,
                        ora_inizio=slot_time,
                        ora_fine=slot_time,
                        aula=room,
                    )
                else:
                    # Same subject in the same room again: extend the existing slot.
                    subjects[key].ora_fine = slot_time
        timetable_slots.extend(subjects.values())

    if delete:
        cls.delete_all()

    offset = DbManager.count_from(table_name=cls().table)  # number of rows already present
    for i, timetable_slot in enumerate(timetable_slots):
        # generate the ID of the timetable slot based on its position in the array
        timetable_slot.ID = i + offset

    cls.bulk_save(timetable_slots)
    logger.info("Aulario loaded.")