Example #1
0
    def _extract_table(self, html):
        """Parse the HTML page and convert its calendar table into events.

        The calendar is read from the second top-level ``<table>`` of the
        page body. Its first row holds the hour labels; each following row
        holds the slots of a day. The first cell of a row may carry a
        ``rowspan`` attribute, which is taken as the number of rows that
        belong to that day.

        A cell that contains more than one direct child ``<table>`` is an
        event; its ``colspan`` is the duration expressed in half-hour slots.

        Returns the list of ``CourseEvent`` objects built from the table.
        """
        soup = BeautifulSoup(html)
        tables = soup.html.body.findAll(name="table", recursive=False)
        # Jump to the calendar table (second top-level table of the body).
        cal = tables[1]
        lines = cal.findAll(name="tr", recursive=False)
        # Isolate the first line, which holds the hours (first cell skipped).
        hours_line = lines[0].findChildren(name="td", recursive=False)[1:]
        hours = insert_halfhour_slots_and_convert_to_datetime(hours_line)

        # Fetch the direct cells of every remaining line once; they are used
        # both to count the rows per day and to extract the events.
        rows = [line.findAll(name="td", recursive=False) for line in lines[1:]]

        # Number of rows announced by each line's first cell (0 when the
        # cell has no rowspan attribute).
        n_rows = [
            int(slots[0]["rowspan"]) if slots[0].has_key("rowspan") else 0
            for slots in rows
        ]

        event_list = []
        day = -1
        remaining = 0  # rows still to be consumed for the current day
        for slots, rowspan in zip(rows, n_rows):
            if not remaining:
                # A new day starts on this line.
                remaining = rowspan
                day += 1
                # Start one slot before hours[0]: the leading cell of this
                # line is the one carrying the rowspan (presumably the day
                # label), so it must not count as a time slot.
                current_time = -1
            else:
                current_time = 0
            remaining -= 1

            for s in slots:
                cell = s.findAll(name="table", recursive=False)
                if len(cell) <= 1:
                    # Not an event: the cell occupies a single slot.
                    current_time += 1
                    continue

                # Event found.
                start_time = hours[current_time]
                duration = int(s["colspan"])
                event_data = {
                    "day": day,
                    "start_time": start_time,
                    "duration": duration,
                }
                # One colspan unit is half an hour. Working in minutes keeps
                # the result exact; ``duration / 2`` would be floored by
                # integer division on Python 2 and shorten odd durations.
                event_data["stop_time"] = start_time + timedelta(minutes=30 * duration)

                td = cell[0].tr.findAll(name="td", recursive=False)
                # Gehol weeks when the event occurs.
                event_data["weeks"] = split_weeks(td[0].contents[0].string)
                # Location (normalised to an empty string when missing).
                event_data["location"] = td[1].contents[0].string or ""
                # Activity.
                event_data["type"] = cell[1].tr.td.contents[0].string
                current_time += duration
                event_data["organizer"] = ""
                event_data["title"] = "%s - %s" % (self.metadata["mnemo"], self.metadata["title"])

                event_list.append(CourseEvent(**event_data))
        return event_list
    def _load_events(self, event_table):
        """Fill ``self.events`` with the events found in *event_table*.

        The first ``<tr>`` of the table provides the hour labels (its first
        cell is skipped); the remaining rows are grouped per day according
        to ``self._get_num_row_per_day`` and handed one by one to
        ``self._load_weekday_events``.
        """
        table_rows = event_table.findChildren("tr", recursive=False)

        # Column labels, converted to actual hour objects.
        header_cells = table_rows[0].findChildren("td", recursive=False)
        hours = insert_halfhour_slots_and_convert_to_datetime(header_cells[1:])

        # Everything after the header row holds the events.
        event_rows = table_rows[1:]
        self.events = []

        day_layout = self._get_num_row_per_day(event_rows)
        first_row_of_day = 0

        for num_day, _day_string, num_rows in day_layout:
            # Collect the whole day before publishing it to self.events.
            collected = []
            for row_index in range(first_row_of_day, first_row_of_day + num_rows):
                row = event_rows[row_index]
                collected.extend(self._load_weekday_events(row, num_day, hours))
            self.events.extend(collected)
            first_row_of_day += num_rows