Example #1
0
    def _extract_table(self, html):
        """Parse the HTML page and turn its calendar table into events.

        The second top-level ``<table>`` of ``<body>`` is taken as the
        calendar. Its first row supplies the time slots; every following row
        is scanned for event cells, i.e. cells holding more than one direct
        child table.

        :param html: markup of the calendar page.
        :return: list of ``CourseEvent`` objects, in the order encountered.
        """
        soup = BeautifulSoup(html)
        tables = soup.html.body.findAll(name="table", recursive=False)
        # jump to the calendar table
        cal = tables[1]
        lines = cal.findAll(name="tr", recursive=False)
        # isolate the first row holding the hours (its first cell is skipped)
        hours_line = lines[0].findChildren(name="td", recursive=False)[1:]
        hours = insert_halfhour_slots_and_convert_to_datetime(hours_line)

        # Collect the cells of every remaining row once; they are needed both
        # for the rowspan scan and for the event scan below.
        rows = [line.findAll(name="td", recursive=False) for line in lines[1:]]
        # Number of table rows announced by the first cell of each row
        # (0 when the cell carries no rowspan attribute).  ``Tag.get`` is used
        # instead of ``has_key`` because it exists in both BeautifulSoup
        # generations.
        n_rows = [int(cells[0].get("rowspan", 0)) for cells in rows]

        event_list = []
        day = -1
        n = 0  # rows still to be consumed for the current day
        for no_line, slots in enumerate(rows):
            if not n:
                # first row of a new day
                # NOTE(review): if this row has no rowspan, n becomes -1 below
                # and every later row is treated as a continuation row --
                # confirm the page always sets rowspan on day rows.
                n = n_rows[no_line]
                day += 1
                # Start one slot early: the leading cell of a day's first row
                # (presumably the day label) is counted like any empty cell.
                current_time = -1
            else:
                current_time = 0
            n -= 1
            for s in slots:
                cell = s.findAll(name="table", recursive=False)
                # event found
                if len(cell) > 1:
                    event_data = {"day": day, "start_time": hours[current_time], "duration": int(s["colspan"])}
                    # 1 colspan = 1/2 hour.  Computed in minutes so that an odd
                    # colspan keeps its half hour even where ``/`` is integer
                    # division (Python 2).
                    delta = timedelta(minutes=30 * event_data["duration"])
                    event_data["stop_time"] = hours[current_time] + delta
                    td = cell[0].tr.findAll(name="td", recursive=False)
                    # Gehol weeks when the event occurs
                    event_data["weeks"] = split_weeks(td[0].contents[0].string)
                    # location (empty string when the cell has no text)
                    event_data["location"] = td[1].contents[0].string
                    if not event_data["location"]:
                        event_data["location"] = ""
                    # activity
                    event_data["type"] = cell[1].tr.td.contents[0].string
                    # an event occupies as many slots as its colspan
                    current_time = current_time + event_data["duration"]
                    event_data["organizer"] = ""
                    event_data["title"] = "%s - %s" % (self.metadata["mnemo"], self.metadata["title"])

                    course_event = CourseEvent(**event_data)
                    event_list.append(course_event)
                else:
                    # empty cell: exactly one slot
                    current_time += 1
        return event_list
    def _process_event(self, object_cell, starting_hour, num_day):
        """Build the event description held by one calendar cell.

        The cell must contain exactly three direct child tables, one per
        line: location/weeks, title, then tutor/group/course type.

        :param object_cell: table cell of the event; its ``colspan`` is read
            as the number of time slots.
        :param starting_hour: start of the event; a ``timedelta`` is added to
            it to obtain the stop time.
        :param num_day: value stored unchanged under ``"day"``.
        :return: dict describing the event.
        """
        slot_count = int(object_cell["colspan"])
        # unpacking fails unless the cell holds exactly three tables
        header_tbl, title_tbl, footer_tbl = object_cell.findChildren("table", recursive=False)

        # first table: location, then weeks
        header_cells = header_tbl.tr.findChildren("td")
        room = header_cells[0].text
        raw_weeks = header_cells[1].text

        # second table: title
        heading = title_tbl.tr.td.text

        # third table: tutor, group, course type
        footer_cells = footer_tbl.findChildren("td")
        tutor = footer_cells[0].text
        group = footer_cells[1].text
        kind = footer_cells[2].text

        event = {}
        event["type"] = kind
        event["location"] = room
        event["organizer"] = ""
        event["title"] = heading
        event["lecturer"] = tutor
        event["group"] = group
        event["weeks"] = split_weeks(raw_weeks)
        event["num_timeslots"] = slot_count
        event["start_time"] = starting_hour
        length = timedelta(hours=self._convert_num_timeslots_to_hours(slot_count))
        event["stop_time"] = starting_hour + length
        event["day"] = num_day
        return event
Example #3
0
    def _process_event(self, object_cell, starting_hour, num_day):
        """Extract the data of a single event cell as a dict.

        ``object_cell`` must hold exactly three direct child tables:
        location/weeks, title, and tutor/group/course type.

        :param object_cell: event cell; ``colspan`` gives the slot count.
        :param starting_hour: event start, also used to derive the stop time.
        :param num_day: value copied into the ``'day'`` entry.
        :return: dict with the event fields.
        """
        span = int(object_cell['colspan'])
        inner = object_cell.findChildren('table', recursive=False)
        # exactly three tables are expected; anything else fails to unpack
        top, middle, bottom = inner

        top_cells = top.tr.findChildren('td')
        where = top_cells[0].text
        when = top_cells[1].text

        name = middle.tr.td.text

        bottom_cells = bottom.findChildren('td')
        teacher = bottom_cells[0].text
        audience = bottom_cells[1].text
        activity = bottom_cells[2].text

        weeks = split_weeks(when)
        # the slot count is converted to hours by the class helper
        end = starting_hour + timedelta(hours=self._convert_num_timeslots_to_hours(span))

        return {
            'type': activity,
            'location': where,
            'organizer': "",
            'title': name,
            'lecturer': teacher,
            'group': audience,
            'weeks': weeks,
            'num_timeslots': span,
            'start_time': starting_hour,
            'stop_time': end,
            'day': num_day,
        }
Example #4
0
    def _process_event(self, object_cell, starting_hour, num_day):
        """Extract the data of a single event cell as a dict.

        ``object_cell`` must hold exactly three direct child tables:
        weeks/location/type, mnemonic, and title.

        :param object_cell: event cell; ``colspan`` gives the slot count.
        :param starting_hour: event start, also used to derive the stop time.
        :param num_day: value copied into the ``'day'`` entry.
        :return: dict with the event fields.
        """
        span = int(object_cell['colspan'])
        # exactly three tables are expected; anything else fails to unpack
        info_tbl, mnemo_tbl, title_tbl = object_cell.findChildren('table', recursive=False)

        # first table: weeks, location, course type -- in that order
        info_cells = info_tbl.tr.findChildren('td')
        raw_weeks = info_cells[0].text
        room = info_cells[1].text
        kind = info_cells[2].text

        mnemo = mnemo_tbl.tr.td.text
        heading = title_tbl.tr.td.text

        weeks = split_weeks(raw_weeks)
        # the slot count is converted to hours by the class helper
        length = timedelta(hours=self._convert_num_timeslots_to_hours(span))

        return {
            'type': kind,
            'location': room,
            'organizer': "",
            'title': heading,
            'mnemo': mnemo,
            'weeks': weeks,
            'num_timeslots': span,
            'start_time': starting_hour,
            'stop_time': starting_hour + length,
            'day': num_day,
        }
Example #5
0
    def _extract_table(self, html):
        """Parse the HTML page and turn its calendar table into events.

        The second top-level ``<table>`` of ``<body>`` is taken as the
        calendar. Its first row supplies the time slots; every following row
        is scanned for event cells, i.e. cells holding more than one direct
        child table.

        :param html: markup of the calendar page.
        :return: list of ``CourseEvent`` objects, in the order encountered.
        :raises IndexError: among other cases, when the first hour cell has
            no text (there is no previous slot to derive the half hour from).
        """
        soup = BeautifulSoup(html)
        tables = soup.html.body.findAll(name='table', recursive=False)
        # jump to the calendar table
        cal = tables[1]
        lines = cal.findAll(name='tr', recursive=False)
        # isolate the first row holding the hours (its first cell is skipped)
        hours_line = lines[0].findAll(name='td', recursive=False)
        hours = []
        for h in hours_line[1:]:
            if h.string:
                hours.append(convert_time(h.string))
            else:
                # cell without text: same date and hour as the previous slot,
                # at minute 30
                last_added_hour = hours[-1]
                hours.append(datetime(last_added_hour.year,
                                      last_added_hour.month,
                                      last_added_hour.day,
                                      last_added_hour.hour, 30))

        # Collect the cells of every remaining row once; they are needed both
        # for the rowspan scan and for the event scan below.
        rows = [line.findAll(name='td', recursive=False) for line in lines[1:]]
        # Number of table rows announced by the first cell of each row
        # (0 when the cell carries no rowspan attribute).  ``Tag.get`` is used
        # instead of ``has_key`` because it exists in both BeautifulSoup
        # generations.
        n_rows = [int(cells[0].get('rowspan', 0)) for cells in rows]

        event_list = []
        day = -1
        n = 0  # rows still to be consumed for the current day
        for no_line, slots in enumerate(rows):
            if not n:
                # first row of a new day
                # NOTE(review): if this row has no rowspan, n becomes -1 below
                # and every later row is treated as a continuation row --
                # confirm the page always sets rowspan on day rows.
                n = n_rows[no_line]
                day += 1
                # Start one slot early: the leading cell of a day's first row
                # (presumably the day label) is counted like any empty cell.
                current_time = -1
            else:
                current_time = 0
            n -= 1
            for s in slots:
                cell = s.findAll(name='table', recursive=False)
                # event found
                if len(cell) > 1:
                    event_data = {
                        'day': day,
                        'start_time': hours[current_time],
                        'duration': int(s['colspan']),
                    }
                    # 1 colspan = 1/2 hour.  Computed in minutes so that an odd
                    # colspan keeps its half hour even where ``/`` is integer
                    # division (Python 2).
                    delta = timedelta(minutes=30 * event_data['duration'])
                    event_data['stop_time'] = hours[current_time] + delta
                    td = cell[0].tr.findAll(name='td', recursive=False)
                    # Gehol weeks when the event occurs
                    event_data['weeks'] = split_weeks(td[0].contents[0].string)
                    # location (empty string when the cell has no text)
                    event_data['location'] = td[1].contents[0].string
                    if not event_data['location']:
                        event_data['location'] = ''
                    # activity
                    event_data['type'] = cell[1].tr.td.contents[0].string
                    # an event occupies as many slots as its colspan
                    current_time = current_time + event_data['duration']
                    event_data['organizer'] = ""
                    event_data['title'] = "%s - %s" % (self.metadata['mnemo'], self.metadata['title'])

                    course_event = CourseEvent(**event_data)
                    event_list.append(course_event)
                else:
                    # empty cell: exactly one slot
                    current_time += 1
        return event_list