Ejemplo n.º 1
0
    def parse_courses(self, term, page):
        """Parse the course table on ``page`` and sync it into the database.

        Every row after the header row of the ``GridView1`` table body is
        turned into a ``Course`` for ``term``. When a course with the same
        ``(term, crn)`` already exists, its fields and meeting times are
        compared with the freshly parsed values and the stored course is
        updated only if something differs; otherwise the new course is saved.

        Args:
            term: Term the parsed courses belong to; passed to ``Course`` and
                used in the ``Course.objects.get(term=..., crn=...)`` lookup.
            page: Document object exposing ``get_element_by_id``
                (presumably an lxml.html element -- confirm against caller).

        Side effects:
            Saves ``MeetingTime`` and ``Course`` objects, writes a one-line
            summary to ``self.stdout`` and adds the per-page counts to
            ``self.total_parsed``, ``self.total_added`` and
            ``self.total_updated``.
        """
        # Get rows from the table
        rows = page.get_element_by_id('GridView1').xpath('tbody')[0]
        parsed = 0
        added = 0
        updated = 0

        # Parse all rows
        for row_index, row in enumerate(rows):
            # Skip the first row (titles)
            if row_index == 0:
                continue
            # Count rows actually visited so an empty table reports 0, not -1.
            parsed += 1

            # Fetch the cells once instead of once per column.
            cells = list(row)

            # Parse elements
            crn = int(cells[0].text.strip())
            course_anchor = cells[1].xpath('a')[0]
            course_code = course_anchor.text.strip()
            course_link = course_anchor.attrib['href']
            section = cells[2].text.strip()
            title_anchor = cells[3].xpath('a')[0]
            title = title_anchor.text.strip()
            bookstore_link = title_anchor.attrib['href']
            hours = cells[4].text.strip()
            attrstring = cells[5].xpath('span')[0].text
            attributes = attrstring.strip() if attrstring else ''
            ctype = cells[6].text.strip()

            # Possibility of having multiple meeting times: the days cell and
            # the times cell are read as parallel lists of text fragments.
            meeting_times = []
            days_list = list(cells[7].itertext())
            times_list = list(cells[8].itertext())
            for i, days in enumerate(days_list):
                days = days.strip()
                # These don't have a meeting time at all
                if not days:
                    continue
                # Not all meeting times have a specific start/end time. The
                # index must be strictly inside times_list; the previous
                # ``len(times_list) >= i`` check allowed i == len(times_list)
                # and raised IndexError.
                if i >= len(times_list):
                    continue
                timestring = times_list[i].strip()
                # NOTE(review): entries that list days but no time string are
                # skipped entirely (nothing is saved or attached for them).
                # Confirm that this is intended.
                if not timestring:
                    continue

                # The time string is "<start>-<end>"; three-character values
                # such as "900" are left-padded to "0900" before parsing.
                pieces = timestring.split('-')
                start_raw = pieces[0]
                if len(start_raw) == 3:
                    start_raw = '0' + start_raw
                end_raw = pieces[1]
                if len(end_raw) == 3:
                    end_raw = '0' + end_raw
                start_time = datetime.datetime.strptime(start_raw, '%H%M').time()
                end_time = datetime.datetime.strptime(end_raw, '%H%M').time()

                # Reuse a stored meeting time when one matches, otherwise
                # add it to the database.
                try:
                    meeting = MeetingTime.objects.get(
                        days=days, start_time=start_time, end_time=end_time)
                except MeetingTime.DoesNotExist:
                    meeting = MeetingTime(
                        days=days, start_time=start_time, end_time=end_time)
                    meeting.save()
                meeting_times.append(meeting)

            # 'ARR' in the location column is stored as no location.
            location = cells[9].text.strip()
            if location == 'ARR':
                location = None

            # Parse the instructor
            instructor_name = cells[10].text.strip()
            if instructor_name:
                instructor = self.parse_instructor(instructor_name)
            else:
                instructor = None

            seats = int(cells[11].xpath('span')[0].text.strip())
            statusstring = cells[12].xpath('span')[0].text.strip()
            # 1 for 'Open', 0 for 'Closed', -1 for anything else.
            status = {'Open': 1, 'Closed': 0}.get(statusstring, -1)

            # Create the course
            course = Course(
                term=term,
                crn=crn,
                course=course_code,
                course_link=course_link,
                section=section,
                title=title,
                bookstore_link=bookstore_link,
                hours=hours,
                attributes=attributes,
                ctype=ctype,
                location=location,
                instructor=instructor,
                seats=seats,
                status=status,
            )

            # Add it to the database
            try:
                obj = Course.objects.get(term=term, crn=crn)
            except Course.DoesNotExist:
                course.save()
                # NOTE(review): direct assignment to a many-to-many attribute
                # only works on older Django releases; newer ones require
                # ``.set(...)``. Confirm the project's Django version.
                course.meeting_times = meeting_times
                added += 1
            else:
                # Keep the stored instructor when the page lists none.
                if not course.instructor:
                    course.instructor = obj.instructor

                changed = False
                for f in obj._meta.fields:
                    if f.name in ('id', 'meeting_times'):
                        continue
                    old_attr = getattr(obj, f.name)
                    new_attr = getattr(course, f.name)
                    if old_attr != new_attr:
                        logger.debug('Changed value %s: %s -> %s',
                                     f.name, old_attr, new_attr)
                        changed = True
                        setattr(obj, f.name, new_attr)

                # Compare in both directions so that meeting times added on
                # the page are detected as well as ones that were removed.
                stored_times = list(obj.meeting_times.all())
                removed = any(item not in meeting_times for item in stored_times)
                appeared = any(item not in stored_times for item in meeting_times)
                if removed or appeared:
                    logger.debug('Changed meeting times %s -> %s',
                                 obj.meeting_times.all(), meeting_times)
                    changed = True
                    obj.meeting_times = meeting_times

                if changed:
                    logger.debug('Course listed as changed: /%s/%s',
                                 obj.term.value, obj.crn)
                    updated += 1
                    obj.save()

        self.stdout.write('-> Parsed %d courses. %d added, %d updated.'
                          % (parsed, added, updated))
        self.total_parsed += parsed
        self.total_added += added
        self.total_updated += updated