Esempio n. 1
0
    def parse(self, response):
        """Yield one loaded ``CourseItem`` per table row, then follow pagination.

        Every ``.empty_html tr`` row except the first is loaded through an
        ``ItemLoader``; the first row is skipped (presumably the table
        header -- confirm against the page markup).

        Args:
            response: The downloaded course-list page. Its ``url`` is searched
                for a ``clsno=<id>`` parameter, which becomes ``class_id``.

        Yields:
            The item produced by ``ItemLoader.load_item()`` for each row,
            followed by ``response.follow(next_page)`` when a next-page link
            is found.
        """
        rows = response.css(".empty_html tr")

        # https://aisap.nutc.edu.tw/public/day/course_list.aspx?sem=1081&clsno=1120170121&_p=2 -> 1120170121
        # The class id depends only on the URL, so it is extracted once rather
        # than once per row.
        clsno_match = re.search(r'clsno=(\w*)', response.url)
        # A URL without ``clsno=`` must not abort the whole page with a
        # ``TypeError`` on ``None``; such rows are emitted without ``class_id``.
        class_id = clsno_match.group(1) if clsno_match else None

        for row in rows[1:]:
            course_loader = ItemLoader(item=CourseItem(), selector=row)

            course_loader.replace_css('number', 'td:nth-child(2)::text')
            if class_id is not None:
                course_loader.replace_value('class_id', class_id)
            course_loader.replace_css('name', 'td:nth-child(4)::text, td:nth-child(4) > strong::text')
            # NOTE(review): 'time' and 'location' both read column 6 --
            # presumably the item's field processors split them; confirm.
            course_loader.replace_css('time', 'td:nth-child(6)::text')
            course_loader.replace_css('location', 'td:nth-child(6)::text')
            course_loader.replace_css('compulsory', 'td:nth-child(7)::text')
            course_loader.replace_css('credit', 'td:nth-child(8)::text')
            course_loader.replace_css('popular', 'td:nth-child(9) > strong::text')
            course_loader.replace_css('teacher_name', 'td:nth-child(10)::text')
            course_loader.replace_css('popular_limit', 'td:nth-child(11)::text')

            yield course_loader.load_item()

        next_page = response.css('.page > b:last-child > a::attr(href)').get()

        if next_page:
            # No explicit callback is passed to ``follow``.
            yield response.follow(next_page)