def parse(self, response):
    """Extract one ``CourseItem`` per course-table row, then follow pagination.

    Every ``tr`` under ``.empty_html`` except the first is loaded into a
    ``CourseItem`` through an ``ItemLoader`` (the first row is skipped --
    presumably the table header; confirm against the page markup).  The class
    id is taken from the ``clsno`` query parameter of ``response.url`` and
    attached to every item from this page.

    Args:
        response: The downloaded course-list page.

    Yields:
        The loaded item for each course row, followed by a request for the
        next page when a next-page link is present.
    """
    # https://aisap.nutc.edu.tw/public/day/course_list.aspx?sem=1081&clsno=1120170121&_p=2 -> 1120170121
    # The class id depends only on the URL, so compute it once per page
    # instead of once per row.
    class_id_match = re.search(r'clsno=(\w*)', response.url)
    if class_id_match is None:
        # Previously this case crashed with an opaque TypeError; keep
        # scraping the rows and leave class_id unset instead.
        self.logger.warning(
            "No 'clsno' parameter in %s; class_id will be left empty",
            response.url,
        )
        class_id = None
    else:
        class_id = class_id_match.group(1)

    all_row = response.css(".empty_html tr")
    for row in all_row[1:]:
        course_loader = ItemLoader(item=CourseItem(), selector=row)

        course_loader.replace_css('number', 'td:nth-child(2)::text')
        if class_id is not None:
            course_loader.replace_value('class_id', class_id)
        course_loader.replace_css(
            'name',
            'td:nth-child(4)::text, td:nth-child(4) > strong::text',
        )
        # NOTE(review): 'time' and 'location' both read column 6 --
        # presumably the item's field processors split the cell; confirm.
        course_loader.replace_css('time', 'td:nth-child(6)::text')
        course_loader.replace_css('location', 'td:nth-child(6)::text')
        course_loader.replace_css('compulsory', 'td:nth-child(7)::text')
        course_loader.replace_css('credit', 'td:nth-child(8)::text')
        course_loader.replace_css('popular', 'td:nth-child(9) > strong::text')
        course_loader.replace_css('teacher_name', 'td:nth-child(10)::text')
        course_loader.replace_css('popular_limit', 'td:nth-child(11)::text')

        yield course_loader.load_item()

    # No callback is passed to follow(), so the request uses the spider's
    # default callback handling.
    next_page = response.css('.page > b:last-child > a::attr(href)').get()
    if next_page:
        yield response.follow(next_page)