def parse_2011(self, response):
    """Yield one speaker item for every ``.speakers`` element on the page.

    For each matched element the loader collects:

    * ``conference`` -- the constant ``'EuroPython'``;
    * ``name`` -- the text nodes selected by ``.speakers::text``;
    * ``year`` -- ``str(response.meta['cookiejar'])``.

    NOTE(review): the ``cookiejar`` meta key is what carries the year
    here; presumably the code issuing the request stores it there --
    confirm against the caller.
    """
    sel = Selector(response)
    for speaker in sel.css('.speakers'):
        il = SpeakerLoader(selector=speaker)
        # The other callbacks in this file tag each item with its
        # conference; do the same here so items from this page are not
        # missing that field.
        # NOTE(review): assumes this callback scrapes the same
        # conference as the ones that set 'EuroPython' -- confirm.
        il.add_value('conference', 'EuroPython')
        il.add_css('name', '.speakers::text')
        il.add_value('year', str(response.meta['cookiejar']))
        yield il.load_item()
def parse_new(self, response):
    """Yield one speaker item per speaker node of an archive page.

    Speaker nodes are matched with
    ``.archive .talk .speakers > .speaker``.  For each node the loader
    collects:

    * ``conference`` -- the constant ``'EuroPython'``;
    * ``name`` -- the text of the node's ``span`` elements;
    * ``image_urls`` -- the ``src`` of ``a > img`` images, each joined
      against ``response.url``;
    * ``year`` -- ``str(response.meta['cookiejar'])``.

    NOTE(review): the ``cookiejar`` meta key is what carries the year
    here; presumably the code issuing the request stores it there --
    confirm against the caller.
    """
    sel = Selector(response)
    for speaker in sel.css('.archive .talk .speakers > .speaker'):
        il = SpeakerLoader(selector=speaker)
        # Tag the item with its conference, matching the other
        # ``parse_new`` in this file; without it the item lacks the field.
        il.add_value('conference', 'EuroPython')
        il.add_css('name', "span::text")
        # Resolve every extracted ``src`` against the page URL.
        il.add_css('image_urls', "a > img::attr(src)",
                   lambda x: [urljoin(response.url, y) for y in x])
        il.add_value('year', str(response.meta['cookiejar']))
        yield il.load_item()
def parse_new(self, response):
    """Build speaker items from an archive page.

    Every node matching ``.archive .talk .speakers > .speaker`` produces
    one item carrying the conference name ``'EuroPython'``, the text of
    its ``span`` elements as ``name``, the ``src`` of its ``a > img``
    images (joined against ``response.url``) as ``image_urls``, and
    ``str(response.meta['cookiejar'])`` as ``year``.

    NOTE(review): the year is read from the ``cookiejar`` meta key;
    presumably the requesting code puts it there -- confirm.
    """
    def absolutize(sources):
        # Join each extracted ``src`` value with the URL of this page.
        return [urljoin(response.url, src) for src in sources]

    page = Selector(response)
    for node in page.css('.archive .talk .speakers > .speaker'):
        loader = SpeakerLoader(selector=node)
        loader.add_value('conference', 'EuroPython')
        loader.add_css('name', "span::text")
        loader.add_css('image_urls', "a > img::attr(src)", absolutize)
        # Looked up per item, so a page without speakers never touches meta.
        loader.add_value('year', str(response.meta['cookiejar']))
        yield loader.load_item()
def parse(self, response):
    """Yield speaker items for a listing page, then follow its pagination.

    Each ``div.mini-profile`` element produces one item with:

    * ``name`` -- text from ``.name > a`` and text directly under
      ``.name``;
    * ``image_urls`` -- the ``src`` attribute of ``img`` elements;
    * ``year`` -- ``str(response.meta['cookiejar'])``;
    * ``conference`` -- the constant ``'EuroPython'``.

    Afterwards one ``Request`` is yielded for every link found under
    ``.pagination``, resolved against ``response.url`` and carrying the
    same ``meta``.  No callback is passed to ``Request``.

    NOTE(review): the ``cookiejar`` meta key is what carries the year
    here; presumably the code issuing the request stores it there --
    confirm against the caller.
    """
    sel = Selector(response)
    for speaker in sel.css('div.mini-profile'):
        il = SpeakerLoader(selector=speaker)
        il.add_css('name', ".name > a::text")
        # Second name source, mirroring the other ``parse`` in this file;
        # presumably covers profiles whose name is not wrapped in a
        # link -- confirm against the target markup.
        il.add_css('name', ".name::text")
        il.add_css('image_urls', "img::attr(src)")
        il.add_value('year', str(response.meta['cookiejar']))
        # Tag the item with its conference, as the other ``parse`` does.
        il.add_value('conference', 'EuroPython')
        yield il.load_item()

    # pagination: forward ``meta`` so follow-up pages keep the same year.
    pages = sel.css('.pagination a::attr(href)').extract()
    for page in pages:
        yield Request(urljoin(response.url, page), meta=response.meta)
def parse(self, response):
    """Scrape speaker profiles from a listing page and queue further pages.

    One item is produced per ``div.mini-profile`` element.  Its ``name``
    is gathered from ``.name > a`` text and from text directly under
    ``.name``; ``image_urls`` from the ``src`` of ``img`` elements;
    ``year`` is ``str(response.meta['cookiejar'])``; ``conference`` is
    ``'EuroPython'``.

    Once all profiles are emitted, every ``href`` under ``.pagination``
    is joined with ``response.url`` and requested with the current
    ``meta``.  No callback is passed to ``Request``.

    NOTE(review): the year is read from the ``cookiejar`` meta key;
    presumably the requesting code puts it there -- confirm.
    """
    root = Selector(response)

    for profile in root.css('div.mini-profile'):
        loader = SpeakerLoader(selector=profile)
        loader.add_css('name', ".name > a::text")
        loader.add_css('name', ".name::text")
        loader.add_css('image_urls', "img::attr(src)")
        # Looked up per item, so a page without profiles never touches meta.
        loader.add_value('year', str(response.meta['cookiejar']))
        loader.add_value('conference', 'EuroPython')
        yield loader.load_item()

    # Pagination links are only extracted after the profiles are done.
    hrefs = root.css('.pagination a::attr(href)').extract()
    for href in hrefs:
        next_url = urljoin(response.url, href)
        yield Request(next_url, meta=response.meta)