def get_official_site(self, meta, response):
    """Store the page's official-site link in ``meta`` when one is present.

    Selects the ``href`` of an ``<a>`` element that has a preceding-sibling
    ``<span>`` whose text is "官方网站:" ("Official website:") and a
    following-sibling ``<br>``.

    Args:
        meta: Mapping that collects scraped fields; updated in place.
        response: Object exposing ``xpath(query).extract()`` (presumably a
            Scrapy response -- confirm against the caller).

    Returns:
        The same ``meta`` object. If at least one link matched,
        ``meta['official_site']`` holds the first match after it has been
        passed through ``validator.process_url``; otherwise ``meta`` is
        returned untouched.
    """
    # The axis must be the single token "following-sibling". A backslash
    # line continuation inside the literal previously split the name and
    # made the expression invalid XPath, so the literal is now assembled
    # from adjacent string pieces instead.
    xpath = (
        '//a[preceding-sibling::span[text()="官方网站:"]]'
        '[following-sibling::br]/@href'
    )
    hrefs = response.xpath(xpath).extract()
    if hrefs:
        meta['official_site'] = validator.process_url(hrefs[0])
    return meta
def get_poster(self, meta, response):
    """Record the poster image URL in ``meta`` when the page exposes one.

    Reads the ``src`` of the ``<img>`` nested in an ``<a class="nbgnbg">``
    element.

    Args:
        meta: Mapping that collects scraped fields; updated in place.
        response: Object exposing ``xpath(query).extract()`` (presumably a
            Scrapy response -- confirm against the caller).

    Returns:
        The same ``meta`` object. If an image was found,
        ``meta['poster_url']`` holds the first match after it has been passed
        through ``validator.process_url``, and that value is also printed to
        stdout.
    """
    sources = response.xpath('//a[@class="nbgnbg"]/img/@src').extract()
    if not sources:
        # Nothing matched: leave meta exactly as it was handed in.
        return meta

    poster_url = validator.process_url(sources[0])
    # print() already separates its arguments with a single space.
    print("Poster Url:", poster_url)
    meta['poster_url'] = poster_url
    return meta
def set_official_site(self, meta, response):
    """Write the page's official-site link into ``meta`` if it can be found.

    Selects the ``href`` of an ``<a>`` element that has a preceding-sibling
    ``<span>`` whose text is "官方网站:" ("Official website:") and a
    following-sibling ``<br>``, taking the first result via ``.get()``.

    Args:
        meta: Mapping that collects scraped fields; updated in place.
        response: Object exposing ``xpath(query).get()`` (presumably a
            Scrapy response -- confirm against the caller).

    Returns:
        The same ``meta`` object. When the lookup yields a truthy value,
        ``meta["official_site"]`` is set to that value after it has been
        passed through ``validator.process_url``; otherwise ``meta`` is
        returned untouched.
    """
    # "following-sibling" has to be one unbroken axis name. The literal used
    # to carry a backslash continuation in the middle of it, which produced
    # an invalid XPath expression; build it from adjacent pieces instead.
    xpath = (
        '//a[preceding-sibling::span[text()="官方网站:"]]'
        '[following-sibling::br]/@href'
    )
    href = response.xpath(xpath).get()
    if href:
        meta["official_site"] = validator.process_url(href)
    return meta