def get_official_site(self, meta, response):
    """Record the official-site link found on the page into ``meta``.

    The XPath selects the ``href`` of every ``<a>`` that has a preceding
    sibling ``<span>`` whose text is "官方网站:" (official website) and a
    following sibling ``<br>``. Only the first match is used.

    Args:
        meta: Mapping that is updated in place; ``'official_site'`` is set
            only when the page yields at least one match.
        response: Object exposing ``xpath(query)`` whose result supports
            ``extract()`` (looks like the Scrapy/parsel selector API --
            confirm against the caller).

    Returns:
        The same ``meta`` object that was passed in.
    """
    # Adjacent literals concatenate to the exact single-line XPath string.
    xpath_query = (
        '//a[preceding-sibling::span[text()="官方网站:"]]'
        '[following-sibling::br]/@href'
    )
    hrefs = response.xpath(xpath_query).extract()
    if not hrefs:
        # Nothing matched: leave meta untouched.
        return meta
    # validator.process_url is defined outside this block; presumably it
    # normalizes the raw href -- verify against its implementation.
    meta['official_site'] = validator.process_url(hrefs[0])
    return meta
 def get_poster(self, meta, response):
     """Record the poster image URL found on the page into ``meta``.

     The XPath selects the ``src`` of every ``<img>`` directly inside an
     ``<a class="nbgnbg">``. Only the first match is used.

     Args:
         meta: Mapping that is updated in place; ``'poster_url'`` is set
             only when the page yields at least one match.
         response: Object exposing ``xpath(query)`` whose result supports
             ``extract()`` (looks like the Scrapy/parsel selector API --
             confirm against the caller).

     Returns:
         The same ``meta`` object that was passed in.
     """
     sources = response.xpath('//a[@class="nbgnbg"]/img/@src').extract()
     if not sources:
         # Nothing matched: leave meta untouched and print nothing.
         return meta
     # validator.process_url is defined outside this block; presumably it
     # normalizes the raw src -- verify against its implementation.
     poster_url = validator.process_url(sources[0])
     # The processed URL is echoed to stdout before being stored.
     print("Poster Url:", poster_url, sep="  ")
     meta['poster_url'] = poster_url
     return meta
예제 #3
0
    def set_official_site(self, meta, response):
        """Store the page's official-site link under ``meta["official_site"]``.

        The XPath selects the ``href`` of every ``<a>`` that has a preceding
        sibling ``<span>`` whose text is "官方网站:" (official website) and a
        following sibling ``<br>``; ``get()`` is asked for a single result.

        Args:
            meta: Mapping that is updated in place; the key is set only when
                ``get()`` returns a truthy value.
            response: Object exposing ``xpath(query)`` whose result supports
                ``get()`` (looks like the Scrapy/parsel selector API --
                confirm against the caller).

        Returns:
            The same ``meta`` object that was passed in.
        """
        # Adjacent literals concatenate to the exact single-line XPath string.
        xpath_query = (
            '//a[preceding-sibling::span[text()="官方网站:"]]'
            '[following-sibling::br]/@href'
        )
        href = response.xpath(xpath_query).get()
        if not href:
            # No (or empty) match: leave meta untouched.
            return meta
        # validator.process_url is defined outside this block; presumably it
        # normalizes the raw href -- verify against its implementation.
        meta["official_site"] = validator.process_url(href)
        return meta