def get_alias(self, meta, response):
        """Record the first alias text found in the page under ``meta['alias']``.

        The XPath selects text nodes that have a preceding sibling ``<span>``
        whose text is "又名:" (Chinese for "also known as") and a following
        sibling ``<br>``. Only the first match is used, after being passed
        through ``validator.process_slash_str``.

        Args:
            meta: Mapping that receives the ``alias`` entry.
            response: Object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the caller).

        Returns:
            The same ``meta`` object; left unchanged when nothing matches.
        """
        alias_xpath = (
            '//text()[preceding-sibling::span[text()="又名:"]]'
            '[following-sibling::br]'
        )
        text_nodes = response.xpath(alias_xpath).extract()
        if not text_nodes:
            return meta
        first_alias = text_nodes[0]
        meta['alias'] = validator.process_slash_str(first_alias)
        return meta
Exemple #2
0
    def set_alias(self, meta, response):
        """Record the first alias text found in the page under ``meta["alias"]``.

        The XPath selects text nodes that have a preceding sibling ``<span>``
        whose text is "又名:" (Chinese for "also known as") and a following
        sibling ``<br>``. The first match is passed through
        ``validator.process_slash_str`` before being stored.

        Args:
            meta: Mapping that receives the ``alias`` entry.
            response: Object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the caller).

        Returns:
            The same ``meta`` object; left unchanged when the first match is
            missing or empty.
        """
        alias_xpath = (
            '//text()[preceding-sibling::span[text()="又名:"]]'
            '[following-sibling::br]'
        )
        first_alias = response.xpath(alias_xpath).get()
        if not first_alias:
            return meta
        meta["alias"] = validator.process_slash_str(first_alias)
        return meta
    def get_actor_ids(self, meta, response):
        """Store the starring cast as ``name:id`` pairs in ``meta['actor_ids']``.

        Each ``<a rel="v:starring">`` anchor is read on its own, so a name is
        always paired with the href of the same link. Anchors that lack an
        href or a text node are skipped; pairing two independently extracted
        lists by index would raise ``IndexError`` or shift later pairs in
        that situation.

        Args:
            meta: Mapping that receives the ``actor_ids`` entry.
            response: Object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the caller).

        Returns:
            The same ``meta`` object, with ``actor_ids`` set to the
            ``|``-joined pairs after ``validator.process_slash_str``.
        """
        pairs = []
        for anchor in response.xpath('//a[@rel="v:starring"]'):
            hrefs = anchor.xpath('@href').extract()
            names = anchor.xpath('text()').extract()
            if not hrefs or not names:
                continue
            # The id is the second-to-last "/"-separated piece of the href
            # (assumes hrefs end with ".../<id>/" -- TODO confirm).
            actor_id = hrefs[0].split("/")[-2]
            pairs.append(names[0] + ":" + actor_id)

        meta['actor_ids'] = validator.process_slash_str('|'.join(pairs))
        return meta
    def get_director_ids(self, meta, response):
        """Store the directors as ``name:id`` pairs in ``meta['director_ids']``.

        Each ``<a rel="v:directedBy">`` anchor is read on its own, so a name
        is always paired with the href of the same link. Anchors that lack an
        href or a text node are skipped; pairing two independently extracted
        lists by index would raise ``IndexError`` or shift later pairs in
        that situation.

        Args:
            meta: Mapping that receives the ``director_ids`` entry.
            response: Object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the caller).

        Returns:
            The same ``meta`` object, with ``director_ids`` set to the
            ``|``-joined pairs after ``validator.process_slash_str``.
        """
        pairs = []
        for anchor in response.xpath('//a[@rel="v:directedBy"]'):
            hrefs = anchor.xpath('@href').extract()
            names = anchor.xpath('text()').extract()
            if not hrefs or not names:
                continue
            # The id is the second-to-last "/"-separated piece of the href
            # (assumes hrefs end with ".../<id>/" -- TODO confirm).
            director_id = hrefs[0].split("/")[-2]
            pairs.append(names[0] + ":" + director_id)

        meta['director_ids'] = validator.process_slash_str('|'.join(pairs))
        return meta
 def get_actors(self, meta, response):
     """Store the starring cast names, ``/``-joined, in ``meta['actors']``.

     Names are the text nodes of every ``<a rel="v:starring">`` anchor; the
     joined string is passed through ``validator.process_slash_str``.

     Args:
         meta: Mapping that receives the ``actors`` entry.
         response: Object exposing ``xpath()`` (presumably a Scrapy
             response -- confirm against the caller).

     Returns:
         The same ``meta`` object, updated in place.
     """
     names = response.xpath('//a[@rel="v:starring"]/text()').extract()
     joined_names = '/'.join(names)
     meta['actors'] = validator.process_slash_str(joined_names)
     return meta
 def get_directors(self, meta, response):
     """Store the director names, ``/``-joined, in ``meta['directors']``.

     Names are the text nodes of every ``<a rel="v:directedBy">`` anchor; the
     joined string is passed through ``validator.process_slash_str``.

     Args:
         meta: Mapping that receives the ``directors`` entry.
         response: Object exposing ``xpath()`` (presumably a Scrapy
             response -- confirm against the caller).

     Returns:
         The same ``meta`` object, updated in place.
     """
     names = response.xpath('//a[@rel="v:directedBy"]/text()').extract()
     joined_names = '/'.join(names)
     meta['directors'] = validator.process_slash_str(joined_names)
     return meta
Exemple #7
0
 def set_actors(self, meta, response):
     """Store the starring cast names, ``/``-joined, in ``meta["actors"]``.

     Names are the text nodes of every ``<a rel="v:starring">`` anchor; the
     joined string is passed through ``validator.process_slash_str``.

     Args:
         meta: Mapping that receives the ``actors`` entry.
         response: Object exposing ``xpath()`` (presumably a Scrapy
             response -- confirm against the caller).

     Returns:
         The same ``meta`` object, updated in place.
     """
     names = response.xpath('//a[@rel="v:starring"]/text()').getall()
     joined_names = "/".join(names)
     meta["actors"] = validator.process_slash_str(joined_names)
     return meta