def parse_item(self, response):
    """Parse a job-listing page and yield one populated JobItem per listing.

    Fields extracted per <li class="item">: link, uid, title, desc,
    city, state and pay.
    """
    items = response.xpath(
        "//ul[@class='list grid-16-16']/li[@class='item']")
    for i in items:
        job = JobItem()
        job['link'] = i.xpath('.//h3/a/@href').extract()[0]
        # uid = last URL path segment + spider name, to keep ids unique
        # across providers.
        job['uid'] = '{}_{}'.format(job['link'].split('/')[-1], self.name)
        job['title'] = i.xpath('.//h3/a/text()').extract()[0]
        # BUG FIX: the original query started with '//div...', an absolute
        # path that searches the whole document — every item got the first
        # description on the page. './/' scopes the query to this <li>.
        desc = i.xpath(
            './/div[contains(@class, "descricao")]/p/text()').extract()[0]
        job['desc'] = clean_str(desc)
        citystate_string = clean_str(
            i.xpath('.//span[contains(@class, "nome-empresa")]/text()')
            .extract()[0])
        # Expected shape: "<company> - <city> - <state>"; missing pieces
        # fall back to '' rather than crashing the crawl.
        citystate_string = citystate_string.strip().split('-')
        try:
            job['city'] = citystate_string[1]
        except IndexError:
            job['city'] = ''
        try:
            job['state'] = citystate_string[2]
        except IndexError:
            job['state'] = ''
        job['pay'] = clean_str(
            i.xpath(".//div[@class='salario-de-ate']/text()").extract()[0])
        yield job
def parse_item(self, response):
    """Parse a job-listing page and yield one populated JobItem per listing.

    Fields extracted per <li class="item">: link, uid, title, desc,
    city, state and pay.
    """
    items = response.xpath(
        "//ul[@class='list grid-16-16']/li[@class='item']")
    for i in items:
        job = JobItem()
        job['link'] = i.xpath('.//h3/a/@href').extract()[0]
        # uid combines the last URL segment with the spider name so ids
        # stay unique per provider.
        job['uid'] = '{}_{}'.format(
            job['link'].split('/')[-1], self.name)
        job['title'] = i.xpath('.//h3/a/text()').extract()[0]
        # BUG FIX: original used absolute '//div...' which matched the
        # first description anywhere on the page for every item; './/'
        # restricts the lookup to the current <li>.
        desc = i.xpath(
            './/div[contains(@class, "descricao")]/p/text()'
        ).extract()[0]
        job['desc'] = clean_str(desc)
        citystate_string = clean_str(
            i.xpath(
                './/span[contains(@class, "nome-empresa")]/text()'
            ).extract()[0]
        )
        # Expected shape: "<company> - <city> - <state>"; absent parts
        # default to '' so a malformed listing doesn't kill the crawl.
        citystate_string = citystate_string.strip().split('-')
        try:
            job['city'] = citystate_string[1]
        except IndexError:
            job['city'] = ''
        try:
            job['state'] = citystate_string[2]
        except IndexError:
            job['state'] = ''
        job['pay'] = clean_str(
            i.xpath(".//div[@class='salario-de-ate']/text()").extract()[0])
        yield job
def parse_item(self, response):
    """Yield a JobItem for every "boxVaga" card found on the page.

    Populates provider, uid, link, title, desc, city, state and pay
    from the card's itemprop-annotated markup.
    """
    for card in response.xpath('//div[contains(@class, "boxVaga")]'):
        job = JobItem()
        job['provider'] = self.name
        # uid = the card's DOM id plus the spider name.
        job['uid'] = "{}_{}".format(card.xpath('@id').extract()[0], self.name)
        anchor = card.xpath('.//h2[@itemprop="title"]/a')
        job['link'] = anchor.xpath('@href').extract()[0]
        job['title'] = anchor.xpath('text()').extract()[0]
        job['desc'] = clean_str(
            card.xpath(
                './/div[contains(@itemprop, "description")]/text()'
            ).extract()[0])
        # NOTE(review): schema.org defines addressLocality as the city and
        # addressRegion as the state — the mapping below looks inverted but
        # is preserved as-is; confirm against the site's actual markup.
        job['city'] = clean_str(
            card.xpath(
                './/span[contains(@itemprop, "addressRegion")]/text()'
            ).extract()[0])
        job['state'] = clean_str(
            card.xpath(
                './/span[contains(@itemprop, "addressLocality")]/text()'
            ).extract()[0])
        job['pay'] = clean_str(
            card.xpath(
                './/div[@class="salarioLocal"]/h3/text()'
            ).extract()[0])
        yield job
def parse_item(self, response):
    """Build and yield one JobItem per "boxVaga" card on the page.

    Extracted fields: provider, uid, link, title, desc, city, state, pay.
    """
    cards = response.xpath('//div[contains(@class, "boxVaga")]')
    for card in cards:
        job = JobItem()
        job['provider'] = self.name
        # uid is the card's DOM id suffixed with the spider name.
        card_id = card.xpath('@id').extract()[0]
        job['uid'] = "{}_{}".format(card_id, self.name)
        title_link = card.xpath('.//h2[@itemprop="title"]/a')
        job['link'] = title_link.xpath('@href').extract()[0]
        job['title'] = title_link.xpath('text()').extract()[0]
        raw_desc = card.xpath(
            './/div[contains(@itemprop, "description")]/text()').extract()[0]
        job['desc'] = clean_str(raw_desc)
        # NOTE(review): per schema.org, addressLocality is the city and
        # addressRegion is the state — this mapping appears swapped but is
        # kept byte-for-byte; verify against the scraped site before changing.
        raw_city = card.xpath(
            './/span[contains(@itemprop, "addressRegion")]/text()').extract()[0]
        job['city'] = clean_str(raw_city)
        raw_state = card.xpath(
            './/span[contains(@itemprop, "addressLocality")]/text()').extract()[0]
        job['state'] = clean_str(raw_state)
        raw_pay = card.xpath(
            './/div[@class="salarioLocal"]/h3/text()').extract()[0]
        job['pay'] = clean_str(raw_pay)
        yield job
def parse(self, response):
    """Parse the listing page and yield one JobItem per "box-vaga" card.

    Fields extracted: uid, link (absolute, via self.base_url), title,
    desc, city, state and pay.
    """
    items = response.xpath('//div[contains(@class, "box-vaga")]')
    for i in items:
        job = JobItem()
        link = i.xpath('.//h4/a')
        job['uid'] = i.xpath(
            './/p[@class="info-vaga-detalhe"]/span[2]/text()')\
            .extract()[0].strip()
        # The href is relative on this site; prefix with the spider's base URL.
        job['link'] = "%s%s" % (self.base_url,
                                link.xpath('@href').extract()[0])
        job['title'] = link.xpath('text()').extract()[0]
        desc = i.xpath(
            './/div[contains(@class, "descricao-vaga")]/p/text()')\
            .extract()[0]
        job['desc'] = clean_str(desc)
        city_state = i.xpath(
            './/p[contains(@class, "info-vaga-conteudo")]/span[2]/text()')\
            .extract()[0].strip()
        # ROBUSTNESS FIX: the original indexed split('/')[1] unconditionally
        # and raised IndexError whenever no "/<state>" suffix was present.
        # Fall back to '' for a missing state, matching the behavior of the
        # sibling spiders in this project.
        parts = city_state.split('/')
        job['city'] = parts[0]
        job['state'] = parts[1] if len(parts) > 1 else ''
        pay = i.xpath(
            './/p[contains(@class, "info-vaga-conteudo")]/span[1]/text()')\
            .extract()[0].strip()
        job['pay'] = pay
        yield job