def parse_item(self, response):
    """Parse a job-listing page and yield one populated JobItem per listing.

    Fields extracted per <li class="item">: link, uid, title, desc,
    city, state and pay.
    """
    items = response.xpath(
        "//ul[@class='list grid-16-16']/li[@class='item']")
    for i in items:
        job = JobItem()
        job['link'] = i.xpath('.//h3/a/@href').extract()[0]
        # uid = last URL path segment + spider name, to keep ids unique
        # across providers.
        job['uid'] = '{}_{}'.format(job['link'].split('/')[-1], self.name)
        job['title'] = i.xpath('.//h3/a/text()').extract()[0]
        # BUG FIX: the original query started with '//div...', an absolute
        # path that searches the whole document — every item got the first
        # description on the page. './/' scopes the query to this <li>.
        desc = i.xpath(
            './/div[contains(@class, "descricao")]/p/text()').extract()[0]
        job['desc'] = clean_str(desc)
        citystate_string = clean_str(
            i.xpath('.//span[contains(@class, "nome-empresa")]/text()')
            .extract()[0])
        # Expected shape: "<company> - <city> - <state>"; missing pieces
        # fall back to '' rather than crashing the crawl.
        citystate_string = citystate_string.strip().split('-')
        try:
            job['city'] = citystate_string[1]
        except IndexError:
            job['city'] = ''
        try:
            job['state'] = citystate_string[2]
        except IndexError:
            job['state'] = ''
        job['pay'] = clean_str(
            i.xpath(".//div[@class='salario-de-ate']/text()").extract()[0])
        yield job
def parse_item(self, response):
    """Parse a job-listing page and yield one populated JobItem per listing.

    Fields extracted per <li class="item">: link, uid, title, desc,
    city, state and pay.
    """
    items = response.xpath(
        "//ul[@class='list grid-16-16']/li[@class='item']")
    for i in items:
        job = JobItem()
        job['link'] = i.xpath('.//h3/a/@href').extract()[0]
        # uid combines the last URL segment with the spider name so ids
        # stay unique per provider.
        job['uid'] = '{}_{}'.format(
            job['link'].split('/')[-1], self.name)
        job['title'] = i.xpath('.//h3/a/text()').extract()[0]
        # BUG FIX: original used absolute '//div...' which matched the
        # first description anywhere on the page for every item; './/'
        # restricts the lookup to the current <li>.
        desc = i.xpath(
            './/div[contains(@class, "descricao")]/p/text()'
        ).extract()[0]
        job['desc'] = clean_str(desc)
        citystate_string = clean_str(
            i.xpath(
                './/span[contains(@class, "nome-empresa")]/text()'
            ).extract()[0]
        )
        # Expected shape: "<company> - <city> - <state>"; absent parts
        # default to '' so a malformed listing doesn't kill the crawl.
        citystate_string = citystate_string.strip().split('-')
        try:
            job['city'] = citystate_string[1]
        except IndexError:
            job['city'] = ''
        try:
            job['state'] = citystate_string[2]
        except IndexError:
            job['state'] = ''
        job['pay'] = clean_str(
            i.xpath(".//div[@class='salario-de-ate']/text()").extract()[0])
        yield job
def parse_item(self, response):
    """Yield a JobItem for every "boxVaga" card found on the page.

    Populates provider, uid, link, title, desc, city, state and pay
    from the card's itemprop-annotated markup.
    """
    for card in response.xpath('//div[contains(@class, "boxVaga")]'):
        job = JobItem()
        job['provider'] = self.name
        # uid = the card's DOM id plus the spider name.
        job['uid'] = "{}_{}".format(card.xpath('@id').extract()[0], self.name)
        anchor = card.xpath('.//h2[@itemprop="title"]/a')
        job['link'] = anchor.xpath('@href').extract()[0]
        job['title'] = anchor.xpath('text()').extract()[0]
        job['desc'] = clean_str(
            card.xpath(
                './/div[contains(@itemprop, "description")]/text()'
            ).extract()[0])
        # NOTE(review): schema.org defines addressLocality as the city and
        # addressRegion as the state — the mapping below looks inverted but
        # is preserved as-is; confirm against the site's actual markup.
        job['city'] = clean_str(
            card.xpath(
                './/span[contains(@itemprop, "addressRegion")]/text()'
            ).extract()[0])
        job['state'] = clean_str(
            card.xpath(
                './/span[contains(@itemprop, "addressLocality")]/text()'
            ).extract()[0])
        job['pay'] = clean_str(
            card.xpath(
                './/div[@class="salarioLocal"]/h3/text()'
            ).extract()[0])
        yield job
def parse_item(self, response):
    """Build and yield one JobItem per "boxVaga" card on the page.

    Extracted fields: provider, uid, link, title, desc, city, state, pay.
    """
    cards = response.xpath('//div[contains(@class, "boxVaga")]')
    for card in cards:
        job = JobItem()
        job['provider'] = self.name
        # uid is the card's DOM id suffixed with the spider name.
        card_id = card.xpath('@id').extract()[0]
        job['uid'] = "{}_{}".format(card_id, self.name)
        title_link = card.xpath('.//h2[@itemprop="title"]/a')
        job['link'] = title_link.xpath('@href').extract()[0]
        job['title'] = title_link.xpath('text()').extract()[0]
        raw_desc = card.xpath(
            './/div[contains(@itemprop, "description")]/text()').extract()[0]
        job['desc'] = clean_str(raw_desc)
        # NOTE(review): per schema.org, addressLocality is the city and
        # addressRegion is the state — this mapping appears swapped but is
        # kept byte-for-byte; verify against the scraped site before changing.
        raw_city = card.xpath(
            './/span[contains(@itemprop, "addressRegion")]/text()').extract()[0]
        job['city'] = clean_str(raw_city)
        raw_state = card.xpath(
            './/span[contains(@itemprop, "addressLocality")]/text()').extract()[0]
        job['state'] = clean_str(raw_state)
        raw_pay = card.xpath(
            './/div[@class="salarioLocal"]/h3/text()').extract()[0]
        job['pay'] = clean_str(raw_pay)
        yield job
def parse(self, response):
    """Parse the listing page and yield one JobItem per "box-vaga" card.

    Fields extracted: uid, link (absolute, via self.base_url), title,
    desc, city, state and pay.
    """
    items = response.xpath('//div[contains(@class, "box-vaga")]')
    for i in items:
        job = JobItem()
        link = i.xpath('.//h4/a')
        job['uid'] = i.xpath(
            './/p[@class="info-vaga-detalhe"]/span[2]/text()')\
            .extract()[0].strip()
        # The href is relative on this site; prefix with the spider's base URL.
        job['link'] = "%s%s" % (self.base_url,
                                link.xpath('@href').extract()[0])
        job['title'] = link.xpath('text()').extract()[0]
        desc = i.xpath(
            './/div[contains(@class, "descricao-vaga")]/p/text()')\
            .extract()[0]
        job['desc'] = clean_str(desc)
        city_state = i.xpath(
            './/p[contains(@class, "info-vaga-conteudo")]/span[2]/text()')\
            .extract()[0].strip()
        # ROBUSTNESS FIX: the original indexed split('/')[1] unconditionally
        # and raised IndexError whenever no "/<state>" suffix was present.
        # Fall back to '' for a missing state, matching the behavior of the
        # sibling spiders in this project.
        parts = city_state.split('/')
        job['city'] = parts[0]
        job['state'] = parts[1] if len(parts) > 1 else ''
        pay = i.xpath(
            './/p[contains(@class, "info-vaga-conteudo")]/span[1]/text()')\
            .extract()[0].strip()
        job['pay'] = pay
        yield job