def vacansy_parse(self, response: HtmlResponse):
    """Build one ``JobparserItem`` from the response.

    The item carries the first ``<h1>`` text node, every text node of the
    ``<span>`` matched by the salary XPath, the response URL and the fixed
    source label ``'superjob.ru'``.
    """
    salary_nodes = response.xpath(
        "//span[@class='_3mfro _2Wp8I PlM3e _2JVkc']/text()"
    )
    yield JobparserItem(
        name=response.css('h1::text').extract_first(),
        salary=salary_nodes.extract(),
        url=response.url,
        source='superjob.ru',
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with the title, salary fragments and URL.

    ``item_name`` is the first ``<h1>`` text node (or ``None``);
    ``item_salary`` is the list of text nodes of the salary ``<span>``.
    """
    title = response.xpath("//h1/text()").extract_first()
    salary_parts = response.xpath(
        "//span[@class='_3mfro _2Wp8I PlM3e _2JVkc']/text()"
    ).extract()
    yield JobparserItem(
        item_name=title,
        item_salary=salary_parts,
        item_url=response.url,
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` built from the header and salary nodes.

    ``name`` is the first text node found anywhere under
    ``<h1 class="bloko-header-1">``; ``salary`` is the list of direct text
    nodes of the ``bloko-header-2 bloko-header-2_lite`` span.
    """
    title_nodes = response.xpath("//h1[@class='bloko-header-1']//text()")
    salary_nodes = response.xpath(
        "//span[@class='bloko-header-2 bloko-header-2_lite']/text()"
    )
    yield JobparserItem(
        name=title_nodes.extract_first(),
        salary=salary_nodes.extract(),
        link=response.url,
    )
def vacansy_parse(self, response: HtmlResponse):
    """Parse one vacancy page into a ``JobparserItem``.

    The salary ``<span>`` is read as a list of text fragments ``zp``:

    * no fragments  -> ``min_salary`` is the placeholder ``'Не указано'``
      and ``max_salary`` is ``None``;
    * one or two    -> ``min_salary`` is the first fragment, no maximum;
    * three or more -> ``min_salary`` is the first fragment and
      ``max_salary`` is the third one.

    A page without a matching ``<h1>`` yields ``name=None`` instead of
    raising ``IndexError``.
    """
    name = response.xpath(
        '//h1[@class="_3mfro rFbjy s1nFK _2JVkc"]/text()').extract()
    name_vac = name[0] if name else None

    zp = response.xpath(
        '//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span/text()'
    ).extract()
    if not zp:
        min_sal, max_sal = 'Не указано', None
    else:
        min_sal = zp[0]
        # Only index the third fragment when it exists; a single fragment
        # used to fall into the zp[2] branch and raise IndexError.
        max_sal = zp[2] if len(zp) > 2 else None
    url = response.url

    # Primitive debug output showing the parsed result.
    print('name = {}, salary = {}, min_sal = {}, max_sal = {}, url = {}'.
          format(name, zp, min_sal, max_sal, url))

    # Hand the assembled item over to the pipeline.
    yield JobparserItem(
        name=name_vac,
        min_salary=min_sal,
        max_salary=max_sal,
        link=url,
        source='SJ',
    )
def vacancy_parse(self, response: HtmlResponse):
    """Parse one vacancy page into a ``JobparserItem``.

    Yields an item with:

    * ``host`` - lower-cased ``domain.suffix`` of the response URL
      (subdomains dropped via ``tldextract``);
    * ``url`` - the response URL reduced to its path component;
    * ``name`` - first text node under ``h1.header``;
    * ``company`` - stripped ``content`` of the company-name ``<meta>``,
      or ``None`` when the node is absent;
    * ``salary_min`` / ``salary_max`` / ``salary_currency`` - ``content``
      attributes of the ``baseSalary`` microdata, ``None`` when missing.
    """
    # Host without subdomains.
    parsed_uri = tldextract.extract(response.url)
    host = "{domain}.{suffix}".format(
        domain=parsed_uri.domain, suffix=parsed_uri.suffix).lower()

    # Joining the URL with its own path drops query string and fragment.
    url = urljoin(response.url, urlparse(response.url).path)

    name = response.css('h1.header *::text').extract_first()

    company = response.css(
        '*.vacancy-company-name-wrapper > span[itemprop="identifier"] > meta[itemprop="name"]::attr(content)'
    ).extract_first()
    # extract_first() returns None when nothing matches; calling .strip()
    # on it unconditionally raised AttributeError.
    if company is not None:
        company = company.strip()

    salary_data = response.css(
        'div.vacancy-title > span[itemprop="baseSalary"]')
    salary_min = salary_data.css(
        'meta[itemprop="minValue"]::attr(content)').extract_first()
    salary_max = salary_data.css(
        'meta[itemprop="maxValue"]::attr(content)').extract_first()
    salary_currency = salary_data.css(
        'meta[itemprop="currency"]::attr(content)').extract_first()

    yield JobparserItem(
        host=host,
        url=url,
        name=name,
        company=company,
        salary_min=salary_min,
        salary_max=salary_max,
        salary_currency=salary_currency,
    )
def vacancy_parse(self, response: HtmlResponse):
    """Parse one vacancy page into a ``JobparserItem``.

    Yields an item with the first title text node, the vacancy URL taken
    from the page microdata, the list of salary text fragments, and the
    ``minValue`` / ``maxValue`` microdata values. A missing minimum or
    maximum is reported as the string ``'NaN'``; a missing title or link
    is reported as ``None``.
    """
    name = response.xpath(
        '//div[contains(@class,"vacancy-title")]//h1[@class="header"]//text()'
    ).extract_first()

    # The list of text fragments is passed on as-is. It used to be called
    # like a function (``salary()``), which raised TypeError on every page.
    salary = response.css(
        'div.vacancy-title p.vacancy-salary::text').extract()

    # The selector API does not raise on an empty match, so the fallback
    # has to be requested explicitly rather than via try/except.
    sal_min = response.css(
        'div.vacancy-title meta[itemprop="minValue"]::attr(content)'
    ).extract_first(default='NaN')
    sal_max = response.css(
        'div.vacancy-title meta[itemprop="maxValue"]::attr(content)'
    ).extract_first(default='NaN')

    link = response.css(
        'div.bloko-column_xs-4 div[itemscope="itemscope"] meta[itemprop="url"]::attr(content)'
    ).extract_first()

    yield JobparserItem(
        name=name,
        link=link,
        salary=salary,
        min_salary=sal_min,
        max_salary=sal_max,
        source='hh.ru',
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with the page title and salary text.

    ``name`` is the first ``<h1>`` text node (or ``None``); ``salary`` is
    the list of all text nodes under ``<p class="vacancy-salary">``.
    Nothing is written to stdout: the bare ``print()`` that used to follow
    the yield only emitted a blank line per item.
    """
    name = response.xpath("//h1/text()").extract_first()
    salary = response.xpath(
        "//p[@class='vacancy-salary']//text()").extract()
    yield JobparserItem(name=name, salary=salary)
def parse_vacancy(self, response: HtmlResponse):
    """Read price, currency sign and title from the page, then print ``1``.

    NOTE(review): nothing is yielded or returned here. The
    ``JobparserItem`` yield looks like it was disabled while debugging, so
    the values computed below are currently unused. Confirm whether that
    is intended.
    """
    _tmp_cur = {'₽': 'RUB', '$': 'USD'}  # not referenced below

    raw_price = response.xpath(
        "//span[@class='price-value-string js-price-value-string']/span[contains(@class,'js-item-price')]/text()"
    ).extract_first()
    # Drop plain spaces from a non-empty price string.
    price = raw_price.replace(' ', '') if raw_price else raw_price

    currency = response.xpath(
        "//span[@class='price-value-string js-price-value-string']/span[contains(@class,'price-value-prices-list-item-currency_sign')]/span/text()"
    ).extract_first()
    name = response.xpath(
        "//span[@class='title-info-title-text']/text()").extract_first()

    # Empty strings collapse to None, same as missing values.
    salary = {
        'currency': currency or None,
        'min_value': price or None,
        'max_value': None,
    }

    print(1)
def vacansy_parce(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with title, salary fragments and page URL.

    ``name`` is the first ``<h1>`` text node (or ``None``); ``salary`` is
    the list of direct text nodes of the salary ``<span>``.
    """
    salary_nodes = response.xpath(
        "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()"
    )
    yield JobparserItem(
        name=response.xpath("//h1/text()").extract_first(),
        salary=salary_nodes.extract(),
        link=response.url,
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` for the page behind ``response``.

    The item holds the first ``<h1>`` text node, all text nodes under
    ``<p class="vacancy-salary">``, the response URL and the first entry of
    ``self.allowed_domains`` as the source.
    """
    title = response.xpath("//h1/text()").extract_first()
    salary_parts = response.xpath(
        "//p[@class='vacancy-salary']//text()"
    ).extract()
    yield JobparserItem(
        item_name=title,
        item_salary=salary_parts,
        item_link=response.url,
        item_source=self.allowed_domains[0],
    )
def parse_vacancy(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with the title and a structured salary.

    The salary block is read as a list of text fragments. The last
    fragment is looked up in a currency-sign table (``'₽'`` -> ``'RUB'``,
    ``'$'`` -> ``'USD'``, anything else -> ``None``). Every earlier
    fragment that consists only of digits, once ``\\xa0`` is removed, is
    converted to ``int``; the first such number becomes ``min_value`` and
    the second, if any, ``max_value``.
    """
    currency_codes = {'₽': 'RUB', '$': 'USD'}
    fragments = response.xpath(
        "//div[@class='_3MVeX']/span[contains(@class, '_3mfro')]/span/text()"
    ).extract()
    name = response.xpath(
        "//div[@class='_3MVeX']/h1/text()").extract_first()

    amounts = []
    for fragment in fragments[:-1]:
        digits = fragment.replace('\xa0', '')
        if digits.isdigit():
            amounts.append(int(digits))

    salary = {
        'currency': currency_codes.get(fragments[-1]) if fragments else None,
        'min_value': amounts[0] if amounts else None,
        'max_value': amounts[1] if len(amounts) > 1 else None,
    }

    if salary['max_value']:
        print(1)
    yield JobparserItem(name=name, salary=salary)
def vacansy_parse(self, response: HtmlResponse, link):
    """Yield a ``JobparserItem`` built mostly from ``<meta itemprop>`` tags.

    ``link`` is passed through unchanged into the item. ``location`` is the
    list of text nodes under the ``<p>`` of the ``vacancy-company`` block;
    the remaining fields are the first ``content`` attribute of the
    ``<meta>`` whose ``itemprop`` contains the given key.
    """

    def first_meta_content(itemprop):
        """Return the first ``content`` of a ``<meta>`` matching ``itemprop``."""
        query = f"//meta[contains(@itemprop,'{itemprop}')]/@content"
        return response.xpath(query).extract_first()

    title = response.xpath(
        "//h1[@class='header']/descendant-or-self ::*/text()"
    ).extract_first()
    where = response.xpath(
        "//div[contains(@class, 'vacancy-company')]/p//text()").extract()

    fields = {
        'location': where,
        'currency': first_meta_content('currency'),
        'min_salary': first_meta_content('minValue'),
        'link': link,
        'max_salary': first_meta_content('maxValue'),
        'date_posted': first_meta_content('datePosted'),
        'company_name': first_meta_content('name'),
        'name': title,
    }
    yield JobparserItem(**fields)
def vacansy_parse(self, response: HtmlResponse):
    """Parse one vacancy page into a ``JobparserItem``.

    ``currency`` is the first ``content`` attribute of a ``<meta>`` directly
    under the ``baseSalary`` span. The numeric salary values come from the
    ``content`` attributes found under its nested ``<span>``: the first one
    is the minimum, and when exactly three values are present the second
    one is the maximum. Values that are missing or not numeric are left as
    ``None``.
    """
    name = response.xpath(
        '//div[contains(@class,"vacancy-title")]//h1//text()'
    ).extract_first()
    link = response.url
    min_salary = max_salary = None

    currency = response.xpath(
        '//span[contains(@itemprop,"baseSalary")]/meta/@content'
    ).extract_first()
    tmp = response.xpath(
        '//span[contains(@itemprop,"baseSalary")]/span//@content'
    ).extract()

    try:
        # Strip every whitespace character before converting to int.
        min_salary = int(re.sub(r'\s', '', tmp[0]))
        if len(tmp) == 3:
            max_salary = int(re.sub(r'\s', '', tmp[1]))
    except (IndexError, ValueError):
        # No salary on the page, or a non-numeric value: keep what was
        # parsed so far and leave the rest as None.
        pass

    yield JobparserItem(
        name=name,
        link=link,
        currency=currency,
        min_salary=min_salary,
        max_salary=max_salary,
        source="HeadHunter",
    )
def vacansy_parse(self, response: HtmlResponse):
    """Parse one vacancy page into a ``JobparserItem``.

    The title may be split between a highlighted ``<span>`` and the plain
    text of the ``<h1>``; the name is the first highlighted fragment (if
    any) followed by the first plain text node (if any), and is always a
    string. The raw salary text is handed to ``self.parse_salara`` to
    obtain the minimum and maximum.
    """
    # Vacancy name. HH added a <span> inside the title header, so both the
    # highlighted part and the plain text have to be collected.
    name1 = response.css(
        'div.vacancy-title h1.header span.highlighted::text').getall()
    name2 = response.css(
        'div.vacancy-title h1.header::text').extract_first()

    # A fully highlighted title (no plain text) used to assign the whole
    # ``name1`` list to the name; use its first fragment instead.
    name_vac = (name1[0] if name1 else '') + (name2 or '')

    # Salary.
    salary = response.css(
        'div.vacancy-title p.vacancy-salary::text').extract_first()
    min_sal, max_sal = self.parse_salara(salary)
    url = response.url

    # Hand the assembled item over to the pipeline.
    yield JobparserItem(
        name=name_vac,
        min_salary=min_sal,
        max_salary=max_sal,
        link=url,
        source='HH',
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` scoped to the ``_3MVeX`` container.

    Both the title (first ``<h1>`` text node) and the salary (all text
    nodes under the salary ``<span>``) are looked up relative to
    ``<div class="_3MVeX">``.
    """
    container = response.xpath('//div[@class="_3MVeX"]')
    title = container.xpath('.//h1/text()').extract_first()
    salary_parts = container.xpath(
        './/span[@class="_3mfro _2Wp8I PlM3e _2JVkc"]//text()'
    ).extract()
    yield JobparserItem(name=title, salary=salary_parts, link=response.url)
def vacansy_parse(self, response: HtmlResponse):
    """Fill a ``JobparserItem`` through an ``ItemLoader`` and yield it.

    Fields are added in this order: fixed ``source`` ``'SJ'``, ``name``
    from the ``<h1>`` text, ``link`` from the response URL, and ``salary``
    from the nested salary ``<span>`` text nodes.
    """
    name_xpath = '//h1[contains(@class,"_3mfro")]/text()'
    salary_xpath = '//div[contains(@class,"_1Tjoc")]//span[contains(@class,"_3mfro _2Wp8I ")]/span/text()'

    item_loader = ItemLoader(item=JobparserItem(), response=response)
    item_loader.add_value('source', 'SJ')
    item_loader.add_xpath('name', name_xpath)
    item_loader.add_value('link', response.url)
    item_loader.add_xpath('salary', salary_xpath)
    yield item_loader.load_item()
def vacansy_parse(self, response: HtmlResponse):
    """Fill a ``JobparserItem`` through an ``ItemLoader`` and yield it.

    Fields are added in this order: fixed ``source`` ``'HH'``, ``name``
    from the text under the title ``<h1>``, ``link`` from the response URL,
    and ``salary`` from every ``content`` attribute under the
    ``baseSalary`` span.
    """
    name_xpath = '//div[contains(@class,"vacancy-title")]//h1//text()'
    salary_xpath = '//span[contains(@itemprop,"baseSalary")]//@content'

    item_loader = ItemLoader(item=JobparserItem(), response=response)
    item_loader.add_value('source', 'HH')
    item_loader.add_xpath('name', name_xpath)
    item_loader.add_value('link', response.url)
    item_loader.add_xpath('salary', salary_xpath)
    yield item_loader.load_item()
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with the title and salary bounds.

    ``salaryFrom`` and ``salaryTo`` are the first text nodes of the first
    and third child ``<span>`` of the salary element; each is ``None`` when
    that child is absent. The source is the first entry of
    ``self.allowed_domains``.
    """
    title = response.css('h1._3mfro.rFbjy.s1nFK._2JVkc::text').extract_first()
    lower = response.xpath(
        "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/span[1]/text()"
    ).extract_first()
    upper = response.xpath(
        "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/span[3]/text()"
    ).extract_first()
    yield JobparserItem(
        name=title,
        salaryFrom=lower,
        salaryTo=upper,
        url=response.url,
        source=self.allowed_domains[0],
    )
def vacansy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with the same raw salary in both bounds.

    ``min_salary`` and ``max_salary`` both receive the first
    ``span._2Wp8I`` match. The selector has no ``::text`` suffix, so the
    value is whatever the selector serialises for the element itself.
    """
    # Vacancy name.
    title = response.css('h1.rFbjy::text').extract_first()
    # Salary: the same raw value feeds both the minimum and the maximum.
    raw_salary = response.css('span._2Wp8I').extract_first()

    # Hand the assembled item over to the pipeline.
    yield JobparserItem(
        name=title,
        min_salary=raw_salary,
        max_salary=raw_salary,
        url=response.request.url,
        source='superjob.ru',
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with title, salary fragments and URL.

    ``item_salary`` is the list of text nodes of the ``<span>`` children of
    ``<p class="vacancy-salary">``.
    """
    salary_nodes = response.xpath(
        "//p[@class = 'vacancy-salary']/span/text()"
    )
    yield JobparserItem(
        item_name=response.xpath("//h1/text()").extract_first(),
        item_salary=salary_nodes.extract(),
        item_url=response.url,
    )
def vacansy_parse(self, response):
    """Collect vacancy data from the page through an ``ItemLoader``.

    Both salary fields are first given a NaN value, then the ``minValue``
    and ``maxValue`` microdata are added on top when they can be loaded.
    A failure while loading one salary field is logged and does not stop
    the item from being produced.

    Yields the loaded item with ``name``, ``min_salary``, ``max_salary``,
    ``link`` (from the page microdata) and the fixed ``site`` ``'hh.ru'``.
    """
    import logging

    loader = ItemLoader(item=JobparserItem(), response=response)
    loader.add_xpath('name', "//div//h1/text()")

    # Seed both salary fields with NaN before trying the real values.
    loader.add_value('min_salary', float('nan'))
    loader.add_value('max_salary', float('nan'))

    for field, itemprop in (('min_salary', 'minValue'),
                            ('max_salary', 'maxValue')):
        try:
            loader.add_xpath(field, f"//meta[@itemprop='{itemprop}']/@content")
        except Exception:
            # Best effort: keep the NaN seed, but leave a trace instead of
            # silently swallowing everything (including KeyboardInterrupt).
            logging.getLogger(__name__).warning(
                "Could not load %s from %s", field, response.url,
                exc_info=True)

    job_link = response.css(
        'div[itemscope="itemscope"] meta[itemprop="url"]::attr(content)'
    ).extract_first()
    loader.add_value('link', job_link)
    loader.add_value('site', 'hh.ru')
    yield loader.load_item()
def vacansy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with the salary text joined into one string.

    ``salary_from`` is the concatenation of all direct text nodes of the
    salary paragraph; ``salary_to`` is always an empty string.
    ``link_site`` is the fixed label ``'hh.ru'``.
    """
    title = response.css('div.vacancy-title h1.header::text').extract_first()
    salary_parts = response.css(
        'div.vacancy-title p.vacancy-salary::text'
    ).extract()
    yield JobparserItem(
        name=title,
        salary_from=''.join(salary_parts),
        salary_to='',
        link_vac=response.url,
        link_site='hh.ru',
    )
def vacansy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with the page title and salary fragments.

    ``name`` is the first ``<h1>`` text node (or ``None``); ``salary`` is
    the list of direct text nodes of the
    ``bloko-header-2 bloko-header-2_lite`` span.
    """
    salary_nodes = response.xpath(
        "//span[@class='bloko-header-2 bloko-header-2_lite']/text()"
    )
    yield JobparserItem(
        name=response.css('h1::text').extract_first(),
        salary=salary_nodes.extract(),
    )
def vacansy_parse(self, response):
    """Collect title and salary from the page, print them, yield an item.

    Both values are the first matching text node inside
    ``div.vacancy-title`` (``None`` when absent).
    """
    title_block = 'div.vacancy-title'
    fields = {
        'name': response.css(
            'div.vacancy-title h1.header::text').extract_first(),
        'salary': response.css(
            'div.vacancy-title p.vacancy-salary::text').extract_first(),
    }
    print(fields['name'], fields['salary'])
    # Build the item.
    yield JobparserItem(**fields)
def vacancy_parce(self, response: HtmlResponse):
    """Fill a ``JobparserItem`` through an ``ItemLoader`` and yield it.

    Fields are added in this order: ``url`` from the response URL,
    ``name`` from the title ``<h1>`` text (CSS), and ``salary`` from the
    ``bloko-header-2 bloko-header-2_lite`` span text (XPath).
    """
    salary_xpath = "//span[@class='bloko-header-2 bloko-header-2_lite']/text()"

    item_loader = ItemLoader(item=JobparserItem(), response=response)
    item_loader.add_value('url', response.url)
    item_loader.add_css('name', "div.vacancy-title h1::text")
    item_loader.add_xpath('salary', salary_xpath)
    yield item_loader.load_item()
def vacancy_pars(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with title, salary fragments and link.

    ``name`` is the first text node of the title ``<h1>``, or ``None`` when
    the header is missing (previously this raised ``IndexError``).
    ``salary`` is the list of direct text nodes of the salary ``<span>``.
    """
    name = response.xpath(
        "//h1[@class='_3mfro rFbjy s1nFK _2JVkc']/text()").extract_first()
    salary = response.xpath(
        "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()"
    ).extract()
    # NOTE(review): the link comes from instance state rather than from
    # ``response.url``; if several requests are in flight it may not belong
    # to this response. Confirm against the code that sets it.
    yield JobparserItem(name=name, salary=salary, link=self.vacancy_link)
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` from the ``div._3MVeX`` container.

    ``name`` is the first text node of ``h1._3mfro``; ``salary`` is the
    list of text nodes of the ``<span>`` elements nested in
    ``span._2Wp8I``. The source is the fixed label ``'superjob.ru'``.
    """
    title = response.css('div._3MVeX h1._3mfro::text').extract_first()
    salary_parts = response.css('div._3MVeX span._2Wp8I span::text').extract()
    yield JobparserItem(
        name=title,
        salary=salary_parts,
        link=response.url,
        source='superjob.ru',
    )
def vacansy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with title, joined salary text and company.

    The salary fragments are concatenated into one string with every
    non-breaking space (``\\xa0``) replaced by a regular space. ``name`` and
    ``company`` are the first matching text node, or ``None``.
    """
    name = response.css('h1._3mfro::text').extract_first()
    # str.replace returns a new string; its result used to be discarded, so
    # the non-breaking spaces were never actually replaced.
    salary = ''.join(
        itm.replace('\xa0', ' ')
        for itm in response.css('span.PlM3e span::text').extract()
    )
    company = response.css('h2.PlM3e::text').extract_first()
    yield JobparserItem(name=name, salary=salary, company=company)
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` with title, salary and company name.

    ``name`` and ``salary`` are the first matching text node inside
    ``div.vacancy-title``; ``company`` is every text node of the company
    link's ``<span>`` elements joined without a separator.
    """
    title = response.css(
        "div.vacancy-title h1.header::text").extract_first()
    pay = response.css(
        "div.vacancy-title p.vacancy-salary::text").extract_first()
    company_parts = response.css(
        'a.vacancy-company-name span::text').extract()
    yield JobparserItem(
        name=title,
        salary=pay,
        company=''.join(company_parts),
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield a ``JobparserItem`` tagged with this object's ``name``.

    ``name`` is the first text node of the title ``<h1>``; ``salary`` is
    the list of text nodes of every descendant of ``.vacancy-salary``.
    """
    title = response.css(".vacancy-title h1::text").extract_first()
    salary_parts = response.css(".vacancy-salary *::text").extract()
    yield JobparserItem(
        name=title,
        salary=salary_parts,
        url=response.url,
        site=self.name,
    )