def vacansy_parse(self, response: HtmlResponse):
    """Parse a vacancy page and yield one ``JobparserItem`` (source "SuperJob").

    The salary block is read as a list of text fragments.  The first
    fragment is parsed as the minimum salary.  When exactly six fragments
    are present, the fifth is parsed as the maximum salary and the sixth
    is used as the currency; otherwise the second fragment is the
    currency.  Any value that cannot be parsed stays ``None``.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    """
    name = response.xpath(
        '//h1[contains(@class,"_3mfro")]/text()').extract_first()
    link = response.url
    currency = min_salary = max_salary = None

    tmp = response.xpath(
        '//div[contains(@class,"_1Tjoc")]'
        '//span[contains(@class,"_3mfro _2Wp8I ")]/span/text()').extract()
    try:
        # Amounts contain whitespace as thousands separators; strip it all.
        min_salary = int(re.sub(r'\s', '', tmp[0]))
        if len(tmp) == 6:
            max_salary = int(re.sub(r'\s', '', tmp[4]))
            currency = tmp[5]
        else:
            currency = tmp[1]
    except (IndexError, ValueError):
        # Best effort: a missing or non-numeric salary block simply leaves
        # the fields that were not parsed yet as None.
        pass

    yield JobparserItem(name=name,
                        link=link,
                        currency=currency,
                        min_salary=min_salary,
                        max_salary=max_salary,
                        source="SuperJob")
def vacansy_parse(self, response):
    """Collect vacancy fields through an ``ItemLoader`` and yield the item.

    The loader is filled with the vacancy name, the page URL, the minimum
    and maximum salary taken from the ``minValue`` / ``maxValue`` meta
    tags, and the constant source ``'hh.ru'``.

    :param response: the downloaded vacancy page.
    :return: generator yielding the item built by the loader.
    """
    loader = ItemLoader(item=JobparserItem(), response=response)
    loader.add_css('name', 'div.vacancy-title h1.header::text')
    loader.add_value('url_vacancy', response.url)
    loader.add_css('min_salary', 'meta[itemprop="minValue"]::attr(content)')
    loader.add_css('max_salary', 'meta[itemprop="maxValue"]::attr(content)')
    loader.add_value('source', 'hh.ru')
    # The populated loader must be turned into an item and emitted;
    # otherwise the callback produces nothing.
    yield loader.load_item()
def vacancy_parse(self, response: HtmlResponse):
    """Parse a vacancy page from its Open Graph meta tags.

    ``og:description`` is split into a title part and a salary part.  The
    title part is expected to read "Вакансия NAME в компании COMPANY" and
    the salary part "зарплата AMOUNT CURRENCY." (with a trailing
    character that is cut off).  Salary values are kept as strings.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    """
    host = response.css(
        'meta[property="og:site_name"]::attr(content)').extract_first().lower()
    url = response.css(
        'meta[property="og:url"]::attr(content)').extract_first()
    description = response.css(
        'meta[property="og:description"]::attr(content)').extract_first()

    # Split on the LAST ", " only: a vacancy or company name may itself
    # contain ", ", which would break a plain two-way unpacking.
    title, salary = description.rsplit(', ', 1)

    company_marker = ' в компании '
    marker_pos = title.rfind(company_marker)
    name = title[len('Вакансия '):marker_pos]
    company = title[marker_pos + len(company_marker):].strip()

    # Drop the "зарплата " prefix and the trailing character.
    salary = salary[len('зарплата '):-1]
    salary_min = salary_max = salary_currency = None

    # Index of the last digit; everything after it (past one separator
    # character) is the currency.  None when the text has no digits,
    # e.g. for 'по договорённости'.
    last_digit = max(
        (pos for pos, char in enumerate(salary) if char.isdigit()),
        default=None)

    if salary != 'по договорённости' and last_digit is not None:
        salary_currency = salary[last_digit + 2:]
        amount = salary[:last_digit + 1].replace(' ', '')
        if '-' in amount:
            salary_min, salary_max = amount.split('-')
        else:
            salary_min = salary_max = amount
            # A single amount is either an upper ("до") or a lower bound;
            # the on-page label tells which one.
            text_range = (response.css(
                'span._2Wp8I.ZON4b *::text').extract_first() or '').lower()
            if text_range.startswith('до'):
                salary_min = None
            else:  # "от"
                salary_max = None

    yield JobparserItem(
        host=host,
        url=url,
        name=name,
        company=company,
        salary_min=salary_min,
        salary_max=salary_max,
        salary_currency=salary_currency,
    )
def vacansy_parse(self, response: HtmlResponse):
    """Extract the vacancy name and raw salary text and yield an item.

    The salary text fragments are concatenated into ``salary_from``;
    ``salary_to`` is always an empty string.  When the salary element is
    absent, ``salary_from`` is an empty string as well.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    """
    # Extract plain strings: passing the raw SelectorList to the item, or
    # joining it with ''.join(), does not work on Selector objects.
    name = response.xpath(
        "//h1[@class='_3mfro rFbjy s1nFK _2JVkc']/text()").extract_first()
    salary_parts = response.xpath(
        "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()").extract()

    yield JobparserItem(name=name,
                        salary_from=''.join(salary_parts),
                        salary_to='',
                        link_vac=response.url,
                        link_site='superjob.ru')
def vacancy_parse(self, response: HtmlResponse):
    """Extract vacancy and company details and yield one ``JobparserItem``.

    Several fields are looked up with a primary selector and, when that
    finds nothing, with an alternative selector.  Missing scalar fields
    are ``None``; list fields are empty lists.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem`` (``vac_from='HH'``).
    """
    title = response.xpath(
        "//div[@class='vacancy-title ']/h1/text()").extract_first()
    if title is None:
        # extract_first() instead of extract()[0]: a page without a title
        # gives None rather than an IndexError that loses the whole item.
        title = response.xpath(
            "//div[@class='vacancy-title']/h1/span/text()").extract_first()

    vac_href = response.url
    salary_max = response.css(
        'div.vacancy-title meta[itemprop="maxValue"]::attr(content)'
    ).extract()
    salary_min = response.css(
        'div.vacancy-title meta[itemprop="minValue"]::attr(content)'
    ).extract()
    salary_comment = response.css(
        'div.vacancy-title p.vacancy-salary::text').extract_first()

    competition = response.xpath(
        "//div[@class='bloko-tag bloko-tag_inline']//span/text()").extract()
    if not competition:
        competition = response.xpath(
            "//span[@class='Bloko-TagList-Text']/text()").extract()

    company_title = response.xpath(
        "//meta[@itemprop = 'name']/@content").extract_first()
    company_href = response.xpath(
        "//a[@class='vacancy-company-name']/@href").extract_first()
    company_logo = response.xpath(
        "//a[@class='vacancy-company-logo']/img/@src").extract_first()
    if company_logo is None:
        # Same element, class attribute written with a trailing space.
        company_logo = response.xpath(
            "//a[@class='vacancy-company-logo ']/img/@src").extract_first()

    print('VParse', title, response, salary_comment, salary_max, salary_min)
    yield JobparserItem(title=title,
                        vac_href=vac_href,
                        salary_max=salary_max,
                        salary_min=salary_min,
                        salary_comment=salary_comment,
                        vac_from='HH',
                        compet=competition,
                        comp_title=company_title,
                        comp_href=company_href,
                        comp_logo=company_logo)
def vacansy_parse(self, response: HtmlResponse):
    """Gather the vacancy fields into a dict and yield it as an item.

    ``name`` and ``experience`` hold the first match (or ``None``); every
    other field holds the full list of matched strings.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    """
    select = response.xpath
    # Note kept from the original author: skill tags are rendered as
    # span.bloko-tag__section_text elements (data-qa="bloko-tag__text").
    # A company description is not extracted.
    fields = {
        'name': response.css('h1::text').extract_first(),
        'salary': select(
            "//span[@class='bloko-header-2 bloko-header-2_lite']/text()"
        ).extract(),
        'experience': select(
            "//span[@data-qa = 'vacancy-experience']/text()"
        ).extract_first(),
        'skils': select(
            "//div[@class ='bloko-tag bloko-tag_inline']//text()"
        ).extract(),
        'company_name': select(
            "//a[@data-qa='vacancy-company-name']/*/text()"
        ).extract(),
        'company_href': select(
            "//a[@data-qa='vacancy-company-name']/@href"
        ).extract(),
        'company_location': select(
            "//p[@data-qa='vacancy-view-location']//text()"
        ).extract(),
        'publication_date': select(
            "//p[@class='vacancy-creation-time']/text()"
        ).extract(),
        'connection_info': select(
            "//div[@class='vacancy-contacts__body']//text()"
        ).extract(),
    }
    yield JobparserItem(fields)
def vacansy_parse(self, response: HtmlResponse):
    """Process a single vacancy page and yield one ``JobparserItem``.

    The company link and name are read from the ``div`` company wrapper
    first and, when no link is found there, from the ``p`` wrapper.  The
    link is prefixed with ``https://spb.hh.ru``; it is ``None`` when the
    page has no company link at all.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    """
    vlink = response.url  # link to the vacancy page
    vname = response.xpath('//h1//text()').extract_first()  # vacancy title
    # Salary as a list of separate text fragments.
    vsalary = response.css('p.vacancy-salary span::text').extract()

    # Look the link up BEFORE adding the site prefix: once the prefix is
    # prepended the value is never empty, so the fallback could not fire,
    # and a missing link would fail on str + None.
    company_href = response.css(
        'div.vacancy-company-name-wrapper a::attr(href)').extract_first()
    vcomp = ''.join(
        response.css(
            'div.vacancy-company-name-wrapper span.bloko-section-header-2::text'
        ).extract())
    if not company_href:
        company_href = response.css(
            'p.vacancy-company-name-wrapper a::attr(href)').extract_first()
        vcomp = ''.join(
            response.css(
                'p.vacancy-company-name-wrapper span.bloko-section-header-2::text'
            ).extract())
    clink = 'https://spb.hh.ru' + company_href if company_href else None

    # Company address.
    vgeo = ''.join(
        response.xpath(
            '//p[@data-qa="vacancy-view-location"]//text()').extract())
    # Experience and schedule.
    exp_time = response.xpath(
        '//div[@class="vacancy-description"]/div[1]//text()').extract()
    # Vacancy description.
    vdescr = response.xpath(
        '//div[@class="vacancy-description"]//text()').extract()
    # Publication date.
    vdate = response.xpath(
        '//p[@class="vacancy-creation-time"]//text()').extract()

    yield JobparserItem(
        lnkpage=vlink,
        name=vname,
        salary=vsalary,
        link_company=clink,
        company=vcomp,
        geo=vgeo,
        experience=exp_time,
        descr=vdescr,
        date_pub=vdate,
    )
def vacancy_parse(self, response: HtmlResponse):
    """Process a single vacancy page and yield one ``JobparserItem``.

    ``name`` is the first ``h1`` text (or ``None``); ``salary``,
    ``location`` and ``company`` are lists of text fragments; ``link`` is
    the page URL.
    """
    fields = {
        # Vacancy title.
        'name': response.xpath('//h1/text()').extract_first(),
        # Salary as a list of separate text fragments.
        'salary': response.css('p.vacancy-salary span::text').extract(),
        'location': response.xpath(
            '//p[@data-qa="vacancy-view-location"]//text()').extract(),
        'link': response.url,
        'company': response.xpath(
            '//span[@class="bloko-section-header-2 bloko-section-header-2_lite"]/text()'
        ).extract(),
    }
    # Hand the data to the item, which defines the output structure.
    yield JobparserItem(**fields)
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name and numeric salary bounds.

    The salary text is passed to ``get_salary``; when its result has
    exactly two elements they become the minimum and maximum salary,
    otherwise both bounds are set to ``0``.
    """
    title = response.css(
        'div.vacancy-title h1.header::text').extract_first()
    salary_text = response.css(
        'div.vacancy-title p.vacancy-salary::text').extract_first()
    bounds = get_salary(salary_text)

    if len(bounds) != 2:
        lower, upper = 0, 0
    else:
        lower, upper = bounds[0], bounds[1]

    yield JobparserItem(name=title,
                        salary_min=lower,
                        salary_max=upper,
                        href=response.url,
                        site=self.name)
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with name, salary, company, city, link and source.

    ``name``, ``salary`` and ``company`` are lists of matched strings;
    ``city`` is the first match or ``None``; ``source`` is the spider's
    first allowed domain.
    """
    select = response.xpath
    fields = {
        'name': select(
            "//h1[@class='_3mfro rFbjy s1nFK _2JVkc']/text()").extract(),
        'salary': select(
            "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()"
        ).extract(),
        'company': select(
            "//h2[@class='_3mfro PlM3e _2JVkc _2VHxz _3LJqf _15msI']/text()"
        ).extract(),
        'city': select(
            "//span[@class='_3mfro _1hP6a _2JVkc']/text()").extract_first(),
        'link': response.url,
        'source': self.allowed_domains[0],
    }
    yield JobparserItem(**fields)
def vacansy_parse(self, response):
    """Collect the vacancy data from the page and yield one item.

    Salary bounds come from the ``minValue`` / ``maxValue`` meta tags;
    an absent or empty value is stored as ``None``.  The link is read
    from the page's ``url`` meta tag and the site is ``'hh.ru'``.
    """
    def meta_content(selector):
        # First matching attribute value, with empty results mapped to None.
        return response.css(selector).extract_first() or None

    title = response.css(
        'div.vacancy-title h1.header::text').extract_first().strip()
    lower = meta_content('meta[itemprop="minValue"]::attr(content)')
    upper = meta_content('meta[itemprop="maxValue"]::attr(content)')
    page_link = response.css(
        'div[itemscope="itemscope"] meta[itemprop="url"]::attr(content)'
    ).extract_first()

    yield JobparserItem(name=title,
                        min_salary=lower,
                        max_salary=upper,
                        link=page_link,
                        site='hh.ru')
def vacancy_parse(self, response):
    """Yield an item with name, salary, employer, address, url and site.

    ``salary`` is the list of matched text fragments; the other extracted
    fields are first matches (or ``None``).  ``site`` is the spider's
    first allowed domain.
    """
    select = response.xpath
    title = select(
        "//h1[@class='_3mfro rFbjy s1nFK _2JVkc']/text()").extract_first()
    salary_parts = select(
        "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()").extract()
    employer = select(
        "//div[@class='_1Tjoc _3ifBO Ghoh2 _3lvIR']//div[@class='_1cFsi _3VUIu']/div[@class='_2g1F-'][1]//h2/text()"
    ).extract_first()
    address = select(
        "//div[@class='f-test-address _3AQrx']//span[@class='_3mfro _1hP6a _2JVkc']/text()"
    ).extract_first()

    yield JobparserItem(name=title,
                        salary=salary_parts,
                        employer=employer,
                        url=response.url,
                        address=address,
                        site=self.allowed_domains[0])
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name, salary bounds and link.

    The salary is read as a list of text fragments.  A fragment
    containing ``'до '`` (at index 2 or 0) marks the following fragment
    as the maximum salary; an ``'от '`` fragment anywhere in the list
    marks the fragment at index 1 as the minimum salary.  Non-breaking
    spaces are removed from the amounts; missing bounds are ``None``.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    """
    name = response.xpath(hh_name).extract_first()
    parts = response.xpath(hh_salary).extract()

    def clean(amount):
        # Amounts use non-breaking spaces as thousands separators.
        return amount.replace('\xa0', '')

    # Explicit length checks instead of a catch-all except: a short list
    # no longer prevents the second pattern from being tried.
    max_salary = None
    if len(parts) > 3 and 'до ' in parts[2]:
        max_salary = clean(parts[3])
    elif len(parts) > 1 and 'до ' in parts[0]:
        max_salary = clean(parts[1])

    min_salary = None
    if 'от ' in parts and len(parts) > 1:
        min_salary = clean(parts[1])

    yield JobparserItem(name=name,
                        min_salary=min_salary,
                        max_salary=max_salary,
                        link=response.url)
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the name, a salary triple, link and site.

    ``salary`` is a three-element list ``[min, max, currency]`` taken
    from the ``baseSalary`` meta tags; each element is the first match
    or ``None``.  ``name`` is the list of matched title strings.
    """
    salary_selectors = (
        'span[itemprop="baseSalary"] meta[itemprop="minValue"] ::attr(content)',
        'span[itemprop="baseSalary"] meta[itemprop="maxValue"] ::attr(content)',
        'span[itemprop="baseSalary"] meta[itemprop="currency"] ::attr(content)',
    )
    title_parts = response.css('div.vacancy-title h1.header::text').extract()
    salary_triple = [
        response.css(selector).extract_first()
        for selector in salary_selectors
    ]
    yield JobparserItem(name=title_parts,
                        salary=salary_triple,
                        vacancy_link=response.url,
                        site_scraping=self.allowed_domains[0])
def vacansy_parse(self, response):
    """Parse the vacancy name and salary bounds and yield one item.

    The salary text is classified in this order:

    * contains ``'—'``: a range, giving both bounds;
    * contains ``'от'``: lower bound only;
    * contains ``'договорённости'``: no bounds;
    * contains ``'до'``: upper bound only;
    * otherwise: a single amount used for both bounds.

    Bounds are ints.  A part without any digits (for example when the
    page has no salary text) becomes ``None``.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem`` (site 'superjob.ru').
    """
    def to_int(text):
        """Return the digits of *text* as an int, or None if it has none."""
        digits = re.sub(r'[^0-9]', '', text)
        return int(digits) if digits else None

    job_name = response.css('div._3MVeX h1::text').extract_first().strip()
    salary = ''.join(
        response.css(
            'span[class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"] *::text'
        ).extract())
    salary = salary.replace(u'\xa0', u'')

    if '—' in salary:
        low_text, high_text = salary.split('—')[:2]
        salary_min, salary_max = to_int(low_text), to_int(high_text)
    elif 'от' in salary:
        salary_min, salary_max = to_int(salary[2:]), None
    elif 'договорённости' in salary:
        # Must be tested before 'до', which it contains as a substring.
        salary_min, salary_max = None, None
    elif 'до' in salary:
        salary_min, salary_max = None, to_int(salary[2:])
    else:
        salary_min = salary_max = to_int(salary)

    site = 'superjob.ru'
    job_link = response.css(
        'link[rel="canonical"]::attr(href)').extract_first()
    yield JobparserItem(name=job_name,
                        min_salary=salary_min,
                        max_salary=salary_max,
                        link=job_link,
                        site=site)
def vacansy_parse(self, response: HtmlResponse):
    """Yield an item with name, employer, salary bounds, currency and link.

    Every extracted field is the first match of its XPath (or ``None``).
    """
    def first(query):
        # First string matched by the XPath query, or None.
        return response.xpath(query).extract_first()

    fields = {
        'name': first(
            '//h1[@class="_3mfro rFbjy s1nFK _2JVkc"]/text()|//h1[@class="_3mfro rFbjy s1nFK _2JVkc"]/span[@class="_1rS-s"]/text()'
        ),
        'min_salary': first(
            '//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span/text()'
        ),
        'max_salary': first(
            '//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span[@x-path=1]/text()|//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span[3]/text()'
        ),
        'currency': first(
            '//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span[4]/text()'
        ),
        'employer': first(
            '//h2[@class="_3mfro PlM3e _2JVkc _2VHxz _3LJqf _15msI"]/text()'
        ),
        'link': response.url,
    }
    yield JobparserItem(**fields)
def vacansy_parse(self, response: HtmlResponse):
    """Yield an item with name, employer, salary bounds, currency and link.

    ``name`` is the first match (or ``None``); ``employer``, both salary
    bounds and ``currency`` are lists of all matched strings.
    """
    def every(query):
        # All strings matched by the XPath query.
        return response.xpath(query).extract()

    title = response.xpath(
        '//h1[@data-qa="vacancy-title"]/text()|//h1[@data-qa="vacancy-title"]/span/text()'
    ).extract_first()

    yield JobparserItem(
        name=title,
        min_salary=every(
            '//span[@itemprop="value"]/meta[@itemprop="minValue"]/@content'
        ),
        max_salary=every(
            '//span[@itemprop="value"]/meta[@itemprop="maxValue"]/@content|//span[@itemprop="value"]/meta[@itemprop="value"]/@content'
        ),
        currency=every(
            '//span[@itemprop="baseSalary"]/meta[@itemprop="currency"]/@content'
        ),
        employer=every(
            '//a[@itemprop="hiringOrganization"]/span/span/text()|//a[@itemprop="hiringOrganization"]/span/text()'
        ),
        link=response.url,
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name, salary bounds and link.

    The salary is read as a list of text fragments and classified by the
    fragments it contains:

    * ``'По договорённости'``: both bounds are ``None``;
    * ``'—'``: fragment 0 is the minimum and fragment 4 the maximum;
    * ``'до'``: fragment 2 (text before ``'руб.'``) is the maximum;
    * otherwise: fragment 2 (text before ``'руб.'``) is the minimum.

    Non-breaking spaces are removed from the amounts, which stay strings.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    :raises IndexError: if the fragment list is shorter than the matched
        pattern requires.
    """
    name = response.xpath(sj_name).extract_first()
    # Query the salary once and start from explicit None defaults.
    parts = response.xpath(sj_salary).extract()
    min_salary = max_salary = None

    if 'По договорённости' in parts:
        pass  # negotiable salary: keep both bounds as None
    elif '—' in parts:
        max_salary = parts[4].replace('\xa0', '')
        min_salary = parts[0].replace('\xa0', '')
    elif 'до' in parts:
        max_salary = parts[2].replace('\xa0', '').split('руб.')[0]
    else:
        min_salary = parts[2].replace('\xa0', '').split('руб.')[0]

    link = response.url
    yield JobparserItem(name=name,
                        min_salary=min_salary,
                        max_salary=max_salary,
                        link=link)
def vacansy_parse(self, response: HtmlResponse):
    """Process a single vacancy page and yield one ``JobparserItem``.

    The company link is the page's company ``href`` prefixed with
    ``https://russia.superjob.ru``; it is ``None`` when the page has no
    such link.

    :param response: the downloaded vacancy page.
    :return: generator yielding a single ``JobparserItem``.
    """
    vlink = response.url  # link to the vacancy page
    vname = response.xpath('//h1//text()').extract_first()  # vacancy title
    # Salary as a list of separate text fragments.
    vsalary = response.css('span._1OuF_.ZON4b span::text').extract()

    # Guard the concatenation: without a company link, str + None would
    # raise TypeError and the whole item would be lost.
    company_href = response.xpath(
        "//div/a[contains(@class,'_2JivQ')]/@href").extract_first()
    clink = ('https://russia.superjob.ru' + company_href
             if company_href else None)

    # Company name.
    vcomp = ''.join(
        response.xpath(
            '//a/h2[@class="_3mfro PlM3e _2JVkc _2VHxz _3LJqf _15msI"]//text()'
        ).extract())
    # Company address.
    vgeo = ''.join(response.css('span._6-z9f span::text').extract())
    # Experience and schedule.
    exp_time = ' '.join(
        response.xpath(
            '//span/span/span[@class="_3mfro _1hP6a _2JVkc"]//text()'
        ).extract())
    # Vacancy description.
    vdescr = response.xpath(
        '//span[@class="_3mfro _2LeqZ _1hP6a _2JVkc _2VHxz _15msI"]//text()'
    ).extract()
    # Publication date.
    vdate = response.xpath(
        '//div[@class="f-test-title _183s9 _3wZVt OuDXD _1iZ5S"]//span//text()'
    ).extract()

    yield JobparserItem(
        lnkpage=vlink,
        name=vname,
        salary=vsalary,
        link_company=clink,
        company=vcomp,
        geo=vgeo,
        experience=exp_time,
        descr=vdescr,
        date_pub=vdate,
    )
def vacansy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name, salary fragments and page URL."""
    yield JobparserItem(
        item_name=response.xpath('//h1/text()').extract_first(),
        item_salary=response.xpath(
            "//p[@class='vacancy-salary']/span/text()").extract(),
        item_url=response.url,
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with name, salary fragments, link and source domain.

    ``item_source`` is the spider's first allowed domain.
    """
    fields = {
        'item_name': response.xpath("//h1/text()").extract_first(),
        'item_salary': response.xpath(
            "//span[contains(@class, '_3mfro _2Wp8I PlM3e _2JVkc _2VHxz')]//text()"
        ).extract(),
        'item_link': response.url,
        'item_source': self.allowed_domains[0],
    }
    yield JobparserItem(**fields)
def vacancy_parse(response: HtmlResponse):
    """Yield an item with the page title, raw salary parameters and URL.

    ``salary`` is the raw ``data-params`` attribute of the
    ``HH/GoogleDfpService`` script tag (first match, or ``None``).
    """
    # NOTE(review): there is no ``self`` parameter - confirm this is used
    # as a plain function or static method rather than a bound method.
    fields = {
        'title': response.css('h1.header *::text').extract_first(),
        'salary': response.xpath(
            "//script[@data-name='HH/GoogleDfpService']/@data-params"
        ).extract_first(),
        'url': response.url,
    }
    yield JobparserItem(**fields)
def vacansy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name and the salary text.

    Both values are the first match of their selector, or ``None``.
    """
    def first(selector):
        # First string matched by the CSS selector, or None.
        return response.css(selector).extract_first()

    yield JobparserItem(
        name=first('div.vacancy-title h1.header::text'),
        salary=first('div.vacancy-title p.vacancy-salary::text'),
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item holding the text of the vacancy's embedded script.

    According to the original author, all of the vacancy information was
    found in the body of this script, so it is passed on unparsed.
    """
    script_text = response.xpath(
        '//div[@class="_1Tjoc UGN79 undefined _1XYex"]//script//text()'
    ).extract_first()
    yield JobparserItem(general=script_text)
def vacancy_parce(self, response: HtmlResponse):
    """Yield an item with name, salary fragments, link and source 'hh.ru'."""
    fields = {
        'name': response.css("div.vacancy-title h1::text").extract_first(),
        'salary': response.xpath(
            "//span[@class='bloko-header-2 bloko-header-2_lite']/text()"
        ).extract(),
        'link': response.url,
        'src': "hh.ru",
    }
    yield JobparserItem(**fields)
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name and salary fragments.

    After the item has been consumed, the same two values are printed.
    """
    title = response.xpath("//h1/text()").extract_first()
    salary_parts = response.xpath(
        "//p[@class='vacancy-salary']/span/text()").extract()
    item = JobparserItem(name=title, salary=salary_parts)
    yield item
    # Runs only when the generator is resumed after the yield.
    print(title, salary_parts)
def parse_vacancies(self, response: HtmlResponse):
    """Yield an item with title, salary fragments, link and site.

    ``site`` is the constant ``'superjob.ru'``.
    """
    fields = {
        'title': response.xpath('//h1//text()').get(),
        'salary': response.xpath(
            '//span[@class="_3mfro _2Wp8I PlM3e _2JVkc"]/text()').getall(),
        'link': response.url,
        'site': 'superjob.ru',
    }
    yield JobparserItem(**fields)
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name, salary fragments and page URL."""
    select = response.xpath
    yield JobparserItem(
        name=select("//h1/text()").extract_first(),
        salary=select("//p[@class='vacancy-salary']//text()").extract(),
        href=response.url,
    )
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name, salary fragments and page URL."""
    fields = {
        'item_name': response.xpath("//h1/text()").extract_first(),
        'item_salary': response.xpath(
            "//span[@class='_3mfro _2Wp8I PlM3e _2JVkc']/text()").extract(),
        'item_url': response.url,
    }
    yield JobparserItem(**fields)
def vacancy_parse(self, response: HtmlResponse):
    """Yield an item with the vacancy name and salary text fragments."""
    select = response.xpath
    title = select("//h1/text()").extract_first()
    salary_parts = select(
        "//span[contains(@class, '_1OuF_ ZON4b')]//text()").extract()
    yield JobparserItem(name=title, salary=salary_parts)