コード例 #1
0
ファイル: sjparser.py プロジェクト: KazuzGit/GeekBrains
    def vacansy_parse(self, response: HtmlResponse):
        """Parse a vacancy page and yield one ``JobparserItem`` for it.

        The salary is read from the text nodes of the salary ``<span>``:
        the first node is the lower bound; when exactly six nodes are
        present the fifth is the upper bound and the sixth the currency,
        otherwise the second node is the currency. Fields that cannot be
        parsed keep the value ``None``.
        """
        name = response.xpath('//h1[contains(@class,"_3mfro")]/text()').extract_first()
        link = response.url
        currency = min_salary = max_salary = None
        salary_parts = response.xpath(
            '//div[contains(@class,"_1Tjoc")]//span[contains(@class,"_3mfro _2Wp8I ")]/span/text()'
        ).extract()
        try:
            # Amounts contain whitespace separators; drop them before int()
            min_salary = int(re.sub(r'\s', '', salary_parts[0]))
            if len(salary_parts) == 6:
                max_salary = int(re.sub(r'\s', '', salary_parts[4]))
                currency = salary_parts[5]
            else:
                currency = salary_parts[1]
        except (IndexError, ValueError):
            # Missing or non-numeric salary text: best effort, keep whatever
            # was parsed before the failure and leave the rest as None.
            pass

        yield JobparserItem(name=name, link=link, currency=currency, min_salary=min_salary, max_salary=max_salary, source="SuperJob")
コード例 #2
0
ファイル: hhru.py プロジェクト: TataMoskovkina/GeekUniversity
    def vacansy_parse(self, response):
        """Collect vacancy fields through an ``ItemLoader`` and yield the item.

        The name and salary bounds are read with CSS selectors, the page URL
        and the source label are added as plain values.
        """
        loader = ItemLoader(item=JobparserItem(), response=response)
        loader.add_css('name', 'div.vacancy-title h1.header::text')
        loader.add_value('url_vacancy', response.url)
        loader.add_css('min_salary',
                       'meta[itemprop="minValue"]::attr(content)')
        loader.add_css('max_salary',
                       'meta[itemprop="maxValue"]::attr(content)')
        loader.add_value('source', 'hh.ru')
        # Without load_item() the collected values are never turned into an
        # item and the callback produces nothing.
        yield loader.load_item()
コード例 #3
0
ファイル: superjob.py プロジェクト: Zalutskii/GBEdu
 def vacancy_parse(self, response: HtmlResponse):
     """Build a ``JobparserItem`` from the page's Open Graph meta tags.

     The ``og:description`` content is split into a title part and a salary
     part; vacancy name and company are cut out of the title, and the salary
     part is broken down into minimum, maximum and currency.
     """
     host = response.css(
         'meta[property="og:site_name"]::attr(content)').extract_first().lower()
     url = response.css(
         'meta[property="og:url"]::attr(content)').extract_first()
     og_description = response.css(
         'meta[property="og:description"]::attr(content)').extract_first()
     # Exactly two ', '-separated parts are expected: title and salary
     title, salary_text = og_description.split(', ')

     company_marker = ' в компании '
     marker_at = title.rfind(company_marker)
     name = title[len('Вакансия '):marker_at]
     company = title[marker_at + len(company_marker):].strip()

     # Drop the 'зарплата ' prefix and the final character
     salary_text = salary_text[len('зарплата '):-1]
     salary_min = salary_max = salary_currency = None
     if salary_text != 'по договорённости':
         # 1-based position of the last digit, i.e. the slice end that
         # still includes that digit
         digits_end = next(
             pos
             for pos, char in reversed(list(enumerate(salary_text, 1)))
             if char.isdigit())
         # Skip the single character between the amount and the currency
         salary_currency = salary_text[digits_end + 1:]
         amount = salary_text[:digits_end].replace(' ', '')
         if '-' in amount:
             salary_min, salary_max = amount.split('-')
         else:
             # A single amount is an upper bound when the on-page label
             # starts with 'до', otherwise it is a lower bound
             bound_label = response.css(
                 'span._2Wp8I.ZON4b *::text').extract_first().lower()
             if bound_label.startswith('до'):
                 salary_max = amount
             else:
                 salary_min = amount

     yield JobparserItem(
         host=host,
         url=url,
         name=name,
         company=company,
         salary_min=salary_min,
         salary_max=salary_max,
         salary_currency=salary_currency,
     )
コード例 #4
0
ファイル: sjru.py プロジェクト: SpiritIV/job_scrapper
 def vacansy_parse(self, response: HtmlResponse):
     """Parse a vacancy page and yield a ``JobparserItem``.

     The item carries the vacancy title, the concatenated salary text in
     ``salary_from`` (empty string when no salary node is found), an empty
     ``salary_to``, the page URL and the site label.
     """
     # extract_first() gives the title string (or None), not a SelectorList
     name = response.xpath(
         "//h1[@class='_3mfro rFbjy s1nFK _2JVkc']/text()").extract_first()
     # extract() returns a possibly empty list of strings, so it can be
     # joined directly and needs no separate existence check
     salary_parts = response.xpath(
         "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()").extract()
     yield JobparserItem(name=name,
                         salary_from=''.join(salary_parts),
                         salary_to='',
                         link_vac=response.url,
                         link_site='superjob.ru')
    def vacancy_parse(self, response: HtmlResponse):
        """Parse a vacancy page and yield a ``JobparserItem``.

        Title, tag list and company logo are each looked up with a primary
        selector and, when that finds nothing, with an alternative one.
        Fields whose selectors match nothing are ``None`` (single values) or
        an empty list (multi-valued fields).
        """
        title = response.xpath(
            "//div[@class='vacancy-title ']/h1/text()").extract_first()
        if title is None:
            # extract_first() returns None instead of raising IndexError
            # when the alternative selector matches nothing either
            title = response.xpath(
                "//div[@class='vacancy-title']/h1/span/text()").extract_first()
        vac_href = response.url
        salary_max = response.css(
            'div.vacancy-title meta[itemprop="maxValue"]::attr(content)'
        ).extract()
        salary_min = response.css(
            'div.vacancy-title meta[itemprop="minValue"]::attr(content)'
        ).extract()
        salary_comment = response.css(
            'div.vacancy-title p.vacancy-salary::text').extract_first()
        competition = response.xpath(
            "//div[@class='bloko-tag bloko-tag_inline']//span/text()").extract()
        if not competition:
            competition = response.xpath(
                "//span[@class='Bloko-TagList-Text']/text()").extract()
        company_title = response.xpath(
            "//meta[@itemprop = 'name']/@content").extract_first()
        company_href = response.xpath(
            "//a[@class='vacancy-company-name']/@href").extract_first()
        company_logo = response.xpath(
            "//a[@class='vacancy-company-logo']/img/@src").extract_first()
        if company_logo is None:
            # Same element, but with a trailing space in the class attribute
            company_logo = response.xpath(
                "//a[@class='vacancy-company-logo ']/img/@src").extract_first()

        print('VParse', title, response, salary_comment, salary_max,
              salary_min)
        yield JobparserItem(title=title,
                            vac_href=vac_href,
                            salary_max=salary_max,
                            salary_min=salary_min,
                            salary_comment=salary_comment,
                            vac_from='HH',
                            compet=competition,
                            comp_title=company_title,
                            comp_href=company_href,
                            comp_logo=company_logo)
コード例 #6
0
    def vacansy_parse(self, response: HtmlResponse):
        """Gather vacancy fields into a dict and yield it as a ``JobparserItem``.

        ``name`` and ``experience`` are single values (first match or
        ``None``); every other field is the full list of matched text nodes.
        """
        select = response.xpath
        yield JobparserItem({
            'name': response.css('h1::text').extract_first(),
            'salary': select(
                "//span[@class='bloko-header-2 bloko-header-2_lite']/text()"
            ).extract(),
            'experience': select(
                "//span[@data-qa = 'vacancy-experience']/text()"
            ).extract_first(),
            'skils': select(
                "//div[@class ='bloko-tag bloko-tag_inline']//text()"
            ).extract(),
            'company_name': select(
                "//a[@data-qa='vacancy-company-name']/*/text()"
            ).extract(),
            'company_href': select(
                "//a[@data-qa='vacancy-company-name']/@href"
            ).extract(),
            'company_location': select(
                "//p[@data-qa='vacancy-view-location']//text()"
            ).extract(),
            'publication_date': select(
                "//p[@class='vacancy-creation-time']/text()"
            ).extract(),
            'connection_info': select(
                "//div[@class='vacancy-contacts__body']//text()"
            ).extract(),
        })

    # def vacansy_parse(self, response:HtmlResponse):
    #     vacancy_data = {}
    #     name_vac = response.css('h1::text').extract_first()
    #     salary_vac = response.xpath("//span[@class='bloko-header-2 bloko-header-2_lite']/text()").extract()
    #     experience = response.xpath("//span[@data-qa = 'vacancy-experience']/text()").extract_first()
    #     skils = response.xpath("//div[@class ='bloko-tag bloko-tag_inline']//text()").extract()
    #     company_name = response.xpath("//a[@data-qa='vacancy-company-name']/*/text()").extract_first()
    #     company_href = response.xpath("//a[@data-qa='vacancy-company-name']/@href").extract()
    #     company_location = response.xpath("//p[@data-qa='vacancy-view-location']//text()").extract()
    #     # company_description =
    #     publication_date = response.xpath("//p[@class='vacancy-creation-time']/text()").extract()
    #     connection_info = response.xpath("//div[@class='vacancy-contacts__body']//text()").extract()
    #
    #  # vacancy - title
    #     yield JobparserItem(name=name_vac, salary=salary_vac)
    #


# span.bloko-tag__section.bloko-tag__section_text" data-qa="bloko-tag__text" style="" xpath="1">ASP.NET</span>
コード例 #7
0
ファイル: hhru.py プロジェクト: chegevarae/educ
 def vacansy_parse(self, response: HtmlResponse):
     """Process a vacancy page and yield its data as a ``JobparserItem``.

     The company link and name are searched inside the ``div`` wrapper
     first and, when no link is found there, inside the ``p`` wrapper.
     ``link_company`` is ``None`` when neither wrapper contains a link.
     """
     vlink = response.url  # link to the vacancy page
     vname = response.xpath('//h1//text()').extract_first()  # vacancy title
     # salary as a list of separate text chunks
     vsalary = response.css('p.vacancy-salary span::text').extract()

     company_href = response.css(
         'div.vacancy-company-name-wrapper a::attr(href)').extract_first()
     vcomp = ''.join(
         response.css(
             'div.vacancy-company-name-wrapper span.bloko-section-header-2::text'
         ).extract())  # company name
     if not company_href:
         # Test the raw href: once the host prefix is prepended the link is
         # never empty, which would make this fallback unreachable.
         company_href = response.css(
             'p.vacancy-company-name-wrapper a::attr(href)').extract_first()
         vcomp = ''.join(
             response.css(
                 'p.vacancy-company-name-wrapper span.bloko-section-header-2::text'
             ).extract())
     # Prefix the host only when a link exists; str + None raises TypeError
     clink = ('https://spb.hh.ru' + company_href) if company_href else None

     vgeo = ''.join(
         response.xpath('//p[@data-qa="vacancy-view-location"]//text()').
         extract())  # company address
     exp_time = response.xpath(
         '//div[@class="vacancy-description"]/div[1]//text()').extract(
         )  # experience and schedule
     vdescr = response.xpath('//div[@class="vacancy-description"]//text()'
                             ).extract()  # vacancy description
     vdate = response.xpath('//p[@class="vacancy-creation-time"]//text()'
                            ).extract()  # publication date
     # Hand the data to the item, which defines the output structure
     yield JobparserItem(
         lnkpage=vlink,
         name=vname,
         salary=vsalary,
         link_company=clink,
         company=vcomp,
         geo=vgeo,
         experience=exp_time,
         descr=vdescr,
         date_pub=vdate
     )
コード例 #8
0
 def vacancy_parse(self, response: HtmlResponse):
     """Process a vacancy page and yield its data as a ``JobparserItem``."""
     item_fields = {
         # vacancy title (first matching text node)
         'name': response.xpath('//h1/text()').extract_first(),
         # salary as a list of separate text chunks
         'salary': response.css('p.vacancy-salary span::text').extract(),
         'location': response.xpath(
             '//p[@data-qa="vacancy-view-location"]//text()').extract(),
         'link': response.url,
         'company': response.xpath(
             '//span[@class="bloko-section-header-2 bloko-section-header-2_lite"]/text()'
         ).extract(),
     }
     # Hand the data to the item, which defines the output structure
     yield JobparserItem(**item_fields)
コード例 #9
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with name, salary bounds, URL and site.

     The salary text is passed to ``get_salary``; when its result does not
     hold exactly two values, both bounds are reported as ``0``.
     """
     name = response.css(
         'div.vacancy-title h1.header::text').extract_first()
     salary_text = response.css(
         'div.vacancy-title p.vacancy-salary::text').extract_first()
     bounds = get_salary(salary_text)
     href = response.url
     site = self.name
     if len(bounds) == 2:
         salary_min, salary_max = bounds[0], bounds[1]
     else:
         salary_min, salary_max = 0, 0
     yield JobparserItem(name=name,
                         salary_min=salary_min,
                         salary_max=salary_max,
                         href=href,
                         site=site)
コード例 #10
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with the fields found on a vacancy page.

     ``name``, ``salary`` and ``company`` are lists of matched text nodes;
     ``city`` is the first match or ``None``. ``source`` is the spider's
     first allowed domain.
     """
     select = response.xpath
     title_nodes = select(
         "//h1[@class='_3mfro rFbjy s1nFK _2JVkc']/text()").extract()
     salary_nodes = select(
         "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()").extract()
     company_nodes = select(
         "//h2[@class='_3mfro PlM3e _2JVkc _2VHxz _3LJqf _15msI']/text()"
     ).extract()
     city = select(
         "//span[@class='_3mfro _1hP6a _2JVkc']/text()").extract_first()
     yield JobparserItem(name=title_nodes,
                         salary=salary_nodes,
                         company=company_nodes,
                         link=response.url,
                         source=self.allowed_domains[0],
                         city=city)
コード例 #11
0
 def vacansy_parse(self, response):
     """Collect the vacancy data from the page and yield a ``JobparserItem``.

     Salary bounds come from the ``minValue``/``maxValue`` meta tags; a
     missing or empty value is reported as ``None``.
     """
     job_name = response.css(
         'div.vacancy-title h1.header::text').extract_first().strip()
     # `or None` normalises any falsy result (None, empty string) to None
     salary_min = response.css(
         'meta[itemprop="minValue"]::attr(content)').extract_first() or None
     salary_max = response.css(
         'meta[itemprop="maxValue"]::attr(content)').extract_first() or None
     job_link = response.css(
         'div[itemscope="itemscope"] meta[itemprop="url"]::attr(content)'
     ).extract_first()
     yield JobparserItem(name=job_name,
                         min_salary=salary_min,
                         max_salary=salary_max,
                         link=job_link,
                         site='hh.ru')
コード例 #12
0
 def vacancy_parse(self, response):
     """Yield a ``JobparserItem`` with title, salary, employer, URL, address and site.

     ``salary`` is the list of matched text nodes; the other scraped fields
     are the first match or ``None``. ``site`` is the spider's first allowed
     domain.
     """
     select = response.xpath
     item_fields = {
         'name': select(
             "//h1[@class='_3mfro rFbjy s1nFK _2JVkc']/text()"
         ).extract_first(),
         'salary': select(
             "//span[@class='_3mfro _2Wp8I ZON4b PlM3e _2JVkc']/text()"
         ).extract(),
         'employer': select(
             "//div[@class='_1Tjoc _3ifBO Ghoh2 _3lvIR']//div[@class='_1cFsi _3VUIu']/div[@class='_2g1F-'][1]//h2/text()"
         ).extract_first(),
         'url': response.url,
     }
     item_fields['site'] = self.allowed_domains[0]
     item_fields['address'] = select(
         "//div[@class='f-test-address _3AQrx']//span[@class='_3mfro _1hP6a _2JVkc']/text()"
     ).extract_first()
     yield JobparserItem(**item_fields)
コード例 #13
0
    def vacancy_parse(self, response: HtmlResponse):
        """Parse a vacancy page and yield name, salary bounds and link.

        ``hh_name`` and ``hh_salary`` are XPath expressions defined outside
        this method. The salary text nodes are scanned for the markers
        ``'до '`` (upper bound follows) and ``'от '`` (lower bound follows);
        a bound that is absent is reported as ``None``.
        """
        name = response.xpath(hh_name).extract_first()
        salary_parts = response.xpath(hh_salary).extract()

        def part(index):
            # Text node at *index* without non-breaking spaces, or None
            # when the list is too short.
            if index < len(salary_parts):
                return salary_parts[index].replace('\xa0', '')
            return None

        # Length checks replace the former bare `except`; a short list no
        # longer prevents the marker in the first node from being examined.
        max_salary = None
        if len(salary_parts) > 2 and 'до ' in salary_parts[2]:
            max_salary = part(3)
        elif salary_parts and 'до ' in salary_parts[0]:
            max_salary = part(1)

        # Membership test on the list: 'от ' must be a whole text node
        min_salary = part(1) if 'от ' in salary_parts else None

        yield JobparserItem(name=name,
                            min_salary=min_salary,
                            max_salary=max_salary,
                            link=response.url)
コード例 #14
0
ファイル: hhru.py プロジェクト: georgylarin/Scrapy-Lesson-5
    def vacancy_parse(self, response: HtmlResponse):
        """Yield a ``JobparserItem`` with name, salary triple, link and site.

        ``salary`` is a three-element list ``[min, max, currency]`` taken
        from the ``baseSalary`` meta tags; each element is ``None`` when its
        tag is not found.
        """
        name = response.css('div.vacancy-title h1.header::text').extract()

        salary_min = response.css(
            'span[itemprop="baseSalary"] meta[itemprop="minValue"] ::attr(content)'
        ).extract_first()
        salary_max = response.css(
            'span[itemprop="baseSalary"] meta[itemprop="maxValue"] ::attr(content)'
        ).extract_first()
        salary_currency = response.css(
            'span[itemprop="baseSalary"] meta[itemprop="currency"] ::attr(content)'
        ).extract_first()

        yield JobparserItem(name=name,
                            salary=[salary_min, salary_max, salary_currency],
                            vacancy_link=response.url,
                            site_scraping=self.allowed_domains[0])
コード例 #15
0
 def vacansy_parse(self, response):
     """Parse a vacancy page and yield a ``JobparserItem`` with integer salary bounds.

     The salary text is classified by the markers it contains:
     '—' (range), 'от' (lower bound only), 'договорённости' (no salary),
     'до' (upper bound only); any other text is treated as one amount used
     for both bounds. A bound whose text contains no digits is ``None``.
     """
     def to_int(text):
         # Keep ASCII digits only; an empty result means "no value" instead
         # of letting int('') raise ValueError.
         digits = re.sub(r'[^0-9]', '', text)
         return int(digits) if digits else None

     job_name = response.css('div._3MVeX h1::text').extract_first().strip()
     salary = ''.join(response.css(
         'span[class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"] *::text').extract())
     salary = salary.replace('\xa0', '')

     # Branch order matters: 'договорённости' itself contains 'до', so it
     # has to be tested before the 'до' marker.
     if '—' in salary:
         low, high = salary.split('—')[:2]
         salary_min, salary_max = to_int(low), to_int(high)
     elif 'от' in salary:
         salary_min, salary_max = to_int(salary[2:]), None
     elif 'договорённости' in salary:
         salary_min = salary_max = None
     elif 'до' in salary:
         salary_min, salary_max = None, to_int(salary[2:])
     else:
         salary_min = salary_max = to_int(salary)

     job_link = response.css(
         'link[rel="canonical"]::attr(href)').extract_first()
     yield JobparserItem(name=job_name,
                         min_salary=salary_min,
                         max_salary=salary_max,
                         link=job_link,
                         site='superjob.ru')
コード例 #16
0
ファイル: sjru.py プロジェクト: kirill50/Data_search
    def vacansy_parse(self, response: HtmlResponse):
        """Yield a ``JobparserItem`` with name, employer, salary parts and link.

        Every scraped field is the first node matched by its XPath, or
        ``None`` when nothing matches.
        """
        def first(xpath):
            # First matching node of *xpath*, or None
            return response.xpath(xpath).extract_first()

        item_fields = {
            'name': first(
                '//h1[@class="_3mfro rFbjy s1nFK _2JVkc"]/text()|//h1[@class="_3mfro rFbjy s1nFK _2JVkc"]/span[@class="_1rS-s"]/text()'
            ),
            'employer': first(
                '//h2[@class="_3mfro PlM3e _2JVkc _2VHxz _3LJqf _15msI"]/text()'
            ),
            'min_salary': first(
                '//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span/text()'
            ),
            'max_salary': first(
                '//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span[@x-path=1]/text()|//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span[3]/text()'
            ),
            'currency': first(
                '//span[@class="_3mfro _2Wp8I ZON4b PlM3e _2JVkc"]/span[4]/text()'
            ),
            'link': response.url,
        }
        yield JobparserItem(**item_fields)
コード例 #17
0
ファイル: hhru.py プロジェクト: kirill50/Data_search
    def vacansy_parse(self, response: HtmlResponse):
        """Yield a ``JobparserItem`` with name, employer, salary data and link.

        ``name`` is the first matched node (or ``None``); employer, salary
        bounds and currency are lists of all matched values.
        """
        def every(xpath):
            # All nodes matched by *xpath*, as a list of strings
            return response.xpath(xpath).extract()

        name = response.xpath(
            '//h1[@data-qa="vacancy-title"]/text()|//h1[@data-qa="vacancy-title"]/span/text()'
        ).extract_first()
        employer = every(
            '//a[@itemprop="hiringOrganization"]/span/span/text()|//a[@itemprop="hiringOrganization"]/span/text()'
        )
        lower = every(
            '//span[@itemprop="value"]/meta[@itemprop="minValue"]/@content'
        )
        upper = every(
            '//span[@itemprop="value"]/meta[@itemprop="maxValue"]/@content|//span[@itemprop="value"]/meta[@itemprop="value"]/@content'
        )
        currency = every(
            '//span[@itemprop="baseSalary"]/meta[@itemprop="currency"]/@content'
        )

        yield JobparserItem(name=name,
                            min_salary=lower,
                            max_salary=upper,
                            currency=currency,
                            employer=employer,
                            link=response.url)
コード例 #18
0
    def vacancy_parse(self, response: HtmlResponse):
        """Parse a vacancy page and yield name, salary bounds and link.

        ``sj_name`` and ``sj_salary`` are XPath expressions defined outside
        this method. The salary text nodes are interpreted as follows:
        'По договорённости' means no salary; a '—' node means a range
        (nodes 0 and 4); a 'до' node means an upper bound (node 2); anything
        else is treated as a lower bound (node 2). Bounds that cannot be read
        are ``None``.
        """
        name = response.xpath(sj_name).extract_first()
        # Query once and start from explicit None defaults; unpacking
        # locals() only worked because exactly two locals existed.
        salary_parts = response.xpath(sj_salary).extract()
        min_salary = max_salary = None

        if 'По договорённости' not in salary_parts:
            try:
                if '—' in salary_parts:
                    max_salary = salary_parts[4].replace('\xa0', '')
                    min_salary = salary_parts[0].replace('\xa0', '')
                elif 'до' in salary_parts:
                    max_salary = salary_parts[2].replace(
                        '\xa0', '').split('руб.')[0]
                else:
                    min_salary = salary_parts[2].replace(
                        '\xa0', '').split('руб.')[0]
            except IndexError:
                # Fewer text nodes than the layout requires: report no salary
                min_salary = max_salary = None

        yield JobparserItem(name=name,
                            min_salary=min_salary,
                            max_salary=max_salary,
                            link=response.url)
コード例 #19
0
ファイル: sjru.py プロジェクト: chegevarae/educ
 def vacansy_parse(self, response: HtmlResponse):
     """Process a vacancy page and yield its data as a ``JobparserItem``.

     ``link_company`` is the company href prefixed with the site host, or
     ``None`` when the page has no company link.
     """
     vlink = response.url  # link to the vacancy page
     vname = response.xpath('//h1//text()').extract_first()  # vacancy title
     # salary as a list of separate text chunks
     vsalary = response.css('span._1OuF_.ZON4b span::text').extract()

     company_href = response.xpath(
         "//div/a[contains(@class,'_2JivQ')]/@href").extract_first()
     # Prefix the host only when a link exists; str + None raises TypeError
     clink = ('https://russia.superjob.ru' + company_href) if company_href else None

     vcomp = ''.join(
         response.xpath(
             '//a/h2[@class="_3mfro PlM3e _2JVkc _2VHxz _3LJqf _15msI"]//text()'
         ).extract())  # company name
     vgeo = ''.join(response.css(
         'span._6-z9f span::text').extract())  # company address
     exp_time = ' '.join(
         response.xpath(
             '//span/span/span[@class="_3mfro _1hP6a _2JVkc"]//text()').
         extract())  # experience and schedule
     vdescr = response.xpath(
         '//span[@class="_3mfro _2LeqZ _1hP6a _2JVkc _2VHxz _15msI"]//text()'
     ).extract()  # vacancy description
     vdate = response.xpath(
         '//div[@class="f-test-title _183s9 _3wZVt OuDXD _1iZ5S"]//span//text()'
     ).extract()  # publication date
     # Hand the data to the item, which defines the output structure
     yield JobparserItem(
         lnkpage=vlink,
         name=vname,
         salary=vsalary,
         link_company=clink,
         company=vcomp,
         geo=vgeo,
         experience=exp_time,
         descr=vdescr,
         date_pub=vdate
     )
コード例 #20
0
ファイル: hhru.py プロジェクト: papalos/scrapingGB
 def vacansy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with the vacancy title, salary text nodes and URL."""
     yield JobparserItem(
         item_name=response.xpath('//h1/text()').extract_first(),
         item_salary=response.xpath(
             "//p[@class='vacancy-salary']/span/text()").extract(),
         item_url=response.url,
     )
コード例 #21
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with title, salary text nodes, link and source domain."""
     item_fields = {
         'item_name': response.xpath("//h1/text()").extract_first(),
         'item_salary': response.xpath(
             "//span[contains(@class, '_3mfro _2Wp8I PlM3e _2JVkc _2VHxz')]//text()"
         ).extract(),
         'item_link': response.url,
         # the spider's first allowed domain identifies the source
         'item_source': self.allowed_domains[0],
     }
     yield JobparserItem(**item_fields)
コード例 #22
0
ファイル: hh.py プロジェクト: Selen34/scraping
def vacancy_parse(response: HtmlResponse):
    """Yield a ``JobparserItem`` with the page title, raw salary data and URL.

    The salary is the unparsed ``data-params`` attribute of the
    ``HH/GoogleDfpService`` script tag, or ``None`` when the tag is absent.
    """
    item_fields = {
        'title': response.css('h1.header *::text').extract_first(),
        'salary': response.xpath(
            "//script[@data-name='HH/GoogleDfpService']/@data-params"
        ).extract_first(),
        'url': response.url,
    }
    yield JobparserItem(**item_fields)
コード例 #23
0
 def vacansy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with the vacancy name and salary text (first matches)."""
     def first(css_query):
         # First node matched by *css_query*, or None
         return response.css(css_query).extract_first()

     yield JobparserItem(
         name=first('div.vacancy-title h1.header::text'),
         salary=first('div.vacancy-title p.vacancy-salary::text'),
     )
コード例 #24
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` holding the text of the vacancy's script block.

     All vacancy information is taken from the body of a ``<script>``
     element inside the vacancy container; it is passed on unparsed.
     """
     script_nodes = response.xpath(
         '//div[@class="_1Tjoc UGN79 undefined _1XYex"]//script//text()')
     yield JobparserItem(general=script_nodes.extract_first())
コード例 #25
0
 def vacancy_parce(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with vacancy name, salary text nodes, link and source."""
     title = response.css("div.vacancy-title h1::text").extract_first()
     salary_nodes = response.xpath(
         "//span[@class='bloko-header-2 bloko-header-2_lite']/text()"
     ).extract()
     yield JobparserItem(name=title,
                         salary=salary_nodes,
                         link=response.url,
                         src="hh.ru")
コード例 #26
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with name and salary, then print both values."""
     title = response.xpath("//h1/text()").extract_first()
     salary_nodes = response.xpath(
         "//p[@class='vacancy-salary']/span/text()").extract()
     yield JobparserItem(name=title, salary=salary_nodes)
     # Executes only when the generator is resumed after the item was taken
     print(title, salary_nodes)
コード例 #27
0
ファイル: sj.py プロジェクト: eglazachev/DataGathering
 def parse_vacancies(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with title, salary text nodes, link and site label."""
     item_fields = {
         'title': response.xpath('//h1//text()').get(),
         'salary': response.xpath(
             '//span[@class="_3mfro _2Wp8I PlM3e _2JVkc"]/text()').getall(),
         'link': response.url,
         'site': 'superjob.ru',
     }
     yield JobparserItem(**item_fields)
コード例 #28
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with the vacancy name, all salary text nodes and URL."""
     select = response.xpath
     yield JobparserItem(
         name=select("//h1/text()").extract_first(),
         salary=select("//p[@class='vacancy-salary']//text()").extract(),
         href=response.url,
     )
コード例 #29
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with the vacancy title, salary text nodes and URL."""
     title = response.xpath("//h1/text()").extract_first()
     salary_nodes = response.xpath(
         "//span[@class='_3mfro _2Wp8I PlM3e _2JVkc']/text()")
     yield JobparserItem(item_name=title,
                         item_salary=salary_nodes.extract(),
                         item_url=response.url)
コード例 #30
0
 def vacancy_parse(self, response: HtmlResponse):
     """Yield a ``JobparserItem`` with the vacancy name and its salary text nodes."""
     item_fields = {
         'name': response.xpath("//h1/text()").extract_first(),
         'salary': response.xpath(
             "//span[contains(@class, '_1OuF_ ZON4b')]//text()").extract(),
     }
     yield JobparserItem(**item_fields)