Python try_get_value_from_array Examples, job_position.utils.try_get_value_from_array Python Examples

Example #1

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

 def parse_company_info(root, item):
     """Populate the company fields of ``item`` from the ``root`` selector.

     Every field is located with an XPath evaluated relative to ``root``;
     the extracted matches are passed to ``try_get_value_from_array`` and
     the result is stored under the matching key of ``item``.

     ``company_name`` is not set by this function.
     """
     scope = root.xpath('.')
     # (item key, XPath) pairs, evaluated in this order.  The labels matched
     # are "industry", "company size" and "company address".
     field_queries = (
         ('company_trade',
          u".//*[contains(text(), '行业：')]/a/text()"),
         ('company_size',
          u'.//*[contains(text(), "公司规模：")]//text()'),
         ('company_address',
          u'.//*[contains(text(), "公司地址：")]/../text()'),
     )
     for field, query in field_queries:
         matches = scope.xpath(query).extract()
         item[field] = try_get_value_from_array(matches)

Example #2

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

 def parse_other_info(root, item):
     """Populate department, major, supervisor and subordinate in ``item``.

     The values are read from the ``<label>`` that shares a parent with a
     ``<span>`` whose text equals the field caption, searching inside the
     ``<ul>`` children of ``root``.  Each list of matches goes through
     ``try_get_value_from_array`` before being stored.
     """
     rows = root.xpath("./ul")
     # (item key, XPath) pairs, evaluated in this order.  Captions are
     # "department", "major requirement", "reports to" and
     # "number of subordinates".
     field_queries = (
         ('department', u'.//span[text()="所属部门："]/../label/text()'),
         ('major', u'.//span[text()="专业要求："]/../label/text()'),
         ('supervisor', u'.//span[text()="汇报对象："]/../label/text()'),
         ('subordinate', u'.//span[text()="下属人数："]/../label/text()'),
     )
     for field, query in field_queries:
         matches = rows.xpath(query).extract()
         item[field] = try_get_value_from_array(matches)

Example #3

0

Show file

 def parse_company_info(root, item):
     """Populate the company fields of ``item`` from the ``root`` selector.

     Each field is located by the caption text next to it ("company name",
     "industry", "company type", "company size", "company address").  The
     extracted matches are passed to ``try_get_value_from_array`` and the
     result is stored under the matching key of ``item``.

     All queries are relative to ``root`` (they start with ``.//``).  An
     XPath starting with ``//`` would search the whole document regardless
     of ``root``, so the industry lookup uses the relative form like every
     other field.
     """
     scope = root.xpath('.')
     # (item key, XPath) pairs, evaluated in this order.
     field_queries = (
         ('company_name',
          u'.//*[text()="公司名称："]/../a/text()'),
         ('company_trade',
          u'.//*[text()="所属行业："]/following-sibling::span/text()'),
         ('company_type',
          u'.//*[text()="公司性质："]/../text()'),
         ('company_size',
          u'.//*[text()="公司规模："]/../text()'),
         ('company_address',
          u'.//*[text()="公司地址："]/../span[@title]/text()'),
     )
     for field, query in field_queries:
         matches = scope.xpath(query).extract()
         item[field] = try_get_value_from_array(matches)

Example #4

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

 def parse_company_detail_info(root, item):
     """Store the company description in ``item['company_detail']``.

     The text is taken from ``div[@class='info-word']`` nested in a
     following sibling ``<div>`` of ``root`` and passed through
     ``try_get_value_from_array``.  Spaces are removed from the value
     unless it is ``None``, in which case ``None`` is stored.
     """
     texts = root.xpath(
         "./following-sibling::div/div[@class='info-word']/text()").extract()
     detail = try_get_value_from_array(texts)
     # Only strip spaces when a value was actually obtained.
     item['company_detail'] = (
         detail if detail is None else detail.replace(r' ', r''))

Example #5

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

 def parse_basic_require(root, item):
     """Populate the basic requirement fields of ``item``.

     All lookups are made inside ``div[@class='job-title-left']``, which is
     selected with a document-wide (``//``) query issued from ``root``.
     Each list of matches goes through ``try_get_value_from_array`` before
     being stored.
     """
     header = root.xpath("//div[@class='job-title-left']")
     # (item key, XPath) pairs, evaluated in this order.
     # Original note: check the number of spans -- education, experience,
     # language and age are read from fixed span positions 1 to 4.
     field_queries = (
         ('work_place', './/a/text()'),
         ('salary', './p/text()'),
         ('education', u'.//span[1]/text()'),
         ('experience', u'.//span[2]/text()'),
         ('language', u'.//span[3]/text()'),
         ('age', u'.//span[4]/text()'),
     )
     for field, query in field_queries:
         matches = header.xpath(query).extract()
         item[field] = try_get_value_from_array(matches)

Example #6

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

    def parse_params_to_url(self, response):
        """Apply the next search parameter and return the follow-up request.

        While ``self.i`` is below ``len(self.params)``, the mapping at
        ``self.params[self.i]`` is used to find a link in the
        ``search-conditions`` block of ``response``: the key selects the
        ``search-title`` element and the value selects the ``<a>`` in its
        following ``<dd>`` siblings.  The link is joined onto
        ``self.base_url``, saved in ``self.new_url`` and requested with this
        method as the callback again.

        Once all parameters are consumed, ``self.new_url`` is requested once
        more with ``go_last_page`` as the callback and ``dont_filter=True``.
        """
        conditions = response.xpath(".//div[@class='search-conditions']")

        if self.i >= len(self.params):
            return Request(self.new_url,
                           callback=self.go_last_page,
                           dont_filter=True)

        for caption, choice in self.params[self.i].items():
            options = conditions.xpath(
                u".//*[@class='search-title'][contains(text(), '{}')]/following-sibling::dd"
                .format(caption))
            href_query = u".//a[contains(text(), '{}')]/@href".format(choice)
            href = try_get_value_from_array(
                options.xpath(href_query).extract())
            # NOTE(review): the counter advances once per key/value pair,
            # not once per mapping -- confirm each mapping holds one pair.
            self.i += 1
            self.new_url = urljoin(self.base_url, href)
        return Request(self.new_url, callback=self.parse_params_to_url)

Example #7

0

Show file

 def parse_other_requirement_info(root, item):
     """Populate the additional requirement fields of ``item``.

     Lookups are made relative to the ``<div>`` children of ``root``.  Each
     field is found via the element holding its caption ("work experience",
     "education requirement", "age", "full-time unified enrollment",
     "major requirement", "overseas experience", "language requirement"),
     and each list of matches goes through ``try_get_value_from_array``
     before being stored.
     """
     sections = root.xpath('./div')
     # (item key, XPath) pairs, evaluated in this order.
     field_queries = (
         ('experience', u'./ul/li/p[text()="工作经验："]/../span/text()'),
         ('education', u'./ul/li/p[text()="学历要求："]/../text()'),
         ('age', u'./ul/li/span[text()="年龄："]/../text()'),
         ('full_time', u'./ul/li/span[text()="是否统招全日制："]/../text()'),
         ('major', u'./div[text()="专业要求："]/../div/p/text()'),
         ('oversea', u'./*[text()="海外经历："]/../div/p/text()'),
         ('language', u'./div[text()="语言要求："]/../div/p/text()'),
     )
     for field, query in field_queries:
         matches = sections.xpath(query).extract()
         item[field] = try_get_value_from_array(matches)

Example #8

0

Show file

    def parse_basic_info(root, item):
        """Populate the basic job fields of ``item`` from ``root``.

        Lookups are made relative to the ``ul/li`` descendants of ``root``.
        Each field is found through the ``<span>`` holding its caption
        ("department", "job category", "number of subordinates",
        "number of openings", "work location", "publish time",
        "reports to"), and each list of matches goes through
        ``try_get_value_from_array`` before being stored.
        """
        entries = root.xpath(u'./ul/li')
        # (item key, XPath) pairs, evaluated in this order.
        field_queries = (
            ('department', u'./span[text()="所属部门："]/../text()'),
            ('job_type', u'./span[text()="职位类别："]/../text()'),
            ('subordinate', u'./span[text()="下属人数："]/../text()'),
            ('requirement', u'./span[text()="招聘人数："]/../text()'),
            ('work_place', u'./span[text()="工作地点："]/../a/text()'),
            ('publish_time', u'./span[text()="发布时间："]/../*[2]/text()'),
            ('supervisor', u'./span[text()="汇报对象："]/../text()'),
        )
        for field, query in field_queries:
            matches = entries.xpath(query).extract()
            item[field] = try_get_value_from_array(matches)

Example #9

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

 def go_last_page(self, response):
     """Return a request for the page linked by the ``a.last`` anchor.

     The ``href`` of ``<a class="last">`` is passed through
     ``try_get_value_from_array`` and joined onto ``self.base_url``.  The
     request uses ``get_total_page`` as its callback and sets
     ``dont_filter=True``.
     """
     hrefs = response.xpath("//a[@class='last']/@href").extract()
     target = urljoin(self.base_url, try_get_value_from_array(hrefs))
     return Request(target, callback=self.get_total_page, dont_filter=True)

Example #10

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

 def get_next_page_url(self, response):
     """Return the URL of the "next page" link, joined onto the base URL.

     The ``href`` of the anchor whose text contains the "next page" caption
     is passed through ``try_get_value_from_array`` and then joined onto
     ``self.base_url``.
     """
     hrefs = response.xpath(
         u"//a[contains(text(), '下一页')]/@href").extract()
     relative_url = try_get_value_from_array(hrefs)
     return urljoin(self.base_url, relative_url)

Example #11

0

Show file

File: liepin_job_position.py Project: tmaccs/job_position_spider

 def parse_job_detail_info(root, item):
     """Store the string value of ``root`` in ``item['job_detail']``.

     ``string(.)`` is evaluated on ``root`` and the extracted result is
     passed through ``try_get_value_from_array`` before being stored.
     """
     texts = root.xpath('string(.)').extract()
     item['job_detail'] = try_get_value_from_array(texts)

Example #12

0

Show file

 def parse_company_detail_info(root, item):
     """Store the space-stripped string value of ``root`` as the company detail.

     ``string(.)`` is evaluated on ``root`` and the extracted result is
     passed through ``try_get_value_from_array``.  Spaces are then removed
     and the result is stored in ``item['company_detail']``.

     If the helper returns ``None`` (presumably when nothing was extracted
     -- confirm against the helper), ``None`` is stored instead of raising
     ``AttributeError`` from calling ``.replace`` on ``None``.
     """
     texts = root.xpath('string(.)').extract()
     company_detail = try_get_value_from_array(texts)
     if company_detail is not None:
         company_detail = company_detail.replace(r' ', r'')
     item['company_detail'] = company_detail

Example #13

0

Show file

 def parse_salary_info(root, item):
     """Store the annual salary range in ``item['salary']``.

     The value is the text of the ``span/a`` that shares a parent with the
     ``<span>`` holding the "annual salary range" caption, searched within
     the ``ul/li`` descendants of ``root`` and passed through
     ``try_get_value_from_array``.
     """
     entries = root.xpath(u'./ul/li')
     matches = entries.xpath(
         u'./span[text()="年薪范围："]/../span/a/text()').extract()
     item['salary'] = try_get_value_from_array(matches)