def parse_company_info(root, item):
    """Copy the company trade, size and address found under ``root`` into ``item``.

    Each field is looked up with an XPath relative to ``root``; the extracted
    strings are handed to ``try_get_value_from_array`` and its result is
    stored under the field name (presumably the first match or ``None`` --
    TODO confirm against the helper).

    :param root: selector scoped to the company information section.
    :param item: mapping-like item that receives the extracted fields.
    """
    scope = root.xpath('.')
    # NOTE: 'company_name' is intentionally not filled in here; the lookup
    # that used to populate it was disabled in the original code.
    field_queries = (
        # Label text: "行业" = industry.
        ('company_trade', u".//*[contains(text(), '行业:')]/a/text()"),
        # Label text: "公司规模" = company size.
        ('company_size', u'.//*[contains(text(), "公司规模:")]//text()'),
        # Label text: "公司地址" = company address.
        ('company_address', u'.//*[contains(text(), "公司地址:")]/../text()'),
    )
    for field, query in field_queries:
        matches = scope.xpath(query).extract()
        item[field] = try_get_value_from_array(matches)
 def parse_other_info(root, item):
     """Fill ``item`` with department, major, supervisor and subordinate data.

     Every value is read from the ``<label>`` that shares a parent with a
     ``<span>`` whose text equals the given Chinese caption, searched below
     the ``<ul>`` children of ``root``.

     :param root: selector scoped to the "other information" section.
     :param item: mapping-like item that receives the extracted fields.
     """
     rows = root.xpath("./ul")
     field_queries = (
         # "所属部门" = department
         ('department', u'.//span[text()="所属部门:"]/../label/text()'),
         # "专业要求" = required major
         ('major', u'.//span[text()="专业要求:"]/../label/text()'),
         # "汇报对象" = reports to
         ('supervisor', u'.//span[text()="汇报对象:"]/../label/text()'),
         # "下属人数" = number of subordinates
         ('subordinate', u'.//span[text()="下属人数:"]/../label/text()'),
     )
     for field, query in field_queries:
         extracted = rows.xpath(query).extract()
         item[field] = try_get_value_from_array(extracted)
Example #3
0
 def parse_company_info(root, item):
     """Fill ``item`` with the company name, trade, type, size and address.

     All lookups are relative to ``root``. The ``company_trade`` query used
     to start with ``//``, which makes XPath search the whole document rather
     than the subtree of ``root``; it now starts with ``.//`` like the other
     queries so a matching label elsewhere on the page cannot leak in.

     :param root: selector scoped to the company information section.
     :param item: mapping-like item that receives the extracted fields.
     """
     scope = root.xpath('.')
     field_queries = (
         # "公司名称" = company name
         ('company_name', u'.//*[text()="公司名称:"]/../a/text()'),
         # "所属行业" = industry; kept relative to ``scope`` (see docstring).
         ('company_trade',
          u'.//*[text()="所属行业:"]/following-sibling::span/text()'),
         # "公司性质" = company type / ownership
         ('company_type', u'.//*[text()="公司性质:"]/../text()'),
         # "公司规模" = company size
         ('company_size', u'.//*[text()="公司规模:"]/../text()'),
         # "公司地址" = company address
         ('company_address',
          u'.//*[text()="公司地址:"]/../span[@title]/text()'),
     )
     for field, query in field_queries:
         item[field] = try_get_value_from_array(scope.xpath(query).extract())
 def parse_company_detail_info(root, item):
     """Store the company description text in ``item['company_detail']``.

     The text is taken from ``div.info-word`` inside the ``div`` siblings
     that follow ``root``. When the helper yields a value, every occurrence
     of the ``' '`` literal is removed from it; a ``None`` result is stored
     unchanged.

     :param root: selector whose following siblings hold the description.
     :param item: mapping-like item that receives ``company_detail``.
     """
     texts = root.xpath(
         "./following-sibling::div/div[@class='info-word']/text()").extract()
     detail = try_get_value_from_array(texts)
     if detail is None:
         item['company_detail'] = detail
         return
     item['company_detail'] = detail.replace(r' ', r'')
 def parse_basic_require(root, item):
     """Fill ``item`` with work place, salary and the basic requirement fields.

     The header block is located with an absolute XPath
     (``//div[@class='job-title-left']``), so it is searched for in the whole
     document that ``root`` belongs to, not only below ``root``.

     :param root: selector used as the entry point for the query.
     :param item: mapping-like item that receives the extracted fields.
     """
     header = root.xpath("//div[@class='job-title-left']")

     work_places = header.xpath('.//a/text()').extract()
     item['work_place'] = try_get_value_from_array(work_places)

     salaries = header.xpath('./p/text()').extract()
     item['salary'] = try_get_value_from_array(salaries)

     # TODO (translated from the original note): check how many spans are
     # present. The fields below are assigned purely by span position.
     positional_fields = ('education', 'experience', 'language', 'age')
     for position, field in enumerate(positional_fields, 1):
         query = u'.//span[{}]/text()'.format(position)
         item[field] = try_get_value_from_array(header.xpath(query).extract())
    def parse_params_to_url(self, response):
        """Apply the next set of search parameters and request the result page.

        ``self.params`` is walked one entry per call, with ``self.i`` as the
        cursor. For each ``label -> option`` pair of the current entry, the
        link whose text contains the option is looked up inside the
        ``search-conditions`` block of ``response`` and joined with
        ``self.base_url`` into ``self.new_url``. Once every entry has been
        consumed, the last built URL is requested again with
        ``go_last_page`` as the callback.

        The cursor is advanced exactly once per parameter set. It used to be
        incremented inside the per-key loop, so an entry with several keys
        skipped the following entries and an empty entry never advanced.

        :param response: response for the current search page.
        :returns: the follow-up ``Request``.
        """
        if self.i >= len(self.params):
            # All parameter sets applied: move on to pagination discovery.
            return Request(self.new_url,
                           callback=self.go_last_page,
                           dont_filter=True)

        root_node = response.xpath(".//div[@class='search-conditions']")
        for k, v in self.params[self.i].items():
            # <dd> that follows the title element mentioning this label.
            param_type = root_node.xpath(
                u".//*[@class='search-title'][contains(text(), '{}')]/following-sibling::dd"
                .format(k))
            url = try_get_value_from_array(
                param_type.xpath(
                    u".//a[contains(text(), '{}')]/@href".format(
                        v)).extract())
            self.new_url = urljoin(self.base_url, url)
        self.i += 1
        return Request(self.new_url, callback=self.parse_params_to_url)
Example #7
0
 def parse_other_requirement_info(root, item):
     """Fill ``item`` with the additional job requirement fields.

     Queries are evaluated against the ``<div>`` children of ``root``; each
     one finds a caption element by its exact Chinese text and reads the
     value next to it.

     :param root: selector scoped to the requirements section.
     :param item: mapping-like item that receives the extracted fields.
     """
     blocks = root.xpath('./div')
     field_queries = (
         # "工作经验" = work experience
         ('experience', u'./ul/li/p[text()="工作经验:"]/../span/text()'),
         # "学历要求" = education requirement
         ('education', u'./ul/li/p[text()="学历要求:"]/../text()'),
         # "年龄" = age
         ('age', u'./ul/li/span[text()="年龄:"]/../text()'),
         # "是否统招全日制" = full-time unified-enrolment degree or not
         ('full_time', u'./ul/li/span[text()="是否统招全日制:"]/../text()'),
         # "专业要求" = required major
         ('major', u'./div[text()="专业要求:"]/../div/p/text()'),
         # "海外经历" = overseas experience
         ('oversea', u'./*[text()="海外经历:"]/../div/p/text()'),
         # "语言要求" = language requirement
         ('language', u'./div[text()="语言要求:"]/../div/p/text()'),
     )
     for field, query in field_queries:
         found = blocks.xpath(query).extract()
         item[field] = try_get_value_from_array(found)
Example #8
0
    def parse_basic_info(root, item):
        """Fill ``item`` with the basic job facts listed under ``root``.

        Each ``./ul/li`` entry below ``root`` is searched for a ``<span>``
        whose text equals a Chinese caption; the value is read from the
        parent of that span (its text, a link, or its second child element,
        depending on the field).

        :param root: selector scoped to the basic information section.
        :param item: mapping-like item that receives the extracted fields.
        """
        entries = root.xpath(u'./ul/li')
        field_queries = (
            # "所属部门" = department
            ('department', u'./span[text()="所属部门:"]/../text()'),
            # "职位类别" = job category
            ('job_type', u'./span[text()="职位类别:"]/../text()'),
            # "下属人数" = number of subordinates
            ('subordinate', u'./span[text()="下属人数:"]/../text()'),
            # "招聘人数" = number of openings
            ('requirement', u'./span[text()="招聘人数:"]/../text()'),
            # "工作地点" = work location (value is a link)
            ('work_place', u'./span[text()="工作地点:"]/../a/text()'),
            # "发布时间" = publish time (value is the second child element)
            ('publish_time', u'./span[text()="发布时间:"]/../*[2]/text()'),
            # "汇报对象" = reports to
            ('supervisor', u'./span[text()="汇报对象:"]/../text()'),
        )
        for field, query in field_queries:
            values = entries.xpath(query).extract()
            item[field] = try_get_value_from_array(values)
 def go_last_page(self, response):
     """Request the page linked by the ``a.last`` anchor of ``response``.

     The anchor's ``href`` is resolved against ``self.base_url`` and the
     resulting request is handled by ``self.get_total_page``; duplicate
     filtering is disabled for it.

     :param response: response of a listing page.
     :returns: the ``Request`` for the last listing page.
     """
     hrefs = response.xpath("//a[@class='last']/@href").extract()
     target = urljoin(self.base_url, try_get_value_from_array(hrefs))
     return Request(target, callback=self.get_total_page, dont_filter=True)
 def get_next_page_url(self, response):
     """Return the absolute URL of the "next page" link in ``response``.

     The link is the anchor whose text contains "下一页" ("next page"); its
     ``href`` is resolved against ``self.base_url``.

     :param response: response of a listing page.
     :returns: the joined URL.
     """
     hrefs = response.xpath(u"//a[contains(text(), '下一页')]/@href").extract()
     relative_url = try_get_value_from_array(hrefs)
     return urljoin(self.base_url, relative_url)
 def parse_job_detail_info(root, item):
     """Store the string value of ``root`` in ``item['job_detail']``.

     ``string(.)`` yields the concatenated text of the node and its
     descendants; the extracted list is passed through
     ``try_get_value_from_array``.

     :param root: selector scoped to the job description.
     :param item: mapping-like item that receives ``job_detail``.
     """
     full_text = root.xpath('string(.)').extract()
     item['job_detail'] = try_get_value_from_array(full_text)
Example #12
0
 def parse_company_detail_info(root, item):
     """Store the string value of ``root`` in ``item['company_detail']``.

     ``string(.)`` yields the concatenated text of the node and its
     descendants. Every occurrence of the ``' '`` literal is stripped from
     the value before it is stored.

     The helper is assumed to be able to return ``None`` when nothing was
     extracted (TODO confirm); in that case ``None`` is stored instead of
     failing with ``AttributeError`` on ``None.replace``.

     :param root: selector scoped to the company description.
     :param item: mapping-like item that receives ``company_detail``.
     """
     detail = try_get_value_from_array(root.xpath('string(.)').extract())
     if detail is not None:
         detail = detail.replace(r' ', r'')
     item['company_detail'] = detail
Example #13
0
 def parse_salary_info(root, item):
     """Store the annual salary range in ``item['salary']``.

     The value is the link text found next to the ``<span>`` captioned
     "年薪范围" ("annual salary range") inside the ``./ul/li`` entries of
     ``root``.

     :param root: selector scoped to the salary section.
     :param item: mapping-like item that receives ``salary``.
     """
     entries = root.xpath(u'./ul/li')
     salary_texts = entries.xpath(
         u'./span[text()="年薪范围:"]/../span/a/text()').extract()
     item['salary'] = try_get_value_from_array(salary_texts)