예제 #1
0
    def parseTencent(self, response):
        """Yield a single TencentItem filled from page-wide XPath queries.

        Every field takes the first text node matched by its XPath. If an
        XPath matches nothing, the ``[0]`` lookup raises IndexError.
        """
        # (item field, XPath) pairs, evaluated in this order.
        field_paths = (
            # position name
            ('name', '//tr/td[@id="sharetitle"]/text()'),
            # work location
            ('work_place', '//tr[@class="c bottomline"]/td[1]/text()'),
            # number of openings
            ('need_num', '//tr[@class="c bottomline"]/td[3]/text()'),
            # job duties: only the first matching <li> text is kept
            ('position_duty', '//td[@class="l2"]//li/text()'),
        )

        item = TencentItem()
        for field, path in field_paths:
            item[field] = response.xpath(path).extract()[0]

        yield item
예제 #2
0
    def parseTencent(self, response):
        """Yield one TencentItem per table row whose class is 'even' or 'odd'.

        ``positionType`` falls back to ``''`` when its cell has no text
        node. All other fields are required: if one of their XPaths matches
        nothing, IndexError is raised.
        """
        for row in response.xpath("//tr[@class='even'] | //tr[@class='odd']"):
            item = TencentItem()
            item['positionName'] = row.xpath("./td[1]/a/text()").extract()[0]
            # The href is appended to the site root as-is.
            href = row.xpath("./td[1]/a/@href").extract()[0]
            item['positionLink'] = "https://hr.tencent.com/" + href
            # extract_first(default='') gives the same result as the previous
            # "extract()[0] if len(extract()) > 0 else ''" expression, but
            # runs the XPath query only once.
            item['positionType'] = row.xpath(
                "./td[2]/text()").extract_first(default='')
            item['peopleNum'] = row.xpath("./td[3]/text()").extract()[0]
            item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
            item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]

            yield item
예제 #3
0
 def parse_item(self, response):
     """Print the response body and yield one TencentItem per job link.

     A job link is an ``<a>`` element whose class attribute is exactly
     'recruit-list-link'. Each yielded item is a fresh object, so later
     changes to one item cannot leak into items already handed on.
     """
     # Single-argument print(...) is valid on both Python 2 and Python 3.
     print('----------------------------------')
     # response.text is a property holding the decoded body; calling it
     # as response.text() raises TypeError.
     print(response.text)

     for link in response.xpath("//a[@class='recruit-list-link']"):
         # Create the item inside the loop; yielding one shared instance
         # repeatedly would make every result alias the same object.
         item = TencentItem()
         # Position name. extract_first() returns None when the title
         # node is missing instead of raising.
         # TODO(review): the value is not stored yet because the matching
         # TencentItem field name is not visible from this method.
         position_name = link.xpath(
             "./div[@class='recruit-title']/text()").extract_first()
         yield item
예제 #4
0
 def parse_item(self, response):
     """Yield one TencentItem per table row whose class is 'even' or 'odd'.

     ``positionType`` defaults to ``''`` when its cell has no text node.
     With a bare ``extract()[0]`` such a row would raise IndexError, which
     ends this generator and drops the remaining rows of the page. All
     other fields are required and still raise IndexError when missing.
     """
     for row in response.xpath("//tr[@class='even'] | //tr[@class='odd']"):
         item = TencentItem()
         # position name
         item['positionname'] = row.xpath("./td[1]/a/text()").extract()[0]
         # position link: the href appended to the site root
         href = row.xpath("./td[1]/a/@href").extract()[0]
         item['positionlink'] = "https://hr.tencent.com/" + href
         # position category (optional cell)
         item['positionType'] = row.xpath(
             "./td[2]/text()").extract_first(default='')
         # number of openings
         item['positionNum'] = row.xpath("./td[3]/text()").extract()[0]
         # work location
         item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
         # publish date
         item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]
         yield item
예제 #5
0
    def parseTencent(self, response):
        """Yield one TencentItem per table row whose class is 'even' or 'odd'.

        ``positionType`` defaults to ``''`` when its cell has no text node.
        With a bare ``extract()[0]`` such a row would raise IndexError,
        which ends this generator and drops the remaining rows of the page.
        All other fields are required and still raise IndexError when
        missing.
        """
        for row in response.xpath("//tr[@class='even'] | //tr[@class='odd']"):
            item = TencentItem()
            # position name
            item['positionname'] = row.xpath("./td[1]/a/text()").extract()[0]
            # detail link (stored exactly as found in the href attribute)
            item['positionlink'] = row.xpath("./td[1]/a/@href").extract()[0]
            # position category (optional cell)
            item['positionType'] = row.xpath(
                "./td[2]/text()").extract_first(default='')
            # number of openings
            item['peopleNum'] = row.xpath("./td[3]/text()").extract()[0]
            # work location
            item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
            # publish date
            item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]

            yield item
예제 #6
0
    def parseTencent(self, response):
        """Yield one TencentItem per table row whose class is 'even' or 'odd'.

        ``positiontype`` defaults to ``''`` when its cell has no text node.
        With a bare ``extract()[0]`` such a row would raise IndexError,
        which ends this generator and drops the remaining rows of the page.
        All other fields are required and still raise IndexError when
        missing.
        """
        for row in response.xpath('//tr[@class="even"] | //tr[@class="odd"]'):
            # Fresh model object for this row.
            item = TencentItem()
            # position name
            # NOTE(review): "sitionname" looks like a typo of "positionname";
            # left unchanged because it must match the TencentItem
            # declaration, which is not visible here.
            item["sitionname"] = row.xpath("./td[1]/a/text()").extract()[0]
            # detail link (stored exactly as found in the href attribute)
            item["positionlink"] = row.xpath("./td[1]/a/@href").extract()[0]
            # category (optional cell)
            item["positiontype"] = row.xpath(
                "./td[2]/text()").extract_first(default="")
            # number of openings
            # NOTE(review): "perpleNum" looks like a typo of "peopleNum";
            # confirm against the TencentItem declaration.
            item["perpleNum"] = row.xpath("./td[3]/text()").extract()[0]
            # work location
            item["workLocation"] = row.xpath("./td[4]/text()").extract()[0]
            # publish date
            item["publishTime"] = row.xpath("./td[5]/text()").extract()[0]

            # Hand the populated item on for further processing.
            yield item
예제 #7
0
    def parseTencent(self, response):
        """Yield one TencentItem per table row whose class is 'even' or 'odd'.

        ``positionType`` defaults to ``''`` when its cell has no text node.
        With a bare ``extract()[0]`` such a row would raise IndexError,
        which ends this generator and drops the remaining rows of the page.
        All other fields are required and still raise IndexError when
        missing.
        """
        # response.xpath() returns a list of selectors, one per matching row.
        for row in response.xpath("//tr[@class='even'] | //tr[@class='odd']"):
            item = TencentItem()
            # position name
            item['positionName'] = row.xpath("./td[1]/a/text()").extract()[0]
            # detail link (stored exactly as found in the href attribute)
            item['positionLink'] = row.xpath("./td[1]/a/@href").extract()[0]
            # position type (optional cell)
            item['positionType'] = row.xpath(
                "./td[2]/text()").extract_first(default='')
            # number of openings
            item['peopleNum'] = row.xpath("./td[3]/text()").extract()[0]
            # work location
            item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
            # publish date
            item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]

            yield item
예제 #8
0
    def parseTencent(self, response):
        """Yield one TencentItem per table row whose class is 'even' or 'odd'.

        ``positionType`` defaults to ``''`` when its cell has no text node.
        With a bare ``extract()[0]`` such a row would raise IndexError,
        which ends this generator and drops the remaining rows of the page.
        All other fields are required and still raise IndexError when
        missing.
        """
        for row in response.xpath("//tr[@class='even']|//tr[@class='odd']"):
            item = TencentItem()
            # position name
            item['positionName'] = row.xpath('./td[1]/a/text()').extract()[0]
            # detail link (stored exactly as found in the href attribute)
            item['positionLink'] = row.xpath('./td[1]/a/@href').extract()[0]
            # position category (optional cell)
            item['positionType'] = row.xpath(
                './td[2]/text()').extract_first(default='')
            # number of openings
            item['positionNum'] = row.xpath('./td[3]/text()').extract()[0]
            # work location
            item['workLocation'] = row.xpath('./td[4]/text()').extract()[0]
            # publish date
            item['publishTime'] = row.xpath('./td[5]/text()').extract()[0]

            yield item
예제 #9
0
파일: tencent.py 프로젝트: itarvin/spider
    def parseTencent(self, response):
        """Yield one TencentItem per table row whose class is 'even' or 'odd'.

        Every field takes the first node matched by its row-relative XPath.
        If an XPath matches nothing, the ``[0]`` lookup raises IndexError.
        The position category cell (``td[2]``) is not collected here.
        """
        # (item field, row-relative XPath) pairs, evaluated in this order.
        column_paths = (
            # position name
            ('positionname', "./td[1]/a/text()"),
            # detail link (stored exactly as found in the href attribute)
            ('positionlink', "./td[1]/a/@href"),
            # number of openings
            ('peopleNum', "./td[3]/text()"),
            # work location
            ('workLocation', "./td[4]/text()"),
            # publish date
            ('publishTime', "./td[5]/text()"),
        )

        rows = response.xpath("//tr[@class='even'] | //tr[@class='odd']")
        for row in rows:
            # Fresh model object for this row.
            item = TencentItem()
            for field, path in column_paths:
                item[field] = row.xpath(path).extract()[0]

            yield item