Ejemplo n.º 1
0
 def start_requests(self):
     """Create the initial requests for users selected from the database.

     Selects up to ten ``uk`` values from ``user`` rows where ``share=0``,
     sets ``share=1`` on each of them, and then returns a list holding one
     ``scrapy.FormRequest`` per selected row. Each request targets
     ``self.share_url`` formatted with that row's ``uk`` and ``start=0``
     and is handled by ``self.parse``.
     """
     pending = db.query('select uk from user where share=0 limit 10')

     # All selected rows are flagged before any request object is built.
     for record in pending:
         db.update('update user set share=1 where uk=%s', record['uk'])

     requests = []
     for record in pending:
         first_page = self.share_url.format(uk=record['uk'], start=0)
         requests.append(scrapy.FormRequest(first_page, callback=self.parse))
     return requests
Ejemplo n.º 2
0
    def parse(self, response):
        """Process the first follow-list page fetched for one user.

        The user's ``uk`` is read from the request URL and the total count
        from the ``totalCount`` field in the response body; the user's row
        is then updated with ``follow=1``. When the count is positive the
        generator yields:

        * one ``scrapy.Request`` per further page (``start`` = 20, 40, ...
          below the total), handled by ``self.parse_follow``;
        * for every ``follow_uk`` found in the body, a ``UserItem`` followed
          by a ``scrapy.Request`` for that user's own first page, handled
          by ``self.parse`` again.

        Raises:
            IndexError: if the URL has no ``uk=<digits>`` part or the body
                has no ``totalCount`` match.
        """
        owner_uk = re.findall(r'uk=(\d+)', response.request.url)[0]
        count_matches = re.findall(r"totalCount:\"(\d+)\"", response.body)
        total = int(count_matches[0])
        db.update('update user set follow=1 where uk=%s', owner_uk)

        if total <= 0:
            return

        # Pages after the one already in hand; step matches the original 20.
        for offset in range(20, total, 20):
            page_url = self.wap_follow_url.format(uk=owner_uk, start=offset)
            yield scrapy.Request(page_url, callback=self.parse_follow)

        for followed_uk in re.findall(r"follow_uk\\\":(\d+)", response.body):
            yield UserItem(uk=followed_uk)
            first_page = self.wap_follow_url.format(uk=followed_uk, start=0)
            yield scrapy.Request(first_page, callback=self.parse)
Ejemplo n.º 3
0
    def parse(self, response):
        """Fan out share-list page requests for the user in the request URL.

        The response body is decoded as JSON and its ``total_count`` entry
        converted to ``int``. If that key is missing, the ``KeyError`` is
        passed to ``log.msg``, the user's row is updated with ``share=2``
        and nothing is yielded. Otherwise, when the count is positive, one
        ``scrapy.Request`` is yielded per offset (0, 60, 120, ... below the
        total) with ``self.parse_share`` as callback and a ``Referer``
        header built from ``self.share_referer_url``.

        Raises:
            IndexError: if the request URL lacks the expected ``uk=<digits>``
                (or, on the positive-count path, ``query_uk=<digits>``) part.
        """
        request_url = response.request.url
        uk = re.findall(r'uk=(\d+)', request_url)[0]

        try:
            payload = json.loads(response.body)
            total_count = int(payload['total_count'])
        except KeyError as missing_key:
            log.msg(missing_key)
            db.update('update user set share=2 where uk=%s', uk)
            return

        if total_count <= 0:
            return

        # From here on the uk is taken from the ``query_uk`` parameter.
        uk = re.findall(r'query_uk=(\d+)', request_url)[0]
        referer = self.share_referer_url.format(uk=uk)

        for offset in range(0, total_count, 60):
            page_url = self.share_url.format(uk=uk, start=offset).encode('utf-8')
            yield scrapy.Request(page_url,
                                 callback=self.parse_share,
                                 headers={'Referer': referer})
Ejemplo n.º 4
0
    def parse(self, response):
        """Schedule every share-list page for the user named in the URL.

        Reads ``total_count`` from the JSON response body. When the key is
        absent the ``KeyError`` is handed to ``log.msg``, the user's row is
        updated with ``share=2`` and the generator ends without yielding.
        When the count is greater than zero, a ``scrapy.Request`` is yielded
        for each ``start`` in ``range(0, total_count, 60)``; each request
        uses ``self.parse_share`` as its callback and carries a ``Referer``
        header formatted from ``self.share_referer_url``.

        Raises:
            IndexError: if the request URL has no ``uk=<digits>`` match (or
                no ``query_uk=<digits>`` match when the count is positive).
        """
        uk = re.findall(r'uk=(\d+)', response.request.url)[0]
        try:
            decoded = json.loads(response.body)
            total_count = int(decoded['total_count'])
        except KeyError as err:
            log.msg(err)
            db.update('update user set share=2 where uk=%s', uk)
            return

        if not total_count > 0:
            return

        # The uk used for the page URLs comes from the ``query_uk`` parameter.
        uk = re.findall(r'query_uk=(\d+)', response.request.url)[0]
        for start in range(0, total_count, 60):
            url = self.share_url.format(uk=uk, start=start).encode('utf-8')
            headers = {'Referer': self.share_referer_url.format(uk=uk)}
            yield scrapy.Request(url, callback=self.parse_share, headers=headers)
Ejemplo n.º 5
0
 def start_requests(self):
     """Return the first-page share requests for a batch of users.

     Up to ten rows with ``share=0`` are read from the ``user`` table and
     each one is updated to ``share=1``. After all updates have run, a
     ``scrapy.FormRequest`` is created for every row, pointing at
     ``self.share_url`` formatted with the row's ``uk`` and ``start=0``,
     with ``self.parse`` as the callback. The requests are returned as a
     list.
     """
     select_sql = 'select uk from user where share=0 limit 10'
     mark_sql = 'update user set share=1 where uk=%s'

     users = db.query(select_sql)
     for user in users:
         db.update(mark_sql, user['uk'])

     initial = []
     for user in users:
         url = self.share_url.format(uk=user['uk'], start=0)
         initial.append(scrapy.FormRequest(url, callback=self.parse))
     return initial