def start_requests(self):
    """Build the initial share-list requests for a batch of users.

    Selects up to ten ``uk`` values from ``user`` rows whose ``share``
    column is 0, sets ``share`` to 1 for each of them, and then creates
    one ``scrapy.FormRequest`` per row for ``self.share_url`` formatted
    with that ``uk`` and ``start=0``.

    Returns:
        list: one ``scrapy.FormRequest`` per selected row, each with
        ``self.parse`` as its callback.
    """
    selected = db.query('select uk from user where share=0 limit 10')

    # First pass: update every selected row before any request is built.
    for record in selected:
        db.update('update user set share=1 where uk=%s', record['uk'])

    # Second pass over the same result: one request per row.
    initial_requests = []
    for record in selected:
        first_page_url = self.share_url.format(uk=record['uk'], start=0)
        initial_requests.append(
            scrapy.FormRequest(first_page_url, callback=self.parse)
        )
    return initial_requests
def parse(self, response):
    """Process the first follow-list page for one user.

    The ``uk`` is read from the request URL and the total count from a
    ``totalCount:"<digits>"`` fragment in the response body. The user's
    ``follow`` column is then set to 1. When the count is positive the
    method yields:

    * one ``scrapy.Request`` (callback ``self.parse_follow``) for every
      further page offset 20, 40, ... below the total;
    * for every ``follow_uk`` id found in the body, a ``UserItem`` and a
      ``scrapy.Request`` for that id's own list at ``start=0`` (callback
      ``self.parse``).

    Raises:
        IndexError: if the URL has no ``uk=<digits>`` or the body has no
            ``totalCount`` fragment (``[0]`` on an empty match list).
    """
    # NOTE(review): the patterns are text patterns applied to
    # response.body -- confirm the body type on the target interpreter.
    body = response.body
    uk = re.findall(r'uk=(\d+)', response.request.url)[0]
    follower_total_count = int(re.findall(r"totalCount:\"(\d+)\"", body)[0])
    db.update('update user set follow=1 where uk=%s', uk)

    if follower_total_count <= 0:
        return

    # The URLs of the remaining pages are all built before the first
    # request is yielded.
    next_page_urls = []
    for offset in range(20, follower_total_count, 20):
        next_page_urls.append(self.wap_follow_url.format(uk=uk, start=offset))
    for page_url in next_page_urls:
        yield scrapy.Request(page_url, callback=self.parse_follow)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # original -- this loop is assumed to sit inside the positive-count
    # branch with both yields in the loop body; confirm against history.
    followed_ids = re.findall(r"follow_uk\\\":(\d+)", body)
    for followed_uk in followed_ids:
        yield UserItem(uk=followed_uk)
        yield scrapy.Request(
            self.wap_follow_url.format(uk=followed_uk, start=0),
            callback=self.parse,
        )
def parse(self, response):
    """Read a user's share total and schedule every share-list page.

    The ``uk`` is taken from the request URL and ``total_count`` from
    the JSON response body. If the body has no ``total_count`` key, the
    ``KeyError`` is logged, the user's ``share`` column is set to 2 and
    nothing is yielded. For a positive total, one ``scrapy.Request`` is
    yielded per 60-entry page (callback ``self.parse_share``), each
    carrying a ``Referer`` header built from ``self.share_referer_url``.

    Raises:
        IndexError: if the URL lacks ``uk=<digits>``, or lacks
            ``query_uk=<digits>`` when the total is positive.
    """
    request_url = response.request.url
    uk = re.findall(r'uk=(\d+)', request_url)[0]

    try:
        payload = json.loads(response.body)
        total_count = int(payload['total_count'])
    except KeyError as e:
        log.msg(e)
        db.update('update user set share=2 where uk=%s', uk)
        return

    if total_count <= 0:
        return

    # From here on the id comes from the ``query_uk`` parameter.
    uk = re.findall(r'query_uk=(\d+)', request_url)[0]

    # All page URLs are built before the first request is yielded.
    page_urls = []
    for offset in range(0, total_count, 60):
        page_urls.append(
            self.share_url.format(uk=uk, start=offset).encode('utf-8')
        )

    for page_url in page_urls:
        referer_header = {'Referer': self.share_referer_url.format(uk=uk)}
        yield scrapy.Request(
            page_url,
            callback=self.parse_share,
            headers=referer_header,
        )
def parse(self, response):
    """Schedule share-list page requests from a share-count response.

    Steps, in order:

    1. Extract ``uk`` from the request URL.
    2. Parse the body as JSON and convert its ``total_count`` to int.
       A missing key is logged, the user's ``share`` column is set to
       2, and the generator ends without yielding.
    3. For a positive total, re-read the id from the ``query_uk`` URL
       parameter and yield one ``scrapy.Request`` per offset
       0, 60, 120, ... below the total. Each request uses
       ``self.parse_share`` as its callback and a ``Referer`` header
       formatted from ``self.share_referer_url``.

    Raises:
        IndexError: when the expected ``uk`` / ``query_uk`` parameter
            is absent from the URL.
    """
    uk = re.findall(r'uk=(\d+)', response.request.url)[0]

    try:
        decoded = json.loads(response.body)
        total_count = int(decoded['total_count'])
    except KeyError as e:
        log.msg(e)
        db.update('update user set share=2 where uk=%s', uk)
        return

    has_shares = total_count > 0
    if not has_shares:
        return

    uk = re.findall(r'query_uk=(\d+)', response.request.url)[0]
    offsets = range(0, total_count, 60)
    # The full URL list is materialised before anything is yielded.
    encoded_urls = [
        self.share_url.format(uk=uk, start=offset).encode('utf-8')
        for offset in offsets
    ]

    for target in encoded_urls:
        yield scrapy.Request(
            target,
            callback=self.parse_share,
            headers={'Referer': self.share_referer_url.format(uk=uk)},
        )
def start_requests(self):
    """Return the first share-list request for each selected user.

    Queries up to ten ``uk`` values from ``user`` rows with ``share=0``
    and sets ``share`` to 1 for every one of them. Only after all the
    updates does it build the requests: one ``scrapy.FormRequest`` per
    row, targeting ``self.share_url`` at ``start=0`` with ``self.parse``
    as the callback.

    Returns:
        list: the ``scrapy.FormRequest`` objects, in row order.
    """
    def first_page_request(entry):
        # Request for the first page of one user's share list.
        target = self.share_url.format(uk=entry['uk'], start=0)
        return scrapy.FormRequest(target, callback=self.parse)

    batch = db.query('select uk from user where share=0 limit 10')

    # All rows are updated before any request object is created.
    for entry in batch:
        db.update('update user set share=1 where uk=%s', entry['uk'])

    return [first_page_request(entry) for entry in batch]