def start_requests(self):
    """Yield the initial requests for every stored ``db.Question`` row.

    For each question, one request is produced per step of 20, starting at 0
    and stopping before ``min(int(answer_count), 100)``, so at most five
    requests (0, 20, 40, 60, 80) are issued per question.  Each URL is built
    by formatting ``self.url_pattern`` with ``(question.id, step_value)`` and
    each request carries ``self.authorization`` in its ``Authorization``
    header.
    """
    # NOTE(review): presumably db.attach() is what provides self.session --
    # confirm against the db module.
    db.attach(self)
    questions = self.session.query(db.Question).all()
    for entry in questions:
        # Never go past the first 100 answers, whatever the stored count is.
        upper_bound = min(int(entry.answer_count), 100)
        for start in range(0, upper_bound, 20):
            target_url = self.url_pattern % (entry.id, start)
            yield scrapy.Request(
                target_url,
                headers={'Authorization': self.authorization},
            )
def start_requests(self):
    """Yield one initial request per stored ``db.Artist`` row.

    The URL is ``self.url_pattern`` formatted with the artist's
    ``artist_name``.  Each request carries ``self.authorization`` in its
    ``Authorization`` header, is created with ``dont_filter=True``, and
    exposes the originating row as ``request.meta['artist']``.
    """
    # NOTE(review): presumably db.attach() is what provides self.session --
    # confirm against the db module.
    db.attach(self)
    artists = self.session.query(db.Artist).all()
    for row in artists:
        outgoing = scrapy.Request(
            self.url_pattern % row.artist_name,
            headers={'Authorization': self.authorization},
            dont_filter=True,
            # Same resulting request.meta as assigning the key afterwards.
            meta={'artist': row},
        )
        yield outgoing
def start_requests(self):
    """Yield follower-list requests for every stored ``db.Topic`` row.

    For each topic, one request is produced per offset in
    ``range(0, int(topic.followers_count), 20)``; the URL asks for
    ``limit=20`` at that offset.  Each request carries
    ``self.authorization`` in its ``Authorization`` header and exposes the
    originating row as ``request.meta['topic']``.
    """
    followers_url = (
        'https://www.zhihu.com/api/v4/topics/%s/followers?limit=20&offset=%d'
    )
    # NOTE(review): presumably db.attach() is what provides self.session --
    # confirm against the db module.
    db.attach(self)
    for row in self.session.query(db.Topic).all():
        follower_total = int(row.followers_count)
        for offset in range(0, follower_total, 20):
            yield scrapy.Request(
                followers_url % (row.id, offset),
                headers={'Authorization': self.authorization},
                # Same resulting request.meta as assigning the key afterwards.
                meta={'topic': row},
            )
def start_requests(self):
    """Yield one initial request per qualifying ``db.User`` row.

    Only users whose ``channel`` equals ``db.CHANNEL`` are queried, and a
    request is produced only when ``business``, ``educations`` and
    ``employments`` are all non-``None``.  The URL is ``self.url_pattern``
    formatted with ``user_id``; each request carries ``self.authorization``
    in its ``Authorization`` header.
    """
    # NOTE(review): presumably db.attach() is what provides self.session --
    # confirm against the db module.
    db.attach(self)
    channel_users = self.session.query(db.User).filter(
        db.User.channel == db.CHANNEL)
    for account in channel_users.all():
        # Read all three attributes up front (no short-circuiting), so each
        # one is accessed for every user.
        profile_parts = (
            account.business,
            account.educations,
            account.employments,
        )
        # NOTE(review): users with any missing field are skipped -- confirm
        # this is intended and not the inverse of the desired filter.
        if any(part is None for part in profile_parts):
            continue
        yield scrapy.Request(
            self.url_pattern % account.user_id,
            headers={'Authorization': self.authorization},
        )
def open_spider(self, spider):
    """Call ``db.attach`` on this object.

    The ``spider`` argument is accepted but not used in this method.
    """
    # NOTE(review): presumably this is a Scrapy open_spider hook and
    # db.attach() sets up database access on self -- confirm against the
    # db module and the enclosing class.
    db.attach(self)