def pick_users(self, cutoff):
    """Queue a lookup job for each NEW user whose score reaches ``cutoff``.

    Walks ``self.scores``. For every uid whose state is ``scoredict.NEW``
    and whose ``log_score(rfs, ats)`` is at least ``cutoff``, a
    ``LookupJobBody`` is put on ``self.stalk``, the uid's state is changed
    to ``scoredict.LOOKUP`` and ``self.lookups`` is incremented.
    """
    for uid in self.scores:
        state, friend_score, mention_score = self.scores.split(uid)

        # Only entries in the NEW state are candidates; the score is
        # computed only for those.
        if not state == scoredict.NEW:
            continue
        if not log_score(friend_score, mention_score) >= cutoff:
            continue

        request = LookupJobBody(
            _id=as_local_id('U', uid),
            rfriends_score=friend_score,
            mention_score=mention_score,
        )
        request.put(self.stalk)
        self.scores.set_state(uid, scoredict.LOOKUP)
        self.lookups += 1
def pick_users(self, cutoff):
    """Enqueue lookups for users that are NEW and score at least ``cutoff``.

    For each uid in ``self.scores`` the stored (state, rfriends score,
    mention score) triple is read with ``self.scores.split``. Qualifying
    users get a ``LookupJobBody`` put on ``self.stalk``, are moved to
    ``scoredict.LOOKUP``, and bump the ``self.lookups`` counter.
    """
    for uid in self.scores:
        status, rfriends, mentions = self.scores.split(uid)

        # log_score is evaluated only when the entry is still NEW.
        picked = (
            status == scoredict.NEW
            and log_score(rfriends, mentions) >= cutoff
        )
        if picked:
            fields = dict(
                _id=as_local_id('U', uid),
                rfriends_score=rfriends,
                mention_score=mentions,
            )
            LookupJobBody(**fields).put(self.stalk)
            self.scores.set_state(uid, scoredict.LOOKUP)
            self.lookups += 1
def read_scores(self):
    """Drain score jobs from ``self.stalk`` into ``self.scores``.

    Calls ``self.stalk.reserve(120)`` repeatedly until it returns ``None``,
    a job whose body is ``"halt"`` arrives, the iteration limit is reached,
    or an exception is raised. A job flagged ``done`` sets its user to
    ``scoredict.DONE``; any other job adds its two scores to the user's
    entry. Each processed job is deleted.

    On any exception the error is logged, ``self.halt`` is set, the most
    recently reserved job (if any) is buried, and the method returns.
    """
    job = None

    # The iteration cap is larger once halting has been requested.
    if self.halt:
        limit = 10000000
    else:
        limit = 100000

    for count in xrange(limit):
        try:
            job = self.stalk.reserve(120)

            if job is None:
                logging.info("loaded %d scores", count)
                return

            if job.body == "halt":
                self.halt = True
                # Parenthesised form prints the same text under the
                # Python 2 print statement.
                print("starting to halt...")
                logging.info("starting to halt...")
                job.delete()
                return

            score = LookupJobBody.from_job(job)
            finished = score.done
            uid = as_int_id(score._id)
            if finished:
                self.scores.set_state(uid, scoredict.DONE)
            else:
                self.scores.increment(
                    uid,
                    score.rfriends_score,
                    score.mention_score,
                )
            job.delete()
        except:
            # NOTE(review): bare except also traps KeyboardInterrupt and
            # SystemExit; kept as-is to preserve behaviour.
            logging.exception("exception in read_scores caused HALT")
            self.halt = True
            if job:
                job.bury()
            return
def run(self):
    """Process lookup jobs from ``self.stalk`` in batches, forever.

    Each pass reserves up to 20 jobs, resolves their ids with one
    ``self.twitter.user_lookup`` call, and then handles every job that has
    a user: the user is crawled and saved unless its id is already in
    ``User.database`` or ``self.orig_db``, and the job is deleted. A job
    whose processing raises is logged and buried.

    When fewer than 30 API calls remain, the loop sleeps until
    ``self.twitter.reset_time`` (compared against ``datetime.utcnow()``).
    The method has no normal exit; exceptions raised outside the per-job
    ``try`` propagate to the caller.
    """
    while True:
        jobs = []
        for x in xrange(20):
            # reserve blocks to wait when x is 0, but returns None for 1-19
            try:
                j = self.stalk.reserve(0 if x else None)
            except beanstalkc.DeadlineSoon:
                break
            if j is None:
                break
            jobs.append(j)

        bodies = [LookupJobBody.from_job(j) for j in jobs]
        users = self.twitter.user_lookup([b._id for b in bodies])

        logging.info(
            "looking at %r",
            [getattr(u, 'screen_name', '') for u in users],
        )
        for job, body, user in zip(jobs, bodies, users):
            if user is None:
                # NOTE(review): the job is neither deleted nor buried on
                # this path, so it stays reserved -- confirm intent.
                continue
            try:
                if self.twitter.remaining < 30:
                    dt = self.twitter.reset_time - datetime.utcnow()
                    logging.info("goodnight for %r", dt)
                    # timedelta.seconds is only the seconds *component*: it
                    # drops whole days and is ~86399 for a slightly negative
                    # delta. Use the full signed duration and skip the
                    # sleep when the reset time has already passed.
                    pause = dt.days * 86400 + dt.seconds
                    if pause > 0:
                        time.sleep(pause)
                logging.info("look at %s", user.screen_name)
                if user._id in User.database or user._id in self.orig_db:
                    # Already known: nothing to crawl, just drop the job.
                    job.delete()
                    continue
                self.crawl_user(user)
                user.save()
                job.delete()
            except:
                # NOTE(review): bare except also traps KeyboardInterrupt
                # and SystemExit; kept so any failure buries the job.
                logging.exception("exception for job %s", job.body)
                job.bury()
            logging.info("api calls remaining: %d", self.twitter.remaining)
def read_scores(self):
    """Apply queued score jobs from ``self.stalk`` to ``self.scores``.

    Loops at most 100000 times (10000000 when ``self.halt`` is already
    set), reserving one job per iteration with ``self.stalk.reserve(120)``:

    * ``None`` from reserve logs how many jobs were loaded and returns.
    * A job whose body is ``"halt"`` sets ``self.halt``, announces the
      halt, deletes the job and returns.
    * Any other job is decoded with ``LookupJobBody.from_job``. It either
      increments the user's scores or, when flagged ``done``, marks the
      user ``scoredict.DONE``. The job is then deleted.

    Any exception is logged, sets ``self.halt``, buries the most recently
    reserved job (if any) and ends the method.
    """
    job = None
    max_jobs = 10000000 if self.halt else 100000

    for loaded in xrange(max_jobs):
        try:
            job = self.stalk.reserve(120)
            if job is None:
                logging.info("loaded %d scores", loaded)
                return
            elif job.body == "halt":
                self.halt = True
                # Same output as the Python 2 print statement.
                print("starting to halt...")
                logging.info("starting to halt...")
                job.delete()
                return

            entry = LookupJobBody.from_job(job)
            if not entry.done:
                self.scores.increment(
                    as_int_id(entry._id),
                    entry.rfriends_score,
                    entry.mention_score,
                )
            else:
                self.scores.set_state(as_int_id(entry._id), scoredict.DONE)
            job.delete()
        except:
            # NOTE(review): bare except is preserved; it also catches
            # KeyboardInterrupt and SystemExit.
            logging.exception("exception in read_scores caused HALT")
            self.halt = True
            if job:
                job.bury()
            return
def run(self):
    """Worker loop: batch-reserve lookup jobs and crawl their users.

    Repeats forever:

    1. Reserve up to 20 jobs from ``self.stalk``. The first reserve uses
       no timeout; the rest use a timeout of 0. Collection stops early on
       ``beanstalkc.DeadlineSoon`` or when reserve returns ``None``.
    2. Decode the jobs and resolve all ids with a single
       ``self.twitter.user_lookup`` call.
    3. For each job with a user: sleep until ``self.twitter.reset_time``
       if fewer than 30 API calls remain, skip users already present in
       ``User.database`` or ``self.orig_db``, otherwise crawl and save the
       user. The job is deleted on success and buried (after logging) if
       anything raises.

    Exceptions raised outside the per-job ``try`` propagate to the caller.
    """
    while True:
        reserved = []
        for x in xrange(20):
            # reserve blocks to wait when x is 0, but returns None for 1-19
            timeout = 0 if x else None
            try:
                j = self.stalk.reserve(timeout)
            except beanstalkc.DeadlineSoon:
                break
            if j is None:
                break
            reserved.append(j)

        bodies = [LookupJobBody.from_job(j) for j in reserved]
        users = self.twitter.user_lookup([b._id for b in bodies])

        names = [getattr(u, 'screen_name', '') for u in users]
        logging.info("looking at %r", names)

        for job, body, user in zip(reserved, bodies, users):
            if user is None:
                # NOTE(review): no delete/bury here, so the job remains
                # reserved -- confirm this is intended.
                continue
            try:
                if self.twitter.remaining < 30:
                    wait = self.twitter.reset_time - datetime.utcnow()
                    logging.info("goodnight for %r", wait)
                    # Do not use wait.seconds directly: it ignores the
                    # days field, so a reset time just in the past
                    # (days=-1, seconds~86399) would sleep almost a day
                    # and a multi-day wait would be cut short.
                    delay = wait.days * 86400 + wait.seconds
                    if delay > 0:
                        time.sleep(delay)
                logging.info("look at %s", user.screen_name)
                already_seen = (
                    user._id in User.database or user._id in self.orig_db
                )
                if already_seen:
                    job.delete()
                    continue
                self.crawl_user(user)
                user.save()
                job.delete()
            except:
                # NOTE(review): bare except also traps KeyboardInterrupt
                # and SystemExit; kept so any failure buries the job.
                logging.exception("exception for job %s", job.body)
                job.bury()
            logging.info("api calls remaining: %d", self.twitter.remaining)