Ejemplo n.º 1
0
 def _send_job(self, uid, rfs, ats, force=None):
     """Queue a lookup job for ``uid`` and mark that user as being looked up.

     A ``LookupJobBody`` is built from the arguments and put on
     ``self.stalk``; afterwards the user's state in ``self.scores`` is set
     to ``scoredict.LOOKUP``.

     Args:
         uid: Stored as the job body's ``_id`` and used as the key when
             updating ``self.scores``.
         rfs: Stored as the job body's ``rfriends_score``.
         ats: Stored as the job body's ``mention_score``.
         force: Passed through unchanged as the job body's ``force`` field.
     """
     fields = dict(
         _id=uid,
         rfriends_score=rfs,
         mention_score=ats,
         force=force,
     )
     LookupJobBody(**fields).put(self.stalk)
     # Record the state only after the job has been handed to the queue.
     self.scores.set_state(uid, scoredict.LOOKUP)
Ejemplo n.º 2
0
 def read_scores(self):
     """Drain score jobs from ``self.stalk`` into ``self.scores``.

     Reserves at most 100000 jobs per call (10000000 when ``self.halt`` is
     already set). For each job:

     * a body equal to ``"halt"`` sets ``self.halt``, deletes the job and
       returns;
     * a body whose ``done`` flag is set marks the user as
       ``scoredict.DONE``;
     * any other body adds its ``rfriends_score`` and ``mention_score`` to
       the user's entry via ``self.scores.increment``.

     Returns early when ``reserve`` yields ``None``. Any exception sets
     ``self.halt``, buries the job reserved in the failing iteration (if
     there is one) and returns. The method always returns ``None``.
     """
     stop = 10000000 if self.halt else 100000
     for x in xrange(stop):
         # Reset on every iteration: if reserve() itself raises, the handler
         # must not touch the job from a previous iteration, which has
         # already been deleted.
         job = None
         try:
             # 35 is presumably the reserve timeout in seconds -- confirm
             # against the queue client in use.
             job = self.stalk.reserve(35)
             if job is None:
                 logging.info("loaded %d scores", x)
                 return
             if job.body == "halt":
                 self.halt = True
                 # Parenthesised single-argument form prints the same text
                 # with or without print_function.
                 print("starting to halt...")
                 logging.info("starting to halt...")
                 job.delete()
                 return
             body = LookupJobBody.from_job(job)
             if body.done:
                 self.scores.set_state(body._id, scoredict.DONE)
             else:
                 self.scores.increment(
                     body._id,
                     body.rfriends_score,
                     body.mention_score
                 )
             job.delete()
         except:
             # NOTE(review): the bare except also traps KeyboardInterrupt and
             # SystemExit. It is kept because this handler turns every
             # failure into a halt -- confirm that is the intended policy.
             logging.exception("exception in read_scores caused HALT")
             self.halt = True
             if job:
                 job.bury()
             return
Ejemplo n.º 3
0
    def run(self):
        """Process lookup jobs from ``self.stalk`` in batches, forever.

        Each pass reserves up to 100 jobs, looks the corresponding users up
        with one ``self.twitter.user_lookup`` call, and then handles them one
        by one:

        * no user returned for a job: the job is deleted;
        * the user is already stored (``User.in_db``) and the job is not
          forced: the job is deleted without crawling;
        * otherwise the user is crawled with ``self.crawl_user``, saved, and
          the job is deleted.

        A job whose processing raises an ``Exception`` is buried and the loop
        moves on. If the whole lookup raises ``ResourceNotFound``, every job
        in the batch is deleted. This method never returns normally.
        """
        while True:
            jobs = []
            for x in xrange(100):
                try:
                    # reserve blocks to wait when x is 0, but returns None for 1-99
                    j = self.stalk.reserve(0 if x else None)
                except beanstalkc.DeadlineSoon:
                    break
                if j is None:
                    break
                jobs.append(j)

            if not jobs:
                # DeadlineSoon fired on the very first reserve; there is
                # nothing to look up, so do not call the API with no ids.
                continue

            bodies = [LookupJobBody.from_job(j) for j in jobs]
            ids = [b._id for b in bodies]
            try:
                users = self.twitter.user_lookup(ids)
            except ResourceNotFound:
                logging.info("no profile for %r", ids)
                # Settle the reserved jobs the same way the per-user
                # "no profile" branch below does; leaving them reserved
                # would only hand them back to the queue for another try.
                for job in jobs:
                    job.delete()
                continue

            logging.info(
                "looking at %r",
                [getattr(u, 'screen_name', '') for u in users]
            )
            # NOTE(review): this relies on user_lookup returning one entry per
            # requested id, in order, with None for missing users. zip would
            # silently drop trailing jobs otherwise -- confirm.
            for job, body, user in zip(jobs, bodies, users):
                if user is None:
                    logging.info("no profile for %d", body._id)
                    job.delete()
                    continue
                try:
                    self.twitter.sleep_if_needed()
                    logging.info("look at %s", user.screen_name)
                    if (not body.force) and User.in_db(user._id):
                        job.delete()
                        continue
                    self.crawl_user(user, body.force)
                    user.save()
                    job.delete()
                except Exception:
                    # Exception rather than a bare except, so that
                    # KeyboardInterrupt/SystemExit can still stop the worker.
                    logging.exception("exception for job %s", job.body)
                    job.bury()
            logging.info("api calls remaining: %d", self.twitter.remaining)