Esempio n. 1
0
 def pick_users(self, cutoff):
     """Queue a lookup job for every new user whose score reaches ``cutoff``.

     Walks ``self.scores``.  For each id that is still in state
     ``scoredict.NEW`` and whose ``log_score`` over its two scores is at
     least ``cutoff``, a ``LookupJobBody`` is put on ``self.stalk``, the id
     is moved to state ``scoredict.LOOKUP`` and ``self.lookups`` is bumped.
     """
     for user_id in self.scores:
         status, friends_score, mentions_score = self.scores.split(user_id)

         # Same short-circuit as a single ``and``: the score is only
         # computed for ids that are still new.
         is_new = status == scoredict.NEW
         if not (is_new and log_score(friends_score, mentions_score) >= cutoff):
             continue

         LookupJobBody(
             _id=as_local_id('U', user_id),
             rfriends_score=friends_score,
             mention_score=mentions_score,
         ).put(self.stalk)
         self.scores.set_state(user_id, scoredict.LOOKUP)
         self.lookups += 1
Esempio n. 2
0
 def pick_users(self, cutoff):
     """Enqueue lookups for new users scoring at or above ``cutoff``.

     For every id in ``self.scores`` the stored state and the two scores
     are read with ``self.scores.split``.  Ids that are not in state
     ``scoredict.NEW``, or whose ``log_score`` is below ``cutoff``, are
     skipped.  Every other id gets a ``LookupJobBody`` put on
     ``self.stalk``, is switched to ``scoredict.LOOKUP`` and is counted in
     ``self.lookups``.
     """
     for candidate in self.scores:
         status, rfriends, mentions = self.scores.split(candidate)

         # Guard clauses, evaluated in the same order as the combined test.
         if not (status == scoredict.NEW):
             continue
         if not (log_score(rfriends, mentions) >= cutoff):
             continue

         lookup = LookupJobBody(
             _id=as_local_id('U', candidate),
             rfriends_score=rfriends,
             mention_score=mentions,
         )
         lookup.put(self.stalk)
         self.scores.set_state(candidate, scoredict.LOOKUP)
         self.lookups += 1
Esempio n. 3
0
 def read_scores(self):
     """Drain score jobs from the queue into ``self.scores``.

     Jobs are reserved from ``self.stalk`` one at a time with
     ``reserve(120)`` (presumably a 120 second timeout -- confirm against
     the queue client).  A job whose body is marked ``done`` sets that
     user's state to ``scoredict.DONE``; any other job adds its two scores
     to the user's entry through ``self.scores.increment``.  Each applied
     job is deleted from the queue.

     The method returns when one of the following happens:

     * ``reserve`` returns ``None``;
     * a job whose body is the literal string ``"halt"`` arrives
       (``self.halt`` is set and the job is deleted);
     * the per-call limit is reached: 100000 jobs, or 10000000 when
       ``self.halt`` was already set on entry;
     * processing raises an ``Exception`` (it is logged, ``self.halt`` is
       set and the job being processed, if any, is buried).

     ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not
     intercepted so the process can still be stopped from outside.
     """
     stop = 10000000 if self.halt else 100000
     loaded = 0
     # A counting loop instead of xrange keeps this valid on Python 2 and 3
     # without materialising a ten-million element list.
     while loaded < stop:
         # Reset on every pass so the error handler never buries a job that
         # an earlier pass has already deleted.
         job = None
         try:
             job = self.stalk.reserve(120)
             if job is None:
                 logging.info("loaded %d scores", loaded)
                 return
             if job.body == "halt":
                 self.halt = True
                 # Parenthesised form prints the same text on Python 2 and 3.
                 print("starting to halt...")
                 logging.info("starting to halt...")
                 job.delete()
                 return
             body = LookupJobBody.from_job(job)
             if body.done:
                 self.scores.set_state(as_int_id(body._id), scoredict.DONE)
             else:
                 self.scores.increment(
                     as_int_id(body._id),
                     body.rfriends_score,
                     body.mention_score
                 )
             job.delete()
         except Exception:
             logging.exception("exception in read_scores caused HALT")
             self.halt = True
             if job is not None:
                 job.bury()
             return
         loaded += 1
Esempio n. 4
0
    def run(self):
        """Crawl the users named by lookup jobs, batch after batch.

        Each pass reserves up to 20 jobs from ``self.stalk``, resolves all
        of them with a single ``self.twitter.user_lookup`` call and then,
        for every job whose user was returned, crawls and saves that user
        and deletes the job.  Users whose ``_id`` is already in
        ``User.database`` or ``self.orig_db`` are not crawled again; their
        job is simply deleted.  If handling a job raises an ``Exception``
        it is logged and the job is buried.

        Before each user, if fewer than 30 API calls remain, the loop
        sleeps until ``self.twitter.reset_time``.

        The loop has no exit condition: it runs until an exception escapes
        (``KeyboardInterrupt`` and ``SystemExit`` are not intercepted).
        """
        while True:
            jobs = []
            for attempt in range(20):
                # The first reserve blocks until a job arrives; the other 19
                # use a zero timeout and return None when nothing is ready.
                try:
                    j = self.stalk.reserve(0 if attempt else None)
                except beanstalkc.DeadlineSoon:
                    break
                if j is None:
                    break
                jobs.append(j)

            bodies = [LookupJobBody.from_job(j) for j in jobs]
            users = self.twitter.user_lookup([b._id for b in bodies])

            logging.info("looking at %r",
                         [getattr(u, 'screen_name', '') for u in users])
            for job, user in zip(jobs, users):
                # NOTE(review): a job whose user is None is neither deleted
                # nor buried here -- confirm that this is intended.
                if user is None:
                    continue
                try:
                    if self.twitter.remaining < 30:
                        dt = self.twitter.reset_time - datetime.utcnow()
                        logging.info("goodnight for %r", dt)
                        # timedelta.seconds ignores the days component and is
                        # close to 86400 for a slightly negative delta, so a
                        # reset time already in the past used to cause a
                        # day-long sleep.  Use the full, clamped duration.
                        time.sleep(max(0, dt.total_seconds()))
                    logging.info("look at %s", user.screen_name)
                    if user._id in User.database or user._id in self.orig_db:
                        job.delete()
                        continue
                    self.crawl_user(user)
                    user.save()
                    job.delete()
                except Exception:
                    # Exception rather than a bare except, so Ctrl-C and
                    # SystemExit stop the loop instead of burying the job.
                    logging.exception("exception for job %s", job.body)
                    job.bury()
            logging.info("api calls remaining: %d", self.twitter.remaining)
Esempio n. 5
0
 def read_scores(self):
     """Drain score jobs from the queue into ``self.scores``.

     Jobs are reserved from ``self.stalk`` one at a time with
     ``reserve(120)`` (presumably a 120 second timeout -- confirm against
     the queue client).  A job whose body is marked ``done`` sets that
     user's state to ``scoredict.DONE``; any other job adds its two scores
     to the user's entry through ``self.scores.increment``.  Each applied
     job is deleted from the queue.

     The method returns when one of the following happens:

     * ``reserve`` returns ``None``;
     * a job whose body is the literal string ``"halt"`` arrives
       (``self.halt`` is set and the job is deleted);
     * the per-call limit is reached: 100000 jobs, or 10000000 when
       ``self.halt`` was already set on entry;
     * processing raises an ``Exception`` (it is logged, ``self.halt`` is
       set and the job being processed, if any, is buried).

     ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not
     intercepted so the process can still be stopped from outside.
     """
     stop = 10000000 if self.halt else 100000
     loaded = 0
     # A counting loop instead of xrange keeps this valid on Python 2 and 3
     # without materialising a ten-million element list.
     while loaded < stop:
         # Reset on every pass so the error handler never buries a job that
         # an earlier pass has already deleted.
         job = None
         try:
             job = self.stalk.reserve(120)
             if job is None:
                 logging.info("loaded %d scores", loaded)
                 return
             if job.body == "halt":
                 self.halt = True
                 # Parenthesised form prints the same text on Python 2 and 3.
                 print("starting to halt...")
                 logging.info("starting to halt...")
                 job.delete()
                 return
             body = LookupJobBody.from_job(job)
             if body.done:
                 self.scores.set_state(as_int_id(body._id), scoredict.DONE)
             else:
                 self.scores.increment(as_int_id(body._id),
                                       body.rfriends_score,
                                       body.mention_score)
             job.delete()
         except Exception:
             logging.exception("exception in read_scores caused HALT")
             self.halt = True
             if job is not None:
                 job.bury()
             return
         loaded += 1
Esempio n. 6
0
    def run(self):
        """Crawl the users named by lookup jobs, batch after batch.

        Each pass reserves up to 20 jobs from ``self.stalk``, resolves all
        of them with a single ``self.twitter.user_lookup`` call and then,
        for every job whose user was returned, crawls and saves that user
        and deletes the job.  Users whose ``_id`` is already in
        ``User.database`` or ``self.orig_db`` are not crawled again; their
        job is simply deleted.  If handling a job raises an ``Exception``
        it is logged and the job is buried.

        Before each user, if fewer than 30 API calls remain, the loop
        sleeps until ``self.twitter.reset_time``.

        The loop has no exit condition: it runs until an exception escapes
        (``KeyboardInterrupt`` and ``SystemExit`` are not intercepted).
        """
        while True:
            jobs = []
            for attempt in range(20):
                # The first reserve blocks until a job arrives; the other 19
                # use a zero timeout and return None when nothing is ready.
                try:
                    j = self.stalk.reserve(0 if attempt else None)
                except beanstalkc.DeadlineSoon:
                    break
                if j is None:
                    break
                jobs.append(j)

            bodies = [LookupJobBody.from_job(j) for j in jobs]
            users = self.twitter.user_lookup([b._id for b in bodies])

            logging.info("looking at %r",
                         [getattr(u, 'screen_name', '') for u in users])
            for job, user in zip(jobs, users):
                # NOTE(review): a job whose user is None is neither deleted
                # nor buried here -- confirm that this is intended.
                if user is None:
                    continue
                try:
                    if self.twitter.remaining < 30:
                        dt = self.twitter.reset_time - datetime.utcnow()
                        logging.info("goodnight for %r", dt)
                        # timedelta.seconds ignores the days component and is
                        # close to 86400 for a slightly negative delta, so a
                        # reset time already in the past used to cause a
                        # day-long sleep.  Use the full, clamped duration.
                        time.sleep(max(0, dt.total_seconds()))
                    logging.info("look at %s", user.screen_name)
                    if user._id in User.database or user._id in self.orig_db:
                        job.delete()
                        continue
                    self.crawl_user(user)
                    user.save()
                    job.delete()
                except Exception:
                    # Exception rather than a bare except, so Ctrl-C and
                    # SystemExit stop the loop instead of burying the job.
                    logging.exception("exception for job %s", job.body)
                    job.bury()
            logging.info("api calls remaining: %d", self.twitter.remaining)