Ejemplo n.º 1
0
    def notifyProgress(self, client, result, status, attributes):
        """Update the per-operation counters and queue the analyzer's targets.

        The "ongoing" counter matching ``result.operation`` is decremented
        and, when ``status`` equals ``STATUS_COMPLETED``, the matching
        "completed" counter is incremented. The counters are then forwarded
        to ``updateStatistics``. For analyzer results, every entry of
        ``attributes['analyzer.target_users']`` that is not yet a member of
        ``USERS_SELECTED`` becomes a new timeline job and is queued
        according to ``settings.TRAVERSING``.

        :param client: passed to ``getNickname`` to label the statistics.
        :param result: the reported job; ``operation``, ``user_id`` and
                       ``ANALYZER_OP`` are read from it.
        :param status: status reported for the job.
        :param attributes: mapping forwarded to ``updateStatistics``; the
                           optional ``'analyzer.target_users'`` key holds
                           the candidate user ids.
        """
        ot, of, oa, ou = 0, 0, 0, 0

        if result.operation == TwitterJob.TIMELINE_OP:
            ot = self.redis.decr(Stats.TIMELINE_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.TIMELINE_COMPLETED)
        elif result.operation == TwitterJob.FOLLOWER_OP:
            of = self.redis.decr(Stats.FOLLOWER_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.FOLLOWER_COMPLETED)
        elif result.operation == TwitterJob.ANALYZER_OP:
            oa = self.redis.decr(Stats.ANALYZER_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.ANALYZER_COMPLETED)
        elif result.operation == TwitterJob.UPDATE_OP:
            ou = self.redis.decr(Stats.UPDATE_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.UPDATE_COMPLETED)

        # decr is presumed to return the post-decrement value (Redis DECR
        # semantics), so "+ 1" restores the count before this notification.
        # NOTE(review): counters not touched above are passed as 1 --
        # confirm that updateStatistics expects this.
        self.updateStatistics(self.getNickname(client), result, attributes,
                              ot + 1, of + 1, oa + 1, ou + 1)

        if result.operation == result.ANALYZER_OP:
            counter = 0

            for target_user in attributes.get('analyzer.target_users', []):
                # Only the id conversion can reveal bogus analyzer data, so
                # it is the only step reported as such.
                try:
                    user_id = int(target_user)
                except (TypeError, ValueError):
                    log.msg("Bogus data from the analyzer: %s is not a user_id" % (target_user,))
                    continue

                # Best effort: a failure while queueing one user is logged
                # with its real cause and must not prevent the remaining
                # users from being processed.
                try:
                    newjob = TwitterJob(TwitterJob.TIMELINE_OP, user_id, 0)

                    # NOTE: This is the point you may want to customize. By default new
                    #       jobs are simply inserted in the FRONTIER_NAME. For a
                    #       continuous BFS the job is inserted at the end of STREAM
                    #       (rpush). For DFS traversal the job is inserted in front of
                    #       STREAM (lpush). If you need a priority queue use ZSETs.
                    if not self.redis.sismember(self.USERS_SELECTED, target_user):
                        traversing = settings.TRAVERSING.upper()
                        serialized = TwitterJob.serialize(newjob)

                        if traversing == 'BFS':
                            self.redis.rpush(self.STREAM, serialized)
                        elif traversing == 'DFS':
                            self.redis.lpush(self.STREAM, serialized)
                        else:
                            self.redis.rpush(self.FRONTIER_NAME, serialized)

                        self.redis.sadd(self.USERS_SELECTED, target_user)
                        counter += 1
                except Exception as exc:
                    log.msg("Unable to schedule user %s: %s" % (target_user, exc))

            if counter > 0:
                log.msg("Adding a total of %d users. These were %d 's friends" % (counter, result.user_id))
            else:
                log.msg("No admissible friends were discovered while traversing %d 's followers list" % (result.user_id))
Ejemplo n.º 2
0
    def onJobReturned(self, response):
        """Dispatch the value returned by a job to the proper handler.

        The factory's ``thread_working`` flag is cleared first. A value that
        is not a ``TwitterResponse`` is reported through ``onJobFailed``.
        Otherwise ``current_job`` is reset and the response status decides
        what happens next:

        * ``STATUS_COMPLETED`` / ``STATUS_BANNED``: a new ``TwitterJob``
          carrying ``response.state`` is handed to ``onJobCompleted`` (for
          a ban, ``sleep_time`` is copied from the response first).
        * ``STATUS_UNAUTHORIZED`` / ``STATUS_ERROR``: the unchanged job is
          handed to ``onJobCompleted``.
        * anything else: reported through ``onJobFailed``.

        :param response: value returned by the executed job.
        :returns: whatever ``onJobCompleted`` / ``onJobFailed`` return.
        """
        self.factory.thread_working = False

        if not isinstance(response, TwitterResponse):
            reason = failure.Failure(
                Exception("I was expecting a TwitterResponse object. Got %s" %
                          str(response)), Exception)
            return self.onJobFailed(reason)

        log.msg("Twitter job executed. Response is %s" % str(response))

        job = self.current_job
        self.current_job = None

        if response.status in (STATUS_COMPLETED, STATUS_BANNED):

            if response.status == STATUS_BANNED:
                self.sleep_time = response.sleep_time

            next_job = TwitterJob(job.operation, job.user_id, response.state)
            return self.onJobCompleted(response.status, next_job,
                                       response.attributes)

        elif response.status in (STATUS_UNAUTHORIZED, STATUS_ERROR):
            return self.onJobCompleted(response.status, job,
                                       response.attributes)

        # An unknown status is not guaranteed to be numeric (it could be
        # None, for instance). Formatting it with %d would raise TypeError
        # here and the failure would never reach onJobFailed, so %s is used.
        reason = failure.Failure(
            Exception("Unknown status %s. Don't know how to proceed" %
                      (response.status,)), Exception)
        return self.onJobFailed(reason)
Ejemplo n.º 3
0
    def transformJob(self, result):
        """Derive the follow-up job for ``result``, if any.

        ``self.transformation`` is treated as a bit mask of ``TRANSFORM_*``
        flags. When it is exactly one of the single flags there is nothing
        to chain and ``None`` is returned. Otherwise a finished timeline job
        is followed by a follower job, and a finished follower job by an
        analyzer job.

        :param result: the job just processed; ``operation``, ``user_id``
                       and the ``*_OP`` constants are read from it.
        :returns: ``(False, TwitterJob)`` when a follow-up job exists,
                  ``None`` otherwise.
        """
        # Redundant guard: without it the method would end up returning
        # None for these values anyway.
        single_flags = (TRANSFORM_TIMELINE, TRANSFORM_FOLLOWER,
                        TRANSFORM_ANALYZER)
        if self.transformation in single_flags:
            return None

        # Asking for just -t and -a implies -f.
        if (result.operation == result.TIMELINE_OP
                and self.transformation & (TRANSFORM_FOLLOWER | TRANSFORM_ANALYZER)):
            follower_job = TwitterJob(TwitterJob.FOLLOWER_OP, result.user_id,
                                      -1)
            return False, follower_job

        if (result.operation == result.FOLLOWER_OP
                and self.transformation & (TRANSFORM_ANALYZER)):
            analyzer_job = TwitterJob(TwitterJob.ANALYZER_OP, result.user_id, 0)
            return False, analyzer_job

        return None
Ejemplo n.º 4
0
    def manageLostClient(self, client):
        """Release the "ongoing" counter held by a client that went away.

        When the client is marked ``WORKER_WORKING`` its assigned job is
        deserialized and the "ongoing" counter of that job's operation is
        decremented. The call is then delegated to
        ``JobTrackerFactory.manageLostClient``.

        :param client: key into ``self.clients`` / ``self.assigned_jobs``.
        :returns: the result of ``JobTrackerFactory.manageLostClient``.
        """
        if self.clients[client] == WORKER_WORKING:
            lost_job = TwitterJob.deserialize(self.assigned_jobs[client])

            # Ordered (operation, counter) pairs; the first match wins.
            ongoing_counters = (
                (TwitterJob.TIMELINE_OP, Stats.TIMELINE_ONGOING),
                (TwitterJob.FOLLOWER_OP, Stats.FOLLOWER_ONGOING),
                (TwitterJob.ANALYZER_OP, Stats.ANALYZER_ONGOING),
                (TwitterJob.UPDATE_OP, Stats.UPDATE_ONGOING),
            )
            for operation, counter_key in ongoing_counters:
                if lost_job.operation == operation:
                    self.redis.decr(counter_key)
                    break

        return JobTrackerFactory.manageLostClient(self, client)
Ejemplo n.º 5
0
    def manageLostClient(self, client):
        """Account for a client that disappeared, then delegate upward.

        A client marked ``WORKER_WORKING`` still counts as one "ongoing"
        operation; the counter matching its assigned job is decremented
        before ``JobTrackerFactory.manageLostClient`` takes over.

        :param client: key into ``self.clients`` / ``self.assigned_jobs``.
        :returns: the result of ``JobTrackerFactory.manageLostClient``.
        """
        client_state = self.clients[client]

        if client_state == WORKER_WORKING:
            serialized_job = self.assigned_jobs[client]
            operation = TwitterJob.deserialize(serialized_job).operation

            # Operations other than the four below change no counter.
            if operation == TwitterJob.TIMELINE_OP:
                self.redis.decr(Stats.TIMELINE_ONGOING)
            elif operation == TwitterJob.FOLLOWER_OP:
                self.redis.decr(Stats.FOLLOWER_ONGOING)
            elif operation == TwitterJob.ANALYZER_OP:
                self.redis.decr(Stats.ANALYZER_ONGOING)
            elif operation == TwitterJob.UPDATE_OP:
                self.redis.decr(Stats.UPDATE_ONGOING)

        return JobTrackerFactory.manageLostClient(self, client)
Ejemplo n.º 6
0
    def assignJobTo(self, client):
        """Assign a job through the base factory and track it as ongoing.

        The assignment itself is done by ``JobTrackerFactory.assignJobTo``.
        When it yields a ``TYPE_JOB`` entry, the job is deserialized and the
        "ongoing" counter of its operation is incremented; this only feeds
        the general statistics.

        :param client: the client the job is assigned to.
        :returns: the unchanged ``(type, job)`` pair from the base factory.
        """
        kind, payload = JobTrackerFactory.assignJobTo(self, client)

        if kind == TYPE_JOB:
            assigned = TwitterJob.deserialize(payload)

            # Ordered (operation, counter) pairs; the first match wins.
            ongoing_counters = (
                (TwitterJob.TIMELINE_OP, Stats.TIMELINE_ONGOING),
                (TwitterJob.FOLLOWER_OP, Stats.FOLLOWER_ONGOING),
                (TwitterJob.ANALYZER_OP, Stats.ANALYZER_ONGOING),
                (TwitterJob.UPDATE_OP, Stats.UPDATE_ONGOING),
            )
            for operation, counter_key in ongoing_counters:
                if assigned.operation == operation:
                    self.redis.incr(counter_key)
                    break

        return kind, payload
Ejemplo n.º 7
0
    def assignJobTo(self, client):
        """Hand out a job via the base factory, counting it as ongoing.

        Entries that are not of ``TYPE_JOB`` pass through untouched. For a
        real job the matching "ongoing" statistics counter is incremented.

        :param client: the client the job is assigned to.
        :returns: the ``(type, job)`` pair produced by
                  ``JobTrackerFactory.assignJobTo``.
        """
        assignment = JobTrackerFactory.assignJobTo(self, client)
        entry_type, serialized_job = assignment

        # Bookkeeping only: keeps some general statistics up to date.
        if entry_type == TYPE_JOB:
            operation = TwitterJob.deserialize(serialized_job).operation

            if operation == TwitterJob.TIMELINE_OP:
                self.redis.incr(Stats.TIMELINE_ONGOING)
            elif operation == TwitterJob.FOLLOWER_OP:
                self.redis.incr(Stats.FOLLOWER_ONGOING)
            elif operation == TwitterJob.ANALYZER_OP:
                self.redis.incr(Stats.ANALYZER_ONGOING)
            elif operation == TwitterJob.UPDATE_OP:
                self.redis.incr(Stats.UPDATE_ONGOING)

        return entry_type, serialized_job
Ejemplo n.º 8
0
    def notifyProgress(self, client, result, status, attributes):
        """Update the per-operation counters and queue the analyzer's targets.

        The "ongoing" counter matching ``result.operation`` is decremented
        and, when ``status`` equals ``STATUS_COMPLETED``, the matching
        "completed" counter is incremented. The counters are then forwarded
        to ``updateStatistics``. For analyzer results, every entry of
        ``attributes['analyzer.target_users']`` that is not yet a member of
        ``USERS_SELECTED`` becomes a new timeline job and is queued
        according to ``settings.TRAVERSING``.

        :param client: passed to ``getNickname`` to label the statistics.
        :param result: the reported job; ``operation``, ``user_id`` and
                       ``ANALYZER_OP`` are read from it.
        :param status: status reported for the job.
        :param attributes: mapping forwarded to ``updateStatistics``; the
                           optional ``'analyzer.target_users'`` key holds
                           the candidate user ids.
        """
        ot, of, oa, ou = 0, 0, 0, 0

        if result.operation == TwitterJob.TIMELINE_OP:
            ot = self.redis.decr(Stats.TIMELINE_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.TIMELINE_COMPLETED)
        elif result.operation == TwitterJob.FOLLOWER_OP:
            of = self.redis.decr(Stats.FOLLOWER_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.FOLLOWER_COMPLETED)
        elif result.operation == TwitterJob.ANALYZER_OP:
            oa = self.redis.decr(Stats.ANALYZER_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.ANALYZER_COMPLETED)
        elif result.operation == TwitterJob.UPDATE_OP:
            ou = self.redis.decr(Stats.UPDATE_ONGOING)
            if status == STATUS_COMPLETED:
                self.redis.incr(Stats.UPDATE_COMPLETED)

        # decr is presumed to return the post-decrement value (Redis DECR
        # semantics), so "+ 1" restores the count before this notification.
        # NOTE(review): counters not touched above are passed as 1 --
        # confirm that updateStatistics expects this.
        self.updateStatistics(self.getNickname(client), result, attributes,
                              ot + 1, of + 1, oa + 1, ou + 1)

        if result.operation == result.ANALYZER_OP:
            counter = 0

            for target_user in attributes.get('analyzer.target_users', []):
                # Only the id conversion can reveal bogus analyzer data, so
                # it is the only step reported as such.
                try:
                    user_id = int(target_user)
                except (TypeError, ValueError):
                    log.msg(
                        "Bogus data from the analyzer: %s is not a user_id" %
                        (target_user,))
                    continue

                # Best effort: a failure while queueing one user is logged
                # with its real cause and must not prevent the remaining
                # users from being processed.
                try:
                    newjob = TwitterJob(TwitterJob.TIMELINE_OP, user_id, 0)

                    # NOTE: This is the point you may want to customize. By default new
                    #       jobs are simply inserted in the FRONTIER_NAME. For a
                    #       continuous BFS the job is inserted at the end of STREAM
                    #       (rpush). For DFS traversal the job is inserted in front of
                    #       STREAM (lpush). If you need a priority queue use ZSETs.
                    if not self.redis.sismember(self.USERS_SELECTED,
                                                target_user):
                        traversing = settings.TRAVERSING.upper()
                        serialized = TwitterJob.serialize(newjob)

                        if traversing == 'BFS':
                            self.redis.rpush(self.STREAM, serialized)
                        elif traversing == 'DFS':
                            self.redis.lpush(self.STREAM, serialized)
                        else:
                            self.redis.rpush(self.FRONTIER_NAME, serialized)

                        self.redis.sadd(self.USERS_SELECTED, target_user)
                        counter += 1
                except Exception as exc:
                    log.msg(
                        "Unable to schedule user %s: %s" % (target_user, exc))

            if counter > 0:
                log.msg(
                    "Adding a total of %d users. These were %d 's friends" %
                    (counter, result.user_id))
            else:
                log.msg(
                    "No admissible friends were discovered while traversing %d 's followers list"
                    % (result.user_id))