コード例 #1
0
    def run(self):
        """Process identifiers from ``self.queue`` until a ``None`` sentinel arrives.

        For every queue item a ``TwitterUser`` is created (keyed by
        ``user_id`` when ``self.gets_user_id`` is truthy, otherwise by
        ``screen_name``) and its tweets are fetched with JSON output going to
        ``<out_dir>/json``.  Depending on ``self.populate_lists``,
        ``self.populate_friends`` and ``self.populate_followers`` the matching
        ``populate_*`` calls are made, and when any of those flags or
        ``self.to_pickle`` is set the user is pickled (with ``tweets``
        emptied) to ``<out_dir>/obj/<identifier>``.

        Any ``Exception`` raised while handling one item is reported on stdout
        and the loop carries on with the next item.

        All output uses single-argument ``print(...)`` so the method behaves
        the same under the Python 2 print statement and the print function.
        """
        print('Worker started')
        # do some initialization here

        while True:
            data = self.queue.get(True)
            try:
                if data is None:
                    print('%s %s' % ('ALL FINISHED!!!!', self.conn_number))
                    break

                print('%s %s' % ('Starting: ', data))

                # '%s' formatting turns an integer id into text (os.path.join
                # rejects non-string components) and leaves strings untouched.
                pickle_path = os.path.join(self.out_dir, "obj", '%s' % (data,))

                if self.gets_user_id:
                    user = TwitterUser(self.api_hook, user_id=data)
                else:
                    user = TwitterUser(self.api_hook, screen_name=data)

                user.populate_tweets_from_api(
                    json_output_directory=os.path.join(self.out_dir, "json"))

                should_pickle = (self.to_pickle or self.populate_lists
                                 or self.populate_friends
                                 or self.populate_followers)

                if len(user.tweets) == 0:
                    # Nothing was collected: store the bare user (if pickling
                    # is enabled) and skip the remaining populate_* calls.
                    if should_pickle:
                        print('%s %s' % ('pickling and dumping: ',
                                         user.screen_name))
                        with open(pickle_path, "wb") as pickle_file:
                            pickle.dump(user, pickle_file)
                    continue

                if self.populate_lists:
                    user.populate_lists_member_of()

                if self.populate_friends:
                    print('%s %s' % ('populating friends, ', user.screen_name))
                    user.populate_friends()

                if self.populate_followers:
                    print('%s %s' % ('populating followers, ',
                                     user.screen_name))
                    user.populate_followers()

                if should_pickle:
                    # Pickle and dump user; tweets are dropped from the object
                    # before it is written.
                    print('%s %s' % ('pickling and dumping (no tweets): ',
                                     user.screen_name))
                    user.tweets = []
                    with open(pickle_path, "wb") as pickle_file:
                        pickle.dump(user, pickle_file)
            except Exception:
                print('%s %s' % ('FAILED:: ', data))
                exc_type, exc_value, exc_traceback = sys.exc_info()
                print("*** print_tb:")
                traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
                print("*** print_exception:")
                # The header above used to be followed by nothing; emit the
                # exception type and message so failures can be diagnosed.
                sys.stdout.write(''.join(
                    traceback.format_exception_only(exc_type, exc_value)))

            print('%s %s' % ('finished collecting data for: ', data))
コード例 #2
0
    def run(self):
        """Snowball-sample users, starting from the pairs found on ``self.queue``.

        Each queue item is a ``(user_id, snow_sample_number)`` pair.  The user
        is loaded from ``<out_dir>/obj/<user_id>`` when that pickle already
        exists; otherwise a ``TwitterUser`` is built, its tweets are fetched
        (JSON written under ``<out_dir>/json``), its friends are populated and
        the user is pickled with ``tweets`` emptied.  A freshly fetched user
        with no tweets is pickled as-is and not expanded further.

        While ``snow_sample_number < self.step_count`` every key of
        ``user.mentioned`` is queued again with the depth increased by one.

        The loop has no exit condition and no exception handling of its own:
        it ends only when ``self.queue.get`` or the processing raises.
        """
        print("Worker started")

        while True:
            user_id, snow_sample_number = self.queue.get(True)

            print('%s %s %s' % ("Starting: ", user_id, snow_sample_number))

            # Text form of the id, used for file names: an integer id would
            # neither match a stored file name nor be accepted by os.path.join.
            user_key = '%s' % (user_id,)
            pickle_path = os.path.join(self.out_dir, "obj", user_key)

            # Get the ego.  A direct existence check replaces listing the
            # whole obj directory for every queue item.
            if os.path.exists(pickle_path):
                print('%s %s' % ("\tgot pickled: ", user_id))
                # NOTE: pickle.load executes whatever the file contains; this
                # is only safe because the directory holds this worker's own
                # output.
                with open(pickle_path, "rb") as pickle_file:
                    user = pickle.load(pickle_file)
            else:
                user = TwitterUser(self.api_hook, user_id=user_id)
                user.populate_tweets_from_api(
                    json_output_directory=os.path.join(self.out_dir, "json"))

                if len(user.tweets) == 0:
                    print('%s %s' % ("pickling and dumping: ",
                                     user.screen_name))
                    with open(pickle_path, "wb") as pickle_file:
                        pickle.dump(user, pickle_file)
                    continue

                print('%s %s' % ("populating friends, ", user.screen_name))
                user.populate_friends()

                print('%s %s' % ("pickling and dumping (no tweets): ",
                                 user.screen_name))
                user.tweets = []
                with open(pickle_path, "wb") as pickle_file:
                    pickle.dump(user, pickle_file)

            # Queue every mentioned id one level deeper until the configured
            # depth is reached.  No network file is written by this worker.
            added = 0
            if snow_sample_number < self.step_count:
                for following_id in user.mentioned.keys():
                    added += 1
                    self.queue.put([str(following_id), snow_sample_number + 1])

            print('%s %s' % ("finished collecting data for: ", user_id))
            print('%s %s' % ("added: ", added))
コード例 #3
0
    def run(self):
        """Snowball-sample users and write each user's friend edges to disk.

        Each queue item is a ``(user_id, snow_sample_number)`` pair.  The user
        is loaded from ``<pickle_dir>/<user_id>`` when that file exists;
        otherwise a ``TwitterUser`` is built, its tweets and friends are
        populated and the user is pickled to that path.

        For every id in ``user.friend_ids`` a ``"<user_id>,<friend_id>"`` line
        is written to ``<network_dir>/<user_id>``; while
        ``snow_sample_number < 2`` the friend is also queued with the depth
        increased by one.

        The loop has no exit condition and no exception handling of its own:
        it ends only when ``self.queue.get`` or the processing raises.
        """
        print("Worker started")

        while True:
            user_id, snow_sample_number = self.queue.get(True)

            print('%s %s %s' % ("Starting: ", user_id, snow_sample_number))

            # Text form of the id, used for file names and edge lines: an
            # integer id cannot be joined into a path or a "," separated line.
            user_key = '%s' % (user_id,)

            # One path is used for lookup, load and dump.  The lookup used to
            # glob ``pickle_dir + "*"``, which only lists the directory's
            # contents when pickle_dir ends with a separator.
            pickle_path = os.path.join(self.pickle_dir, user_key)

            # Get the ego
            if os.path.exists(pickle_path):
                print('%s %s' % ("\tgot pickled: ", user_id))
                # NOTE: pickle.load executes whatever the file contains; this
                # is only safe because the directory holds this worker's own
                # output.
                with open(pickle_path, "rb") as pickle_file:
                    user = pickle.load(pickle_file)
            else:
                user = TwitterUser(self.api_hook, user_id=user_id)
                print('%s %s' % ("\tgetting tweets for: ", user_id))
                user.populate_tweets_from_api()
                print('%s %s %s %s' % ("\t num tweets received for: ",
                                       user_id, " ", len(user.tweets)))

                print('%s %s' % ("\tgetting friends for: ", user_id))
                user.populate_friends()

                print('%s %s' % ("pickling: ", user_id))
                with open(pickle_path, "wb") as pickle_file:
                    pickle.dump(user, pickle_file)

            # Write out their following network and add each id to the queue.
            # The with-block closes the file even if a write or put fails.
            added = 0
            network_path = os.path.join(self.network_dir, user_key)
            with codecs.open(network_path, "w", "utf-8") as network_fil:
                for following_id in user.friend_ids:
                    # Expansion depth is fixed at two steps for this worker.
                    if snow_sample_number < 2:
                        added += 1
                        self.queue.put(
                            [str(following_id), snow_sample_number + 1])
                    network_fil.write(
                        ",".join([user_key, str(following_id)]) + "\n")

            print('%s %s' % ("finished collecting data for: ", user_id))
            print('%s %s' % ("added: ", added))
コード例 #4
0
    def run(self):
        """Process queue items until a ``None`` sentinel or ``KeyboardInterrupt``.

        A queue item is either a bare identifier or a list/tuple:

        * one element: ``[identifier]``
        * two elements: ``[identifier, snow_sample_number]`` when
          ``self.step_count`` is truthy, else ``[identifier, since_tweet_id]``
          when ``self.gets_since_tweet_id`` is truthy
        * three elements: ``[identifier, snow_sample_number, since_tweet_id]``

        If ``<out_dir>/obj/<identifier>`` and
        ``<out_dir>/json/<identifier>.json.gz`` both exist and
        ``self.add_to_file`` is falsy, the user is loaded from those files.
        Otherwise a ``TwitterUser`` is built and, depending on the
        ``populate_*`` / ``save_*`` flags, tweets, lists, friends and
        followers are fetched and the user is pickled with ``tweets`` emptied.

        When a depth was supplied and is below ``self.step_count``, the
        identifiers returned by ``self.add_users_to_queue_function(user)`` are
        queued one level deeper.  ``self.post_process_function`` is called
        with the user when set.

        Any other ``Exception`` raised for an item is reported on stdout and
        the loop continues with the next item.
        """
        print('Worker started')
        # do some initialization here
        while True:
            data = self.queue.get(True)

            # Reset per item: a depth or since-id taken from one queue item
            # must never be applied to the next one.
            snow_sample_number = None
            since_tweet_id = None

            try:
                if data is None:
                    print('ALL FINISHED!!!!')
                    break

                if isinstance(data, (list, tuple)):
                    item_len = len(data)
                    if item_len == 1:
                        # Unwrap, so the identifier is the element and not
                        # the repr of the enclosing list.
                        user_identifier = data[0]
                    elif item_len == 3:
                        user_identifier, snow_sample_number, since_tweet_id = data
                    elif item_len == 2 and self.step_count:
                        user_identifier, snow_sample_number = data
                    elif item_len == 2 and self.gets_since_tweet_id:
                        user_identifier, since_tweet_id = data
                    else:
                        # Fail loudly rather than reuse the identifier of the
                        # previous item.
                        raise ValueError(
                            'unrecognised queue item: %r' % (data,))
                else:
                    # Bare identifier (string or number).  Calling len() on a
                    # number raised TypeError before the type was checked.
                    user_identifier = data

                user_identifier = str(user_identifier)

                print('%s %s' % ('Starting: ', data))

                pickle_filename = os.path.join(self.out_dir, "obj",
                                               user_identifier)
                json_filename = os.path.join(self.out_dir, "json",
                                             user_identifier + ".json.gz")

                # Get the user's data
                if (os.path.exists(pickle_filename)
                        and os.path.exists(json_filename)
                        and not self.add_to_file):
                    print('%s %s' % ('\tgot existing data for: ', data))
                    # NOTE: pickle.load executes whatever the file contains;
                    # only safe because out_dir holds this worker's own output.
                    with open(pickle_filename, "rb") as pickle_file:
                        user = pickle.load(pickle_file)
                    user.populate_tweets_from_file(json_filename)
                else:
                    if self.gets_user_id:
                        user = TwitterUser(self.api_hook,
                                           user_id=user_identifier)
                    else:
                        user = TwitterUser(self.api_hook,
                                           screen_name=user_identifier)

                    print('%s %s' % ('populating tweets', user_identifier))

                    if self.populate_tweets:
                        fetch_kwargs = {
                            'since_id': since_tweet_id,
                            'populate_object_with_tweets': False,
                        }
                        if self.save_user_tweets:
                            print('%s %s' % ('saving tweets to: ',
                                             json_filename))
                            fetch_kwargs['json_output_filename'] = json_filename
                        # The first returned value (output file name) is not
                        # needed here.
                        _, tweet_count = user.populate_tweets_from_api(
                            **fetch_kwargs)

                        if self.tweet_count_file:
                            self.tweet_count_file.write(
                                str(user_identifier) + "\t" +
                                str(tweet_count) + "\n")

                    if self.populate_lists:
                        print('%s %s' % ('populating lists', user.screen_name))
                        user.populate_lists_member_of()

                    if self.populate_friends:
                        print('%s %s' % ('populating friends, ',
                                         user.screen_name))
                        user.populate_friends()

                    if self.populate_followers:
                        print('%s %s' % ('populating followers, ',
                                         user.screen_name))
                        user.populate_followers()

                    if self.save_user_data and \
                        (self.always_pickle or self.populate_lists
                         or self.populate_friends or self.populate_followers):
                        # Pickle and dump user without its tweets
                        user.tweets = []
                        with open(pickle_filename, "wb") as pickle_file:
                            pickle.dump(user, pickle_file)

                # now add to queue if necessary
                if snow_sample_number is not None and snow_sample_number < self.step_count:
                    for next_identifier in self.add_users_to_queue_function(
                            user):
                        self.queue.put(
                            [str(next_identifier), snow_sample_number + 1])

                if self.post_process_function:
                    self.post_process_function(user)

            except KeyboardInterrupt as e:
                print(e)
                break
            except Exception:
                print('%s %s' % ('FAILED:: ', data))
                exc_type, exc_value, exc_traceback = sys.exc_info()
                print("*** print_tb:")
                traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
                print("*** print_exception:")
                # The header above used to be followed by nothing; emit the
                # exception type and message so failures can be diagnosed.
                sys.stdout.write(''.join(
                    traceback.format_exception_only(exc_type, exc_value)))