def run(self):
    """Process user identifiers from ``self.queue`` until a ``None`` sentinel.

    For each identifier a ``TwitterUser`` is built (by ``user_id`` when
    ``self.gets_user_id`` is truthy, otherwise by ``screen_name``), its tweets
    are fetched with JSON output going under ``<out_dir>/json``, and then,
    depending on the ``populate_lists`` / ``populate_friends`` /
    ``populate_followers`` flags, the matching ``populate_*`` calls are made.
    When ``to_pickle`` or any of those flags is set, the user object is
    pickled to ``<out_dir>/obj/<identifier>``.

    Any ``Exception`` raised while handling one item is reported on stdout
    (traceback plus exception type/message) and the loop moves on to the
    next item.

    Log lines are emitted through single-argument ``print(...)`` calls so the
    text is the same on Python 2 and Python 3; the spacing matches what the
    former multi-argument print calls produced.
    """
    print('Worker started')

    while True:
        data = self.queue.get(True)
        try:
            if data is None:
                # Sentinel: the producer has no more work for this worker.
                print('ALL FINISHED!!!! %s' % (self.conn_number,))
                break

            print('Starting:  %s' % (data,))
            if self.gets_user_id:
                user = TwitterUser(self.api_hook, user_id=data)
            else:
                user = TwitterUser(self.api_hook, screen_name=data)

            user.populate_tweets_from_api(
                json_output_directory=os.path.join(self.out_dir, "json"))

            should_pickle = (self.to_pickle or self.populate_lists
                             or self.populate_friends or self.populate_followers)
            # str() so integer ids are accepted by os.path.join as well.
            pickle_path = os.path.join(self.out_dir, "obj", str(data))

            if not user.tweets:
                # Nothing was collected: optionally persist the bare user and
                # skip the remaining population steps for this item.
                if should_pickle:
                    print('pickling and dumping:  %s' % (user.screen_name,))
                    with open(pickle_path, "wb") as pickle_file:
                        pickle.dump(user, pickle_file)
                continue

            if self.populate_lists:
                user.populate_lists_member_of()

            if self.populate_friends:
                print('populating friends,  %s' % (user.screen_name,))
                user.populate_friends()

            if self.populate_followers:
                print('populating followers,  %s' % (user.screen_name,))
                user.populate_followers()

            if should_pickle:
                print('pickling and dumping (no tweets):  %s' % (user.screen_name,))
                # Tweets are dropped before pickling; presumably they already
                # live in the JSON output -- TODO confirm.
                user.tweets = []
                with open(pickle_path, "wb") as pickle_file:
                    pickle.dump(user, pickle_file)
        except Exception:
            # Best-effort worker: report the failure and keep consuming.
            print('FAILED::  %s' % (data,))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print("*** print_tb:")
            traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
            print("*** print_exception:")
            # Emit the exception type and message under the header above.
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      limit=30, file=sys.stdout)

        print('finished collecting data for:  %s' % (data,))
def run(self):
    """Snowball-sample users from ``self.queue``, expanding via ``user.mentioned``.

    Each queue item is a ``(user_id, snow_sample_number)`` pair.  If a pickle
    for the user already exists at ``<out_dir>/obj/<user_id>`` it is loaded;
    otherwise a ``TwitterUser`` is created, its tweets are fetched (JSON
    output under ``<out_dir>/json``), its friends are populated, and the
    object is pickled with ``tweets`` emptied.  A freshly fetched user with no
    tweets is pickled as-is and not expanded further.

    While ``snow_sample_number < self.step_count``, every key of
    ``user.mentioned`` is put back on the queue as
    ``[str(id), snow_sample_number + 1]``.

    The loop has no exit condition of its own; it runs until the process is
    stopped or an exception propagates.
    """
    print("Worker started")

    obj_dir = os.path.join(self.out_dir, "obj")
    json_dir = os.path.join(self.out_dir, "json")

    while True:
        user_id, snow_sample_number = self.queue.get(True)
        # File names (and re-queued ids) are strings; normalise so integer
        # seeds hit the same pickle and work with os.path.join.
        user_id = str(user_id)
        print("Starting:  %s %s" % (user_id, snow_sample_number))

        pickle_path = os.path.join(obj_dir, user_id)

        # Get the ego.  A direct existence check replaces listing the whole
        # obj directory for every queue item.
        if os.path.exists(pickle_path):
            print("\tgot pickled:  %s" % (user_id,))
            # NOTE(security): pickle.load executes code from the file; only
            # safe because these files are written by this worker.
            with open(pickle_path, "rb") as pickle_file:
                user = pickle.load(pickle_file)
        else:
            user = TwitterUser(self.api_hook, user_id=user_id)
            user.populate_tweets_from_api(json_output_directory=json_dir)

            if not user.tweets:
                print("pickling and dumping:  %s" % (user.screen_name,))
                with open(pickle_path, "wb") as pickle_file:
                    pickle.dump(user, pickle_file)
                continue

            print("populating friends,  %s" % (user.screen_name,))
            user.populate_friends()

            print("pickling and dumping (no tweets):  %s" % (user.screen_name,))
            user.tweets = []
            with open(pickle_path, "wb") as pickle_file:
                pickle.dump(user, pickle_file)

        # Add each mentioned id to the queue, one step deeper, unless this
        # user is already at the configured depth.
        added = 0
        if snow_sample_number < self.step_count:
            for following_id in user.mentioned:
                self.queue.put([str(following_id), snow_sample_number + 1])
                added += 1

        print("finished collecting data for:  %s" % (user_id,))
        print("added:  %s" % (added,))
def run(self):
    """Snowball-sample users from ``self.queue``, expanding via ``user.friend_ids``.

    Each queue item is a ``(user_id, snow_sample_number)`` pair.  If a pickle
    for the user exists in ``self.pickle_dir`` it is loaded; otherwise a
    ``TwitterUser`` is created, its tweets and friends are populated, and the
    object is pickled to ``<pickle_dir>/<user_id>``.

    For every id in ``user.friend_ids`` a ``user_id,friend_id`` line is
    written to ``<network_dir>/<user_id>`` (UTF-8).  While
    ``snow_sample_number < 2`` each friend id is also put back on the queue
    as ``[str(id), snow_sample_number + 1]``.

    The loop has no exit condition of its own; it runs until the process is
    stopped or an exception propagates.
    """
    print("Worker started")

    while True:
        user_id, snow_sample_number = self.queue.get(True)
        # Paths and CSV lines below need a string id.
        user_id = str(user_id)
        print("Starting:  %s %s" % (user_id, snow_sample_number))

        # Look for the pickle inside pickle_dir.  The previous
        # glob(pickle_dir + "*") only matched directory contents when
        # pickle_dir happened to end with a path separator.
        pickle_path = os.path.join(self.pickle_dir, user_id)

        # Get the ego
        if os.path.exists(pickle_path):
            print("\tgot pickled:  %s" % (user_id,))
            # NOTE(security): pickle.load executes code from the file; only
            # safe because these files are written by this worker.
            with open(pickle_path, "rb") as pickle_file:
                user = pickle.load(pickle_file)
        else:
            user = TwitterUser(self.api_hook, user_id=user_id)

            print("\tgetting tweets for:  %s" % (user_id,))
            user.populate_tweets_from_api()
            print("\t num tweets received for:  %s   %s" % (user_id, len(user.tweets)))

            print("\tgetting friends for:  %s" % (user_id,))
            user.populate_friends()

            print("pickling:  %s" % (user_id,))
            with open(pickle_path, "wb") as pickle_file:
                pickle.dump(user, pickle_file)

        # Write out their following network and add each id to the queue.
        added = 0
        # The literal 2 is the hard-coded limit on snow_sample_number beyond
        # which friends are recorded but no longer queued.
        expand = snow_sample_number < 2
        network_path = os.path.join(self.network_dir, user_id)
        with codecs.open(network_path, "w", "utf-8") as network_fil:
            for following_id in user.friend_ids:
                following_id = str(following_id)
                if expand:
                    added += 1
                    self.queue.put([following_id, snow_sample_number + 1])
                network_fil.write(",".join([user_id, following_id]) + "\n")

        print("finished collecting data for:  %s" % (user_id,))
        print("added:  %s" % (added,))
def run(self):
    """Process queue items until a ``None`` sentinel or ``KeyboardInterrupt``.

    Accepted queue items:

    * a bare identifier (anything that is not a list/tuple), or a
      one-element list/tuple holding it;
    * ``(identifier, snow_sample_number, since_tweet_id)``;
    * ``(identifier, snow_sample_number)`` when ``self.step_count`` is truthy;
    * ``(identifier, since_tweet_id)`` when ``self.gets_since_tweet_id`` is
      truthy (and ``step_count`` is not).

    Any other shape raises ``ValueError`` inside the loop, which is reported
    like every other per-item failure.

    If both ``<out_dir>/obj/<id>`` and ``<out_dir>/json/<id>.json.gz`` exist
    and ``self.add_to_file`` is falsy, the pickled user is loaded and its
    tweets are read from the JSON file.  Otherwise a ``TwitterUser`` is built
    (by ``user_id`` or ``screen_name`` per ``self.gets_user_id``) and the
    ``populate_*`` / ``save_*`` flags decide which data is fetched, whether
    the tweet count is appended to ``self.tweet_count_file``, and whether the
    user (with ``tweets`` emptied) is pickled.

    Afterwards, if the item carried a ``snow_sample_number`` below
    ``self.step_count``, every identifier yielded by
    ``self.add_users_to_queue_function(user)`` is queued one step deeper, and
    ``self.post_process_function(user)`` is called when set.

    Log lines are emitted through single-argument ``print(...)`` calls so the
    text is the same on Python 2 and Python 3.
    """
    print('Worker started')

    while True:
        data = self.queue.get(True)
        try:
            if data is None:
                print('ALL FINISHED!!!!')
                break

            # Reset per item so a depth or since-id supplied with one user
            # is never silently applied to the next one.
            snow_sample_number = None
            since_tweet_id = None

            # Decide on the payload shape.  The type check comes first so
            # that len() is never called on an integer id.
            if not isinstance(data, (list, tuple)):
                user_identifier = data
            elif len(data) == 1:
                user_identifier = data[0]
            elif len(data) == 3:
                user_identifier, snow_sample_number, since_tweet_id = data
            elif len(data) == 2 and self.step_count:
                user_identifier, snow_sample_number = data
            elif len(data) == 2 and self.gets_since_tweet_id:
                user_identifier, since_tweet_id = data
            else:
                raise ValueError("unrecognised queue item: %r" % (data,))

            user_identifier = str(user_identifier)

            print('Starting:  %s' % (data,))

            pickle_filename = os.path.join(self.out_dir, "obj", user_identifier)
            json_filename = os.path.join(self.out_dir, "json",
                                         user_identifier + ".json.gz")

            # Get the user's data
            if (os.path.exists(pickle_filename)
                    and os.path.exists(json_filename)
                    and not self.add_to_file):
                print('\tgot existing data for:  %s' % (data,))
                # NOTE(security): pickle.load executes code from the file;
                # only safe because these files are written by this worker.
                with open(pickle_filename, "rb") as pickle_file:
                    user = pickle.load(pickle_file)
                user.populate_tweets_from_file(json_filename)
            else:
                if self.gets_user_id:
                    user = TwitterUser(self.api_hook, user_id=user_identifier)
                else:
                    user = TwitterUser(self.api_hook, screen_name=user_identifier)

                print('populating tweets %s' % (user_identifier,))

                if self.populate_tweets:
                    api_kwargs = {
                        'since_id': since_tweet_id,
                        'populate_object_with_tweets': False,
                    }
                    if self.save_user_tweets:
                        print('saving tweets to:  %s' % (json_filename,))
                        api_kwargs['json_output_filename'] = json_filename
                    # The first element of the returned pair is not used here.
                    _, tweet_count = user.populate_tweets_from_api(**api_kwargs)

                    if self.tweet_count_file:
                        self.tweet_count_file.write(
                            str(user_identifier) + "\t" + str(tweet_count) + "\n")

                if self.populate_lists:
                    print('populating lists %s' % (user.screen_name,))
                    user.populate_lists_member_of()

                if self.populate_friends:
                    print('populating friends,  %s' % (user.screen_name,))
                    user.populate_friends()

                if self.populate_followers:
                    print('populating followers,  %s' % (user.screen_name,))
                    user.populate_followers()

                if self.save_user_data and (
                        self.always_pickle or self.populate_lists
                        or self.populate_friends or self.populate_followers):
                    # Pickle and dump user (without tweets).
                    user.tweets = []
                    with open(pickle_filename, "wb") as pickle_file:
                        pickle.dump(user, pickle_file)

            # Now add to queue if necessary.
            if snow_sample_number is not None and snow_sample_number < self.step_count:
                # Distinct loop name so the current user's identifier is not
                # overwritten while queuing the next wave.
                for next_identifier in self.add_users_to_queue_function(user):
                    self.queue.put([str(next_identifier), snow_sample_number + 1])

            if self.post_process_function:
                self.post_process_function(user)

        except KeyboardInterrupt as e:
            print(e)
            break
        except Exception:
            # Best-effort worker: report the failure and keep consuming.
            print('FAILED::  %s' % (data,))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print("*** print_tb:")
            traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
            print("*** print_exception:")
            # Emit the exception type and message under the header above.
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      limit=30, file=sys.stdout)