class UserTimelinesDiskTweetFactory(object):
    '''
    Reads tweets from gzipped Bruno-formatted Tweet dumps of tweet
    timelines for individual users.

    Every timeline file is opened through ``BasicDiskTweetFactory`` and its
    tweets are yielded one at a time by ``get_tweets``.
    '''

    # File-name endings recognised as timeline dumps. The directory scan
    # historically matched 'tweet.dat.gz' while per-user names are built as
    # '<user>.tweets.dat.gz'; the latter never passed the former check, so
    # asking for specific users silently produced no tweets. Both endings
    # are accepted so the two code paths agree.
    _TIMELINE_SUFFIXES = ('tweet.dat.gz', 'tweets.dat.gz')

    def __init__(self, timelinesdir, users=None, verbose=False):
        '''
        timelinesdir -- directory holding the per-user timeline dumps.
        users        -- optional sequence of user identifiers; when empty or
                        omitted, every timeline file found in timelinesdir
                        is read.
        verbose      -- when true, the progress meter is updated after each
                        file has been read.
        '''
        self.timelinesdir = timelinesdir
        self.verbose = verbose
        # A fresh list per instance: a literal [] default would be one
        # object shared by every instance created without `users`.
        self.users = [] if users is None else users
        if self.users:
            total = len(self.users)
        else:
            total = sum(1 for fname in os.listdir(timelinesdir)
                        if fname.endswith(self._TIMELINE_SUFFIXES))
        # The meter is sized by the number of files, not tweets.
        self.pm = ProgressMeter(total)

    def get_tweets(self):
        '''
        Generator over the tweets of the selected timelines, file by file.
        '''
        if self.users:
            filenames = ["%s.tweets.dat.gz" % (user) for user in self.users]
        else:
            filenames = os.listdir(self.timelinesdir)
        for fname in filenames:
            if not fname.endswith(self._TIMELINE_SUFFIXES):
                continue
            # os.path.join works whether or not timelinesdir carries a
            # trailing separator; plain concatenation required one.
            fpath = os.path.join(self.timelinesdir, fname)
            for t in BasicDiskTweetFactory(fpath).get_tweets():
                yield t
            if self.verbose:
                self.pm.update()
def __init__(self, timelinesdir, users=None, verbose=False):
    '''
    Store the timeline settings on `self` and size a progress meter.

    NOTE(review): this looks like a stray copy of
    UserTimelinesDiskTweetFactory.__init__ -- confirm whether it is needed.

    timelinesdir -- directory holding the timeline dump files.
    users        -- optional sequence of user identifiers; when empty or
                    omitted, the meter is sized by the number of files in
                    timelinesdir whose name ends with 'tweet.dat.gz'.
    verbose      -- stored on `self` unchanged.
    '''
    self.timelinesdir = timelinesdir
    self.verbose = verbose
    # A fresh list per call: a literal [] default would be a single object
    # shared by every caller that omits `users`.
    self.users = [] if users is None else users
    if self.users:
        total = len(self.users)
    else:
        total = sum(1 for fname in os.listdir(timelinesdir)
                    if fname.endswith('tweet.dat.gz'))
    self.pm = ProgressMeter(total)