def _as_text(value):
    """Return *value* as text, decoding UTF-8 when it is a byte string."""
    # On Python 2 the csv module yields byte strings (``bytes is str``), so
    # this always decodes there; on Python 3 the value is already text.
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value


def load(file_type, filename):
    """Load tweets from a dump file and return them as a list of dicts.

    Args:
        file_type: Format of the dump. ``"cirip"`` is a file dumped with the
            cirip.ro API; ``"tweetscan"`` is a CSV file from tweetscan.com.
        filename: Path of the file to read.

    Returns:
        A list of dicts with the keys ``"id"``, ``"created_at"`` and
        ``"text"``. For ``"cirip"`` the id is always ``None`` and the date is
        whatever ``twitter_utils.parse_twitter_date`` returns. For
        ``"tweetscan"`` the id is an ``int`` taken from column 5, the date is
        a local-time ``datetime`` built from the timestamp in column 2, and
        the text comes from column 1. Any other ``file_type`` yields ``None``
        (the file is still opened first).

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a ``"cirip"`` dump contains no ``"["``, or a
            ``"tweetscan"`` row holds a non-numeric id or timestamp.
        IndexError: If a ``"tweetscan"`` row has fewer than six columns.
    """
    import sys  # local import: only needed to choose the open() arguments

    # ``file()`` no longer exists on Python 3; ``open()`` works everywhere.
    # Python 3 has to decode while reading, whereas Python 2 must keep
    # handing byte strings to the csv module.
    open_kwargs = {"encoding": "utf-8"} if sys.version_info[0] >= 3 else {}

    with open(filename, **open_kwargs) as f:
        if file_type == "cirip":
            # file dumped with cirip.ro api
            content = f.read()
            # Keep everything from the first "[" up to, but excluding, the
            # final character. Presumably the dump wraps the JSON array in
            # something like a callback call -- TODO confirm with a sample.
            tweets = simplejson.loads(content[content.index("["):-1])
            return [
                {
                    "id": None,
                    "created_at": twitter_utils.parse_twitter_date(
                        t["created_at"]
                    ),
                    "text": t["text"],
                }
                for t in tweets
            ]

        if file_type == "tweetscan":
            # file from tweetscan.com
            csv_reader = csv.reader(f)
            # The first row is skipped (treated as a header); the default
            # keeps an empty file from raising StopIteration.
            next(csv_reader, None)
            return [
                {
                    "id": int(row[5]),
                    "created_at": datetime.datetime.fromtimestamp(
                        int(row[2])
                    ),
                    "text": _as_text(row[1]),
                }
                for row in csv_reader
            ]

    # Unknown file types fall through and produce no result.
    return None
def cleanup_status(status):
    """Parse the ``created_at`` field of a status in place and return it.

    The value under ``'created_at'`` is replaced by the result of
    ``twitter_utils.parse_twitter_date`` (presumably a datetime -- confirm
    against that helper). When the status carries a ``'retweeted_status'``
    entry, that nested status is cleaned up the same way, recursively.

    Args:
        status: Mapping with a ``'created_at'`` key and, optionally, a
            ``'retweeted_status'`` key holding another such mapping.

    Returns:
        The same ``status`` object that was passed in, after mutation.

    Raises:
        KeyError: If ``'created_at'`` is missing at any nesting level.
    """
    parsed_date = twitter_utils.parse_twitter_date(status['created_at'])
    status['created_at'] = parsed_date

    # Plain statuses are done once their own date has been parsed.
    if 'retweeted_status' not in status:
        return status

    # Retweets embed the original status; clean that one up as well.
    embedded = status['retweeted_status']
    status['retweeted_status'] = cleanup_status(embedded)
    return status