def main():
    """Dump every batch of user sessions to JSON files with a numbered prefix.

    Iterates the batches produced by ``get_user_sessions`` (auto-recurring
    entries removed) and hands each batch to ``put_json_files`` under the
    prefix ``user_sessions.<n>``, where ``n`` is the zero-based batch index.
    A progress line is printed after each batch.
    """
    # NOTE(review): `limit` presumably caps the size of each batch in bytes
    # (800 MB here) -- confirm against get_user_sessions.
    batches = get_user_sessions(limit=800*BYTES_IN_MB, remove_autorecurring=True)
    for iter_count, users in enumerate(batches):
        prefix = 'user_sessions.' + str(iter_count)
        put_json_files(users, prefix, encoder=UserEncoder)
        # Single-argument parenthesised form prints identically on Python 2
        # and is also valid Python 3.
        print("Wrote to file prefix " + prefix)
def compute_query_occurrences():
    """Return the occurrence count of every distinct query text, largest first.

    Walks all user batches from ``get_user_sessions`` (auto-recurring entries
    are kept), tallies how many times each ``query.text`` appears across every
    user's ``queries``, and returns only the counts.

    Returns:
        list of int: one count per distinct query text, sorted in descending
        order. The query texts themselves are not returned.
    """
    occurrences = defaultdict(int)
    batches = get_user_sessions(limit=800*BYTES_IN_MB, remove_autorecurring=False)
    for users in batches:
        for user in users:
            for query in user.queries:
                occurrences[query.text] += 1
    # sorted() always yields a new list; calling .sort() on .values() only
    # works where values() returns a list (Python 2) and fails on a view.
    return sorted(occurrences.values(), reverse=True)
def compute_session_lengths(keepzeros=True):
    """Collect the duration of every session, sorted ascending.

    Walks all user batches from ``get_user_sessions`` (auto-recurring entries
    removed) and gathers ``session.duration`` for each session of each user.

    Args:
        keepzeros: when true (default), sessions whose duration equals 0 are
            included in the returned list; when false they are left out.
            Zero-duration sessions are counted either way.

    Returns:
        tuple: ``(session_lengths, zeros)`` where ``session_lengths`` is the
        ascending list of durations and ``zeros`` is the number of
        zero-duration sessions seen, as a float.
    """
    session_lengths = []
    # Kept as a float so the returned count has the same type callers
    # already receive.
    zeros = 0.
    batches = get_user_sessions(limit=800*BYTES_IN_MB, remove_autorecurring=True)
    for users in batches:
        for user in users:
            # Only the session objects are needed, not their ids.
            # Assumes user.sessions is dict-like -- it previously had to
            # support iteritems().
            for session in user.sessions.values():
                duration = session.duration
                is_zero = duration == 0.
                if is_zero:
                    zeros += 1
                if keepzeros or not is_zero:
                    session_lengths.append(duration)
    session_lengths.sort()
    return session_lengths, zeros
def get_short_sessions(keepzeros=True):
    """Collect sessions whose duration is at most 1, sorted by duration.

    Walks the user batches from ``get_user_sessions`` (auto-recurring entries
    removed) and keeps each session object with ``duration <= 1``.

    Args:
        keepzeros: when true (default), sessions whose duration equals 0 are
            included in the returned list; when false they are left out.
            Zero-duration sessions are counted either way.

    Returns:
        tuple: ``(short_sessions, zeros)`` where ``short_sessions`` is the
        list of session objects ordered by ascending ``duration`` and
        ``zeros`` is the number of zero-duration sessions seen, as a float.
    """
    short_sessions = []
    # Kept as a float so the returned count has the same type callers
    # already receive.
    zeros = 0.
    # NOTE(review): this uses 8*BYTES_IN_MB while the sibling functions use
    # 800*BYTES_IN_MB -- confirm the smaller limit is intentional.
    batches = get_user_sessions(limit=8*BYTES_IN_MB, remove_autorecurring=True)
    for users in batches:
        for user in users:
            # Only the session objects are needed, not their ids.
            # Assumes user.sessions is dict-like -- it previously had to
            # support iteritems().
            for session in user.sessions.values():
                duration = session.duration
                if duration <= 1.:
                    is_zero = duration == 0.
                    if is_zero:
                        zeros += 1
                    if keepzeros or not is_zero:
                        short_sessions.append(session)
    short_sessions.sort(key=lambda s: s.duration)
    return short_sessions, zeros