def main():
    """Pass each batch from get_user_sessions to put_json_files.

    Batches are numbered from 0 in the order they are produced; batch ``i``
    is handed to ``put_json_files`` with the prefix ``'user_sessions.<i>'``
    and ``UserEncoder`` as the encoder. A progress line is printed after
    each batch.
    """
    batches = get_user_sessions(limit=800*BYTES_IN_MB, remove_autorecurring=True)
    for iter_count, users in enumerate(batches):
        prefix = 'user_sessions.' + str(iter_count)
        put_json_files(users, prefix, encoder=UserEncoder)
        # Single-argument parenthesised form prints the same text whether
        # `print` is a statement or a function.
        print("Wrote to file prefix " + prefix)
def compute_query_occurrences():
    """Return how often each distinct query text occurs, largest first.

    Walks every batch from ``get_user_sessions`` (called with
    ``remove_autorecurring=False``), tallies ``query.text`` over every
    ``user.queries`` entry, and returns only the tallies (not the texts) as
    a list sorted in descending order.
    """
    queries = defaultdict(int)
    for users in get_user_sessions(limit=800*BYTES_IN_MB, remove_autorecurring=False):
        for user in users:
            for query in user.queries:
                queries[query.text] += 1
    # sorted() builds a new list, so this does not depend on values()
    # returning a sortable list.
    return sorted(queries.values(), reverse=True)
def compute_session_lengths(keepzeros=True):
    """Collect the duration of every session, sorted ascending.

    Args:
        keepzeros: when true, sessions whose duration equals 0 are included
            in the returned list; when false they are counted but left out.

    Returns:
        A ``(session_lengths, zeros)`` tuple: the sorted list of durations
        and the number of zero-duration sessions seen. ``zeros`` is a float
        because the counter starts at ``0.``.
    """
    session_lengths = []
    zeros = 0.
    for users in get_user_sessions(limit=800*BYTES_IN_MB, remove_autorecurring=True):
        for user in users:
            # Only the session objects are needed; the session ids are unused.
            for session in user.sessions.values():
                duration = session.duration
                is_zero = duration == 0.
                if is_zero:
                    zeros += 1
                # Zero-length sessions are always counted, but only kept
                # in the list when the caller asked for them.
                if keepzeros or not is_zero:
                    session_lengths.append(duration)
    session_lengths.sort()
    return session_lengths, zeros
# Example no. 4
# 0
def get_short_sessions(keepzeros=True):
    """Collect the sessions whose duration is at most 1, sorted by duration.

    Args:
        keepzeros: when true, sessions whose duration equals 0 are included
            in the returned list; when false they are counted but left out.

    Returns:
        A ``(short_sessions, zeros)`` tuple: the session objects with
        ``duration <= 1`` ordered by ascending duration, and the number of
        zero-duration sessions seen. ``zeros`` is a float because the
        counter starts at ``0.``.
    """
    short_sessions = []
    zeros = 0.
    # NOTE(review): the limit here is 8*BYTES_IN_MB, whereas the other
    # callers of get_user_sessions in this file pass 800*BYTES_IN_MB --
    # confirm this is intentional.
    for users in get_user_sessions(limit=8*BYTES_IN_MB, remove_autorecurring=True):
        for user in users:
            # Only the session objects are needed; the session ids are unused.
            for session in user.sessions.values():
                if session.duration <= 1.:
                    is_zero = session.duration == 0.
                    if is_zero:
                        zeros += 1
                    # Zero-length sessions are always counted, but only
                    # kept in the list when the caller asked for them.
                    if keepzeros or not is_zero:
                        short_sessions.append(session)
    short_sessions.sort(key=lambda s: s.duration)
    return short_sessions, zeros