コード例 #1
0
from connect import get_rows, query_to_csv
from params import askreddit_commenters, all_subreddits

# print '\n'.join(map(lambda x: x[0], get_rows('select subreddit from May2015 group by subreddit order by count(*) desc limit 200')))

# print '\n'.join(map(lambda x: x[0], get_rows('select author from May2015 where subreddit = \'AskReddit\' group by author order by count(*) desc limit 50')))


# OR-chained WHERE fragments over the lists imported from params. In the
# visible code they are only referenced by the commented-out bulk exports.
where_in_top = ' or '.join('subreddit = "%s"' % name for name in all_subreddits)
where_by_uname = ' or '.join('author = "%s"' % name for name in askreddit_commenters)

# subreddit_data = 'select author, subreddit, score, ups, downs, link_id, created_utc, body from May2015 where (%s)' % where_in_top
# query_to_csv(subreddit_data, 'subreddit_data.csv')

# author_data = 'select author, subreddit, score, ups, downs, link_id, created_utc, body from May2015 where (subreddit = \'AskReddit\' and (%s))' % where_by_uname
# query_to_csv(author_data, 'author_data.csv')


# Query template: the literal token [subreddit] is swapped for a concrete
# name by sr_query(). The SQL orders rows by RANDOM() and caps the result
# at 100000 rows per subreddit.
subreddit_q = "select author, subreddit, score, ups, downs, link_id, created_utc, body from May2015 where subreddit = '[subreddit]' order by RANDOM() limit 100000"


def sr_query(sr):
    """Return ``subreddit_q`` with the ``[subreddit]`` placeholder set to ``sr``."""
    return subreddit_q.replace('[subreddit]', sr)


# Walk the subreddit list back to front, handing each query and its
# per-subreddit path under data/ to query_to_csv.
# NOTE(review): query_to_csv presumably runs the query and writes the rows
# to that path -- confirm in connect.py.
for subreddit in reversed(all_subreddits):
    query_to_csv(
        sr_query(subreddit),
        'data/%s.csv' % subreddit,
    )



# subreddit_data = 'select author, subreddit, score, ups, downs, link_id, created_utc, body from May2015 where (%s)' % where_in_top
# query_to_csv(subreddit_data, 'author_data.csv')
コード例 #2
0
from connect import get_rows, query_to_csv
from params import commenters, tv_subreddits

# OR-chained predicate matching any subreddit listed in tv_subreddits.
where_in_tv = ' or '.join(['subreddit = "%s"' % s for s in tv_subreddits])

# Aggregate queries over those subreddits (comment counts per subreddit, and
# per author limited to 1000 rows). They are built here but not passed to
# query_to_csv in the visible code.
comments_by_subreddit = 'select subreddit, count(*) from May2015 where %s group by subreddit order by count(*) desc' % where_in_tv
comments_by_author = 'select author, count(distinct subreddit), count(*) from May2015 where %s group by author order by count(*) desc limit 1000' % where_in_tv

# First element of every entry in commenters (presumably the author name --
# verify against params). Built as a real list rather than with map(): on
# Python 3 map() yields a one-shot iterator that would be exhausted after
# the join below, while a list behaves the same on Python 2 and 3.
usernames = [entry[0] for entry in commenters]
where_by_uname = ' or '.join(['author = "%s"' % s for s in usernames])

# Comments that match both a listed subreddit and a listed author.
comment_dump = 'select author, subreddit, score, downs, created_utc, body from May2015 where ((%s) and (%s))' % (
    where_in_tv, where_by_uname)

# NOTE(review): query_to_csv presumably executes the query and writes the
# rows to out.csv -- confirm in connect.py.
query_to_csv(comment_dump, 'out.csv')
コード例 #3
0
from connect import get_rows, query_to_csv
from params import commenters, tv_subreddits

# Predicate selecting rows whose subreddit is any of tv_subreddits (OR-chained).
where_in_tv = ' or '.join(['subreddit = "%s"' % s for s in tv_subreddits])

# Summary queries restricted to those subreddits: comment counts grouped by
# subreddit, and grouped by author (capped at 1000 rows). Neither is handed
# to query_to_csv in the visible code.
comments_by_subreddit = 'select subreddit, count(*) from May2015 where %s group by subreddit order by count(*) desc' % where_in_tv
comments_by_author = 'select author, count(distinct subreddit), count(*) from May2015 where %s group by author order by count(*) desc limit 1000' % where_in_tv

# Take element 0 of each commenters entry (presumably the username -- verify
# against params). A list comprehension replaces map(lambda ...) so that
# usernames stays a reusable list on Python 3 instead of a single-use
# iterator; the result on Python 2 is unchanged.
usernames = [commenter[0] for commenter in commenters]
where_by_uname = ' or '.join(['author = "%s"' % s for s in usernames])

# Rows must satisfy both the subreddit predicate and the author predicate.
comment_dump = 'select author, subreddit, score, downs, created_utc, body from May2015 where ((%s) and (%s))' % (where_in_tv, where_by_uname)


# NOTE(review): query_to_csv presumably runs the query and dumps the result
# to out.csv -- confirm in connect.py.
query_to_csv(comment_dump, 'out.csv')