def main():
    """Write one CSV of cleaned self-posts per Reddit user under ``../tmp``.

    For every user yielded by ``Reddit(config.data_location).get_users()``,
    ``../tmp/<user.name>.csv`` is created (truncating any existing file) with
    the header row ``SeqId, InstNo, Author, Text``. One data row is written
    for each post whose ``selftext`` is present, non-empty and not
    ``'[removed]'``, and whose ``subreddit`` is not ``'makeupexchange'``.
    ``InstNo`` is always written as ``0``.

    NOTE(review): this file appears to define ``main`` a second time further
    down; in Python the later definition replaces this one -- confirm which
    is intended.
    """
    reddit = Reddit(config.data_location)
    for user in reddit.get_users():
        # newline='' is required by the csv module so it controls line
        # endings itself (otherwise rows end in '\r\r\n' on Windows).
        # The explicit encoding keeps non-ASCII post text from depending on
        # the platform's default locale encoding.
        with open(f'../tmp/{user.name}.csv', 'w', newline='',
                  encoding='utf-8') as fp:
            writer = csv.writer(fp)
            writer.writerow(['SeqId', 'InstNo', 'Author', 'Text'])
            for post in user.posts:
                # assumes each post is dict-like (the code relies on
                # ``.get`` and item access) -- TODO confirm
                text = post.get('selftext')
                if not text or text == '[removed]':
                    continue
                # Only looked up for posts that passed the text checks,
                # so a missing 'subreddit' key still raises KeyError here.
                if post['subreddit'] == 'makeupexchange':
                    continue
                writer.writerow(
                    [post.get('id'), 0, user.name, _clean_selftext(text)])


def _clean_selftext(text):
    """Return *text* as one line with links stripped and sentences marked.

    Newlines become spaces, markdown links of the form ``[label](http...)``
    and bare ``http...`` URLs are removed, the text is word-tokenized with
    NLTK and re-joined with single spaces, then split into sentences that
    are joined with `` <SENT> `` between them (no trailing marker).
    """
    flat = text.replace('\n', ' ')
    flat = re.sub(r'\[.*?\]\(http\S+\)|http\S+', '', flat,
                  flags=re.MULTILINE)
    tokens = nltk.tokenize.word_tokenize(flat)
    sentences = nltk.tokenize.sent_tokenize(' '.join(tokens))
    # Same result as appending ' <SENT>' to every sentence except the last
    # and then joining with a single space.
    return ' <SENT> '.join(sentences)
def main():
    """Write each Reddit user's self-post history to its own text file.

    For every user yielded by ``Reddit(config.data_location).get_users()``,
    ``../user_history/<user.name>.txt`` is created (truncating any existing
    file). Each post whose ``selftext`` is present, non-empty and not
    ``'[removed]'`` contributes one line: the post text with embedded
    newlines replaced by spaces.
    """
    reddit = Reddit(config.data_location)

    # Output layouts explored previously and since disabled:
    #   * one file per user under ../tmp/
    #   * one directory per user under ../user_posts/ with one .txt per post
    #   * a single combined file ../all_posts/all.txt
    # The active layout is one text file per user.
    for user in reddit.get_users():
        # The explicit encoding keeps non-ASCII post text from depending on
        # the platform's default locale encoding.
        with open(f'../user_history/{user.name}.txt', 'w',
                  encoding='utf-8') as fp:
            for post in user.posts:
                # assumes each post is dict-like (the code relies on
                # ``.get``) -- TODO confirm
                text = post.get('selftext')
                if not text or text == '[removed]':
                    continue
                fp.write(text.replace('\n', ' ') + '\n')