Example #1
0
def main():
    """Write one CSV of cleaned, sentence-marked self-posts per Reddit user.

    For every user yielded by ``reddit.get_users()`` a file
    ``../tmp/<user.name>.csv`` is created with the header row
    ``SeqId, InstNo, Author, Text`` followed by one row per kept post.

    A post is kept when its ``selftext`` is present, non-empty and not the
    literal ``'[removed]'``, and its ``subreddit`` is not ``'makeupexchange'``.

    The text of a kept post is processed as follows:
      1. newlines are replaced by spaces;
      2. markdown links (``[label](http...)``) and bare URLs are stripped;
      3. the text is word-tokenized with NLTK and re-joined with spaces;
      4. the result is sentence-tokenized and the sentences are joined with
         `` <SENT> `` so the marker appears between (not after) sentences.

    The ``InstNo`` column is always written as ``0``.
    """
    reddit = Reddit(config.data_location)

    # Compiled once instead of once per post. Matches markdown links of the
    # form [label](http...) as well as bare http(s) URLs.
    link_pattern = re.compile(r'\[.*?\]\(http\S+\)|http\S+',
                              flags=re.MULTILINE)

    for user in reddit.get_users():
        # newline='' lets the csv module control line endings (otherwise
        # extra blank rows appear on Windows); an explicit UTF-8 encoding
        # keeps non-ASCII post text from failing under other locale defaults.
        with open(f'../tmp/{user.name}.csv', 'w', newline='',
                  encoding='utf-8') as fp:
            csv_file = csv.writer(fp)
            csv_file.writerow(['SeqId', 'InstNo', 'Author', 'Text'])

            for post in user.posts:
                selftext = post.get('selftext')
                # Skip posts with no body or whose body was removed.
                if not selftext or selftext == '[removed]':
                    continue
                # Checked only for posts that have usable text, matching the
                # original short-circuit order.
                if post['subreddit'] == 'makeupexchange':
                    continue

                text = link_pattern.sub('', selftext.replace('\n', ' '))
                # Normalise spacing around tokens before sentence splitting.
                text = ' '.join(nltk.tokenize.word_tokenize(text))
                sentences = nltk.tokenize.sent_tokenize(text)
                # Marker goes between sentences only; the last one gets none.
                text = ' <SENT> '.join(sentences)

                csv_file.writerow([post.get('id'), 0, user.name, text])
Example #2
0
def main():
    """Write one plain-text history file per Reddit user.

    For every user yielded by ``reddit.get_users()`` a file
    ``../user_history/<user.name>.txt`` is created. Each post whose
    ``selftext`` is present, non-empty and not the literal ``'[removed]'``
    contributes exactly one line: its text with newlines replaced by spaces.

    Earlier exploratory variants that lived here as commented-out code
    (a per-user CSV under ``../tmp``, one file per post under
    ``../user_posts``, and a single combined ``../all_posts/all.txt``) have
    been dropped from this function.
    """
    reddit = Reddit(config.data_location)

    for user in reddit.get_users():
        # Explicit UTF-8 so non-ASCII post text does not depend on (or fail
        # under) the platform's default locale encoding.
        with open(f'../user_history/{user.name}.txt', 'w',
                  encoding='utf-8') as fp:
            for post in user.posts:
                selftext = post.get('selftext')
                # Skip posts with no body or whose body was removed.
                if not selftext or selftext == '[removed]':
                    continue
                # One post per line: flatten embedded newlines first.
                fp.write(selftext.replace('\n', ' '))
                fp.write('\n')