Example #1
0
def get_authors_timeline(author: str, topics: List[str]) -> AuthorTimeline:
    """Build a chronological timeline of one author's posts across topics.

    For every topic, the cached topic data frame is loaded and restricted to
    the rows whose ``Author`` column equals *author*. Each remaining row
    becomes a ``TimelineSentence`` (built from the ``InstNo`` and ``Text``
    columns plus the topic) attached to the post identified by the row's
    ``SeqId`` column. The first time a post id is seen, the post is looked up
    through ``Reddit.get_post`` and its ``created_utc`` value is passed to
    ``TimelinePost``.

    Args:
        author: Author name to match against the ``Author`` column.
        topics: Topics whose cached data frames are scanned, in order.

    Returns:
        An ``AuthorTimeline`` for *author* whose posts are sorted by their
        ``timestamp`` attribute; within each post the sentences are sorted
        by their ``number`` attribute.
    """
    reddit = Reddit(config.data_location)
    posts: Dict[str, TimelinePost] = {}
    # Sentences gathered per post id in encounter order. They are sorted once
    # per post after the scan instead of re-sorting the whole list on every
    # row; because ``sorted`` is stable the final order is the same.
    collected: Dict[str, List[TimelineSentence]] = {}

    for topic in topics:
        df = TopicsDFCache.load(topic)
        authored_rows = df[df.Author == author]
        for _, row in authored_rows.iterrows():
            post_id = row['SeqId']
            sent = TimelineSentence(row['InstNo'], row['Text'], topic)
            if post_id not in posts:
                create_time = reddit.get_post(post_id)['created_utc']
                posts[post_id] = TimelinePost(post_id, create_time)
                collected[post_id] = []
            collected[post_id].append(sent)

    for post_id, sentences in collected.items():
        post = posts[post_id]
        # Assign a fresh list rather than mutating ``post.sentences`` in
        # place, so whatever list the post started with is left untouched.
        post.sentences = sorted(post.sentences + sentences,
                                key=lambda s: s.number)

    sorted_posts = sorted(posts.values(), key=lambda p: p.timestamp)
    return AuthorTimeline(author, sorted_posts)