def get_authors_timeline(author: str, topics: List[str]) -> AuthorTimeline:
    """Build the timeline of an author's posts across the given topics.

    For every topic, the topic's dataframe is loaded through
    ``TopicsDFCache`` and reduced to the rows whose ``Author`` column equals
    *author*. Each remaining row becomes a ``TimelineSentence`` (built from
    the ``InstNo`` and ``Text`` columns plus the topic) and is grouped under
    the post named by the row's ``SeqId`` column. A post's ``created_utc``
    value is looked up through ``Reddit.get_post`` only the first time that
    post id is seen.

    Args:
        author: Value matched against the ``Author`` column.
        topics: Topics whose dataframes are scanned, in order.

    Returns:
        An ``AuthorTimeline`` for *author* whose posts are sorted by
        ``timestamp`` and whose per-post sentences are sorted by ``number``.
    """
    reddit = Reddit(config.data_location)
    posts: Dict[str, TimelinePost] = {}
    # Sentences gathered per post id. They are merged into each post with a
    # single sort after the scan instead of re-sorting the whole list for
    # every row.
    gathered: Dict[str, List[TimelineSentence]] = {}

    for topic in topics:
        df = TopicsDFCache.load(topic)
        author_rows = df[df.Author == author]
        for _, row in author_rows.iterrows():
            post_id = row['SeqId']
            sentence = TimelineSentence(row['InstNo'], row['Text'], topic)
            if post_id not in posts:
                # One lookup per post, no matter how many sentences it has.
                create_time = reddit.get_post(post_id)['created_utc']
                posts[post_id] = TimelinePost(post_id, create_time)
            gathered.setdefault(post_id, []).append(sentence)

    for post_id, sentences in gathered.items():
        post = posts[post_id]
        # sorted() is stable, so sentences sharing a number keep the order in
        # which they were encountered. A new list is assigned (rather than
        # extending in place) so whatever list the post started with is left
        # unmodified.
        post.sentences = sorted(post.sentences + sentences,
                                key=lambda s: s.number)

    timeline_posts = sorted(posts.values(), key=lambda p: p.timestamp)
    return AuthorTimeline(author, timeline_posts)