# Imports required by the functions in this section.
import json
from collections import defaultdict
from datetime import datetime, timedelta


def process_podcasts(podcasts):
    aggregate_result = AggregateResult('Podcasts')
    for podcast in podcasts:
        # A podcast's count field holds its length in minutes.
        minutes = podcast.count
        hours = minutes / 60
        podcast_date_key = transform_datetime_to_iso_date_str(
            podcast.tweet.created_at)
        if podcast_date_key not in aggregate_result.timeline:
            aggregate_result.timeline[podcast_date_key] = defaultdict(int)
        aggregate_result.timeline[podcast_date_key][
            podcast.classification] += hours
        aggregate_result.item_count += 1
        aggregate_result.kcv += hours
        podcast_report_entry = create_podcast_report_entry(podcast, minutes)
        aggregate_result.report_entries.append(podcast_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda podcast_report_entry: podcast_report_entry.start_date,
        reverse=True)
    return aggregate_result
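# AggregateResult is defined elsewhere in this module. As a minimal sketch of
# the shape the process_* functions below rely on (field names inferred from
# usage here, not from the real definition):
#
#     class AggregateResult:
#         def __init__(self, title):
#             self.title = title
#             self.timeline = {}    # ISO date str -> defaultdict(int): classification -> hours
#             self.item_count = 0   # item count; fractional when a read only partly overlaps
#             self.kcv = 0          # knowledge consumption velocity, in hours
#             self.report_entries = []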
def process_blogs(blogs):
    aggregate_result = AggregateResult('Blogs')
    for blog in blogs:
        words = blog.count
        blog_date_key = transform_datetime_to_iso_date_str(blog.tweet.created_at)
        # Estimate reading time from the word count, capped at 30 minutes.
        minutes = min(words / average_blog_reading_speed, 30)
        hours = minutes / 60
        if blog_date_key not in aggregate_result.timeline:
            aggregate_result.timeline[blog_date_key] = defaultdict(int)
        aggregate_result.timeline[blog_date_key][blog.classification] += hours
        aggregate_result.item_count += 1
        aggregate_result.kcv += hours
        blog_report_entry = create_blog_report_entry(blog, minutes)
        aggregate_result.report_entries.append(blog_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda blog_report_entry: blog_report_entry.start_date,
        reverse=True)
    return aggregate_result
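# For example (assuming average_blog_reading_speed = 250 words per minute, an
# illustrative value; the real constant is defined elsewhere in this module):
# a 10,000-word post would estimate to 40 minutes but is capped at 30, i.e.
# 0.5 hours of credited reading time.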
def process_tweets(tweets):
    aggregate_result = AggregateResult('Tweets')
    for tweet in tweets:
        words = tweet.count
        tweet_date_key = transform_datetime_to_iso_date_str(tweet.created_at)
        minutes = words / average_tweet_reading_speed
        hours = minutes / 60
        if tweet_date_key not in aggregate_result.timeline:
            aggregate_result.timeline[tweet_date_key] = defaultdict(int)
        aggregate_result.timeline[tweet_date_key][
            tweet.classification] += hours
        aggregate_result.item_count += 1
        aggregate_result.kcv += hours
        tweet_report_entry = create_tweet_report_entry(tweet, minutes)
        aggregate_result.report_entries.append(tweet_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda tweet_report_entry: tweet_report_entry.start_date,
        reverse=True)
    return aggregate_result
def process_conferences(conferences):
    aggregate_result = AggregateResult('Conferences')
    for conference in conferences:
        conference.title = conference.full_text
        conference.start_date = conference.created_at
        conference.stop_date = conference.created_at
        # Default to 30 minutes; "I'm at ..." check-ins count as a full hour.
        conference.length = 30
        if conference.full_text.lower().startswith("i'm at"):
            conference.length = 60
        # An explicit "^<minutes>" marker in the tweet overrides the default:
        # take everything after "^" up to the first newline, strip an
        # "m"/"min" suffix, and parse the first token as an integer.
        if conference.full_text.find("^") != -1:
            try:
                raw = conference.full_text[conference.full_text.find("^") + 1:]
                counter = raw.split("\n")[0] \
                    .replace("m", "") \
                    .replace("in", "") \
                    .split(" ")[0]
                conference.length = int(counter)
            except (ValueError, IndexError):
                print(f'Could not parse counter: {conference.full_text}')
        total_hours = conference.length / 60
        knowledge_consumption_velocity = total_hours
        conference_date_key = transform_datetime_to_iso_date_str(
            conference.created_at)
        if conference_date_key not in aggregate_result.timeline:
            aggregate_result.timeline[conference_date_key] = defaultdict(int)
        aggregate_result.timeline[conference_date_key][
            conference.classification] += knowledge_consumption_velocity
        aggregate_result.item_count += 1
        aggregate_result.kcv += knowledge_consumption_velocity
        conference_report_entry = create_book_report_entry(
            conference, 1, is_book=False, is_conference=True)
        aggregate_result.report_entries.append(conference_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda conference_report_entry: conference_report_entry.start_date,
        reverse=True)
    return aggregate_result
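# The "^<minutes>" marker parsing above works on tweets like
# "Attending a DevOps meetup ^90min\n#conference" (an illustrative example,
# not real data): the text after "^" up to the newline is "90min", stripping
# "m" yields "90in", stripping "in" yields "90", and the first token parses
# to int, so conference.length becomes 90.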
def process_books(report_start_date, report_stop_date, books):
    aggregate_result = AggregateResult('Books')
    for book in books:
        # Discount front/back matter, then estimate the total word count.
        pages = book.pages - devaiation_book_pages
        count = pages * average_words_per_page
        days_to_read_book = (book.stop_date - book.start_date).days + 1
        total_minutes = count / average_reading_speed_in_minutes
        total_hours = total_minutes / 60
        # Spread the reading time evenly across every day of the read.
        average_knowledge_consumption_velocity = total_hours / days_to_read_book
        days_overlap = 0
        date_list = [
            book.stop_date - timedelta(days=day)
            for day in range(0, days_to_read_book)
        ]
        for book_date in date_list:
            book_date_key = transform_datetime_to_iso_date_str(book_date)
            if report_start_date <= book_date <= report_stop_date:
                if book_date_key not in aggregate_result.timeline:
                    aggregate_result.timeline[book_date_key] = defaultdict(int)
                aggregate_result.timeline[book_date_key][
                    book.classification] += average_knowledge_consumption_velocity
                # words = average_knowledge_consumption_velocity * 60 * average_reading_speed_in_minutes
                # aggregate_result.timeline[book_date_key][book.classification] += int(words)
                days_overlap += 1
        # Only the fraction of the read that falls inside the report window counts.
        distribution_percent = days_overlap / days_to_read_book
        aggregate_result.item_count += distribution_percent
        aggregate_result.kcv += average_knowledge_consumption_velocity * days_overlap
        book_report_entry = create_book_report_entry(book, distribution_percent)
        aggregate_result.report_entries.append(book_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda book_report_entry: book_report_entry.start_date,
        reverse=True)
    return aggregate_result
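# Worked example with illustrative numbers (assuming devaiation_book_pages = 20,
# average_words_per_page = 250, average_reading_speed_in_minutes = 200; the
# real constants are defined elsewhere in this module): a 320-page book read
# over 5 days yields (320 - 20) * 250 = 75,000 words, 75,000 / 200 = 375
# minutes = 6.25 hours, i.e. 1.25 hours per day. If only 3 of those 5 days
# fall inside the report window, distribution_percent = 0.6, item_count grows
# by 0.6, and kcv grows by 1.25 * 3 = 3.75 hours.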
def process_pairings(pairings):
    aggregate_result = AggregateResult('Pairing')
    for pairing in pairings:
        pairing.title = pairing.full_text
        pairing.start_date = pairing.created_at
        pairing.stop_date = pairing.created_at
        # Default to 15 minutes unless a "^<minutes>m" marker overrides it.
        pairing.length = 15
        if pairing.full_text.find("^") != -1:
            try:
                raw = pairing.full_text[pairing.full_text.find("^") + 1:]
                counter = raw.split("\n")[0].replace("m", "")
                pairing.length = int(counter)
            except (ValueError, IndexError):
                print(f'Could not parse counter: {pairing.full_text}')
        total_hours = pairing.length / 60
        knowledge_consumption_velocity = total_hours
        pairing_date_key = transform_datetime_to_iso_date_str(
            pairing.created_at)
        if pairing_date_key not in aggregate_result.timeline:
            aggregate_result.timeline[pairing_date_key] = defaultdict(int)
        aggregate_result.timeline[pairing_date_key][
            pairing.classification] += knowledge_consumption_velocity
        aggregate_result.item_count += 1
        aggregate_result.kcv += knowledge_consumption_velocity
        pairing_report_entry = create_book_report_entry(
            pairing, 1, is_book=False, is_pairing=True)
        aggregate_result.report_entries.append(pairing_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda pairing_report_entry: pairing_report_entry.start_date,
        reverse=True)
    return aggregate_result
def process_videos(videos):
    aggregate_result = AggregateResult('Videos')
    for video in videos:
        video.title = video.full_text
        video.start_date = video.created_at
        video.stop_date = video.created_at
        # Default to 2.5 minutes unless a "^<minutes>m" marker overrides it.
        video.length = 2.5
        if video.full_text.find("^") != -1:
            try:
                raw = video.full_text[video.full_text.find("^") + 1:]
                counter = raw.split("\n")[0].replace("m", "")
                video.length = int(counter)
            except (ValueError, IndexError):
                print(f'Could not parse counter: {video.full_text}')
        total_hours = video.length / 60
        knowledge_consumption_velocity = total_hours
        video_date_key = transform_datetime_to_iso_date_str(video.created_at)
        if video_date_key not in aggregate_result.timeline:
            aggregate_result.timeline[video_date_key] = defaultdict(int)
        aggregate_result.timeline[video_date_key][
            video.classification] += knowledge_consumption_velocity
        aggregate_result.item_count += 1
        aggregate_result.kcv += knowledge_consumption_velocity
        video_report_entry = create_book_report_entry(
            video, 1, is_book=False, is_video=True)
        aggregate_result.report_entries.append(video_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda video_report_entry: video_report_entry.start_date,
        reverse=True)
    return aggregate_result
def process_audio_books(report_start_date, report_stop_date, audio_books):
    aggregate_result = AggregateResult('Audio Books')
    for audio_book in audio_books:
        days_to_hear_book = (audio_book.stop_date -
                             audio_book.start_date).days + 1
        # An audio book's length field holds its running time in minutes.
        total_hours = audio_book.length / 60
        average_knowledge_consumption_velocity = total_hours / days_to_hear_book
        days_overlap = 0
        date_list = [
            audio_book.stop_date - timedelta(days=day)
            for day in range(0, days_to_hear_book)
        ]
        for book_date in date_list:
            book_date_key = transform_datetime_to_iso_date_str(book_date)
            if report_start_date <= book_date <= report_stop_date:
                if book_date_key not in aggregate_result.timeline:
                    aggregate_result.timeline[book_date_key] = defaultdict(int)
                aggregate_result.timeline[book_date_key][
                    audio_book.classification] += average_knowledge_consumption_velocity
                days_overlap += 1
        distribution_percent = days_overlap / days_to_hear_book
        aggregate_result.item_count += distribution_percent
        aggregate_result.kcv += average_knowledge_consumption_velocity * days_overlap
        audio_book_report_entry = create_book_report_entry(
            audio_book, distribution_percent, is_book=False)
        aggregate_result.report_entries.append(audio_book_report_entry)
    aggregate_result.report_entries.sort(
        key=lambda audio_book_report_entry: audio_book_report_entry.start_date,
        reverse=True)
    return aggregate_result
def process_github(report_start_date, report_stop_date):
    start_date = report_start_date.date()
    stop_date = report_stop_date.date()
    aggregate_result = AggregateResult('GitHub')
    with open(repository_commits_json_file, 'r') as infile:
        github_commits = json.load(infile)
    # Keep only the weekly commit buckets that overlap the report window.
    filtered_commits = []
    for repository, weekly_commits in github_commits.items():
        for weekly_commit in weekly_commits:
            if not weekly_commit['count']:
                continue
            commit_start_date = datetime.strptime(weekly_commit['start_date'],
                                                  '%Y-%m-%d').date()
            commit_stop_date = datetime.strptime(weekly_commit['stop_date'],
                                                 '%Y-%m-%d').date()
            if commit_start_date <= stop_date and commit_stop_date >= start_date:
                filtered_commits.append(weekly_commit)
    for weekly_commit in filtered_commits:
        number_days_week = 7
        minutes_per_hour = 60
        # Spread the week's commits, and their estimated minutes, evenly
        # across the seven days of the bucket.
        commits_per_day = weekly_commit['count'] / number_days_week
        minutes = weekly_commit['count'] * github_commit_minute_count
        minutes_per_day = minutes / number_days_week
        hours_per_day = minutes_per_day / minutes_per_hour
        commit_day = datetime.strptime(weekly_commit['start_date'],
                                       '%Y-%m-%d').date()
        weekly_commit_stop_date = datetime.strptime(weekly_commit['stop_date'],
                                                    '%Y-%m-%d').date()
        days_overlap = 0
        while commit_day <= weekly_commit_stop_date:
            if start_date <= commit_day <= stop_date:
                days_overlap += 1
                commit_date_key = commit_day.isoformat()
                if commit_date_key not in aggregate_result.timeline:
                    aggregate_result.timeline[commit_date_key] = defaultdict(int)
                aggregate_result.timeline[commit_date_key][
                    'engineering'] += hours_per_day
                aggregate_result.item_count += commits_per_day
                aggregate_result.kcv += hours_per_day
            commit_day += timedelta(days=1)
        overlap = days_overlap / number_days_week
        github_report_entry = create_github_report_entry(
            weekly_commit, minutes_per_day, overlap)
        aggregate_result.report_entries.append(github_report_entry)
    # aggregate_result.report_entries.sort(key=lambda github_report_entry: github_report_entry.start_date, reverse=True)
    return aggregate_result
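# Worked example with illustrative numbers (assuming github_commit_minute_count
# = 30; the real constant is defined elsewhere in this module): a weekly
# bucket with count = 14 spreads 14 * 30 = 420 minutes over 7 days, i.e. 60
# minutes (1 hour) and 2 commits per day. If 4 of the week's days fall inside
# the report window, the timeline gains 1 hour of 'engineering' on each of
# those 4 days, kcv grows by 4 hours, item_count by 8 commits, and the report
# entry records overlap = 4 / 7.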