def do_analysis():
    """
    Draw a timeline of the participant utterances of a single test meeting.

    The first of the hard-coded test meetings is loaded and printed, its
    utterances are processed, the per-participant utterance counts are
    printed, and the utterances are drawn as coloured bars (one y tick per
    participant) in 'meeting_timeline.png'.
    """
    test_meetings = ['plg-147-l2t0dt-1', 'plg-206-uzw00g-3']

    # get a meeting
    riffdata = Riffdata()
    meeting = riffdata.get_meeting(test_meetings[0])
    Riffdata.print_meeting(meeting)

    # presumably process_utterances is what adds 'participant_uts' to the
    # meeting -- verify against its implementation
    process_utterances(meeting)
    participant_uts = meeting['participant_uts']
    print_participant_utterance_counts(participant_uts)

    # one polygon per utterance
    verts, colors = get_utterances_as_polycollection(participant_uts)
    fig, ax = plt.subplots()
    ax.add_collection(PolyCollection(verts, facecolors=colors))
    ax.autoscale()

    # x axis: a major tick on every quarter of an hour
    quarter_hours = mdates.MinuteLocator(byminute=[0, 15, 30, 45])
    ax.xaxis.set_major_locator(quarter_hours)
    ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(quarter_hours))

    # y axis: ticks 1..N labelled with the keys of participant_uts
    ax.set_yticks(list(range(1, len(participant_uts) + 1)))
    ax.set_yticklabels(list(participant_uts))

    fig.savefig('meeting_timeline.png', dpi=288)
def do_analysis():
    """
    Report on and plot the distribution of utterance durations.

    Loads every meeting that has participant utterances, prints information
    about the first one, prints the number of utterances together with the
    shortest and longest duration, distributes the durations into buckets
    and plots each x/y set returned by _make_xy_sets_to_plot in its own row
    of 'plot.png'.

    Returns early (after the count has been printed) when there are no
    meetings, no utterances, or nothing to plot, instead of failing with an
    IndexError / ValueError.
    """
    riffdata = Riffdata()
    meetings = riffdata.get_meetings_with_participant_utterances()
    print(f'Found utterances from {len(meetings)} meetings')
    if len(meetings) == 0:
        # there is no first meeting to inspect and nothing to measure
        return

    # Just to get an idea of what is in a meeting w/ participant utterances print 1
    first_meeting_id = list(meetings)[0]
    _print_participant_uts_info(first_meeting_id, meetings[first_meeting_id])

    durations = get_utterance_durations(meetings)
    print(f'Found {len(durations)} utterances')
    if len(durations) == 0:
        # durations[0] / durations[-1] below need at least one utterance
        return

    durations.sort()
    print(
        f'shortest utterance was {durations[0]}ms and longest was {durations[-1]}ms'
    )

    buckets, bucket_cnt, graph_ranges = _distribute_durations(durations)
    _print_bucket_data(buckets, bucket_cnt)

    x, y = _make_xy_sets_to_plot(x_src=buckets, y_src=bucket_cnt, ranges=graph_ranges)
    if len(x) == 0:
        # plt.subplots cannot create a figure with zero rows
        return

    # squeeze=False always returns a 2-D array of axes, so indexing also
    # works when there is only a single set to plot
    fig, axes = plt.subplots(len(x), 1, squeeze=False)
    for plot_ax, x_set, y_set in zip(axes[:, 0], x, y):
        my_plotter(plot_ax, x_set, y_set, {'marker': 'x'})

    fig.savefig('plot.png')
def do_analysis():
    """
    Save a histogram of the gaps between utterances to 'plot_gap.png'.

    The gaps of all meetings with participant utterances are binned into
    50 bins over the range 0 - 4000.
    """
    meetings = Riffdata().get_meetings_with_participant_utterances()
    print(f'Found utterances from {len(meetings)} meetings')

    gap_values = np.array(get_utterance_gaps(meetings))

    # the histogram of the data
    # (see example: https://matplotlib.org/gallery/statistics/histogram_features.html)
    fig, ax = plt.subplots()
    ax.hist(gap_values, bins=50, range=(0, 4000))

    fig.savefig('plot_gap.png')
def do_analysis():
    """
    Plot the distribution of zero length utterances to 'plot_0_distrib.png'.

    The values returned by get_zerolen_ut_distribution are plotted against
    evenly spaced percentile marks: with N values the marks are
    100 // N, 2 * (100 // N), ... up to at most 100.
    """
    meetings = Riffdata().get_meetings_with_participant_utterances()
    print(f'Found utterances from {len(meetings)} meetings')

    distribution = get_zerolen_ut_distribution(meetings)

    # width of the percentile slice represented by each distribution entry
    step = 100 // len(distribution)
    percentiles = np.array(range(step, 101, step))
    counts = np.array(distribution)

    fig, ax = plt.subplots()
    my_plotter(ax, percentiles, counts, {'marker': 'x'})
    fig.savefig('plot_0_distrib.png')
def do_analysis(meeting_date_range=(None, None),
                room_detail: str = 'count',
                *,
                report_format: str = 'human') -> None:
    """
    Analyze the meetings in the requested date range and write a report

    :param meeting_date_range: pair passed unchanged to MeetingsData to select
                               the meetings to analyze
    :param room_detail: value used to construct the RoomDetailLevel for the
                        human readable report (not used by the yaml report)
    :param report_format: 'human' or 'yaml'
    :raises ValueError: if report_format is not one of the supported formats
    """
    supported_formats = ('human', 'yaml')

    # Fail fast: an unknown format used to run the whole analysis and then
    # silently write nothing.
    if report_format not in supported_formats:
        raise ValueError(f'Unsupported report_format {report_format!r},'
                         f' expected one of {supported_formats}')

    riffdata = Riffdata()
    meeting_data = MeetingsData(riffdata, meeting_date_range)

    if report_format == 'human':
        write_human_meeting_report(meeting_data, RoomDetailLevel(room_detail))
    else:
        write_yaml_meeting_report(meeting_data)
def _save_metric_scatter(meetingsDF, metric_column, y_label, title, filename):
    """Scatter one per-meeting metric against meeting start time and save it to filename."""
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.scatter(meetingsDF['startTime'], meetingsDF[metric_column])
        ax.set_xlabel('Date')
        ax.set_ylabel(y_label)
        fig.suptitle(title)
        fig.savefig(filename)
    finally:
        # close (not just clear) the figure so repeated calls don't accumulate
        # open figures
        plt.close(fig)


def average_interruptions_chart(participant_Id):
    """
    Chart a participant's interruptions and affirmations per minute over time.

    A temporary single participant database ('riff_one_interrupt') is created,
    its utterances are loaded into a data frame and cleaned, the per meeting
    values are compiled, and two scatter plots against the meeting start time
    are saved: 'interruptions.png' and 'affirmations.png'.

    The temporary database is always dropped again, even when one of the
    steps fails. When the participant has no utterances a message is printed
    and no charts are written.

    :param participant_Id: id of the participant to chart
    """
    riffdata = Riffdata()
    db_name = 'riff_one_interrupt'

    # creating the database
    create_single_db(participant_Id, db_name)
    try:
        interrupt_db = riffdata.client[db_name]

        # putting utterances into data frame
        utteranceDF = pd.DataFrame(list(interrupt_db.utterances.find({})))
        if utteranceDF.empty:
            # an empty frame has no 'volumes' / '__v' columns to drop
            print(f'No utterances found for participant: {participant_Id}')
            return

        utteranceDF = utteranceDF.drop(['volumes', '__v'], axis=1)
        clean_interrupt_DF(utteranceDF)

        # NOTE: utterances shorter than 10 milliseconds are currently NOT
        # removed (a filter on length_seconds >= .01 was disabled here)

        # getting list of meetings to analyze
        meetingsDF = pd.DataFrame({'Meeting': utteranceDF.meeting.unique()})

        # presumably this adds the startTime, interruptions_per_min and
        # affirmations_per_min columns used below -- verify against
        # compile_interruptions
        compile_interruptions(meetingsDF, utteranceDF, participant_Id)
        meetingsDF = meetingsDF.sort_values(by=['startTime'])

        # making interruptions plot
        _save_metric_scatter(meetingsDF,
                             'interruptions_per_min',
                             'Interruptions per Min',
                             'Interruptions over time for participant: %s' % participant_Id,
                             'interruptions.png')

        # making affirmations plot
        _save_metric_scatter(meetingsDF,
                             'affirmations_per_min',
                             'Affirmations per Min',
                             'Affirmations over time for participant: %s' % participant_Id,
                             'affirmations.png')
    finally:
        # dropping the database again
        drop_single_db(db_name)
def do_analysis(meeting_date_range=(None, None), room_detail: str = 'count'):
    """
    Print statistics about the meetings that started in the given date range.

    Printed, in order: the number of meetings with the first and last meeting
    dates, the meetings grouped by number of participants, and then -- for the
    meetings with more than 1 participant only -- the meetings grouped by
    length, the longest meeting, the average meeting length, the number of
    unique participants and the requested room details.

    :param meeting_date_range: (from, to) pair; a None entry leaves that side
                               unconstrained. from is inclusive ($gte), to is
                               exclusive ($lt).
    :param room_detail: value used to construct the RoomDetailLevel passed to
                        print_room_details
    """
    riffdata = Riffdata()

    # display strings for the start and end of the date range
    from_constraint = ''
    to_constraint = ''

    # constraints for the query to implement the date range
    startTimeConstraints = {}
    if meeting_date_range[0] is not None:
        from_constraint = f'from {meeting_date_range[0]:%b %d %Y}'
        startTimeConstraints['$gte'] = meeting_date_range[0]
    if meeting_date_range[1] is not None:
        to_constraint = f'through {meeting_date_range[1]:%b %d %Y}'
        startTimeConstraints['$lt'] = meeting_date_range[1]

    # no constraints at all means no query
    qry = {'startTime': startTimeConstraints} if startTimeConstraints else None

    meetings = riffdata.get_meetings(qry)
    if len(meetings) == 0:
        print('There were no meetings found')
        return

    first_meeting_start = min(m['startTime'] for m in meetings)
    last_meeting_start = max(m['startTime'] for m in meetings)
    print(f'There were {len(meetings)} meetings {from_constraint} {to_constraint}\n'
          f' first meeting on: {first_meeting_start:%b %d %Y}\n'
          f' last meeting on: {last_meeting_start:%b %d %Y}\n')

    # this exists solely to define the type for reduce below
    init_partcnt_cnt: MutableMapping[int, int] = {}
    participant_counts = [len(meeting['participants']) for meeting in meetings]
    meetings_w_num_participants = reduce(inc_cnt, participant_counts, init_partcnt_cnt)
    print('Number of meetings grouped by number of participants:')
    pprint.pprint(meetings_w_num_participants)
    print()

    # filter the meetings list to exclude meetings w/ only 1 participant
    meetings = [meeting for meeting in meetings if len(meeting['participants']) > 1]
    if not meetings:
        print('There were no meetings with more than 1 participant')
        return
    print(f'Number of meetings with more than 1 participant was {len(meetings)}')

    # [first value, count] buckets handed to inc_bucket; the counts start at 0
    meeting_duration_distribution = [
        [minutes, 0] for minutes in (5, 10, 20, 40, 60, 120, 180, 720)
    ]
    meeting_durations = [meeting['meetingLengthMin'] for meeting in meetings]
    # the result of the reduce is not used; the buckets printed below are the
    # ones in meeting_duration_distribution
    reduce(inc_bucket, meeting_durations, meeting_duration_distribution)
    print(f'The {len(meetings)} meetings grouped by meeting length in minutes:')
    print_buckets(meeting_duration_distribution)
    print()

    longest_meeting = max(meetings, key=lambda m: m['meetingLengthMin'])
    print('The longest meeting was:')
    Riffdata.print_meeting(longest_meeting)
    print()

    avg_meeting_duration = sum(meeting_durations) / len(meeting_durations)
    print(f'Average length of a meeting was {avg_meeting_duration:.1f} minutes\n')

    # find the set of unique participants in these meetings
    all_meeting_participants: Set[str] = set().union(
        *(meeting['participants'] for meeting in meetings)
    )
    print(f'Total number of participants in these meetings was {len(all_meeting_participants)}\n')

    # print the requested room details
    print_room_details(meetings, RoomDetailLevel(room_detail))
def print_room_details(meetings, detail_level: RoomDetailLevel = RoomDetailLevel.COUNT) -> None:
    """
    Print details about the rooms in which the given meetings took place

    How much is printed depends on the detail level:
    - NONE:              nothing at all
    - COUNT:             each room name with the number of meetings held in it
    - SUMMARY:           each room name with a usage summary of that room
                         - number of meetings
                         - fewest and most participants
                         - shortest and longest meeting (and the average)
    - SUMMARY_ATTENDEES: the SUMMARY information followed by every
                         participant of the room with the number of meetings
                         in the room attended by that participant
    - ALL_MEETINGS:      each room name with its meeting count followed by
                         the complete details of every meeting in the room

    Every level except NONE starts with the number of rooms used.
    """
    if detail_level is RoomDetailLevel.NONE:
        return

    # group the meetings by room: room name -> room dict holding the list of
    # meetings (and later the summary values)
    rooms: MutableMapping[str, MutableMapping[str, Any]] = {}
    for meeting in meetings:
        rooms.setdefault(meeting['room'], {'meetings': []})['meetings'].append(meeting)

    print(f'{len(rooms)} rooms used')

    if detail_level is RoomDetailLevel.COUNT:
        print('Count of the number of times a meeting room was used:')
        for room_name, room in rooms.items():
            print(f'{room_name}: {len(room["meetings"])}')
        return

    if detail_level is RoomDetailLevel.ALL_MEETINGS:
        # each room with its meeting count, then every one of its meetings
        for room_name, room in rooms.items():
            print(f'{room_name}: {len(room["meetings"])}')
            for meeting in room['meetings']:
                Riffdata.print_meeting(meeting)
                print()
        return

    # the remaining levels need the summary values of each room
    for room in rooms.values():
        set_room_summary(room)

    if (detail_level is not RoomDetailLevel.SUMMARY
            and detail_level is not RoomDetailLevel.SUMMARY_ATTENDEES):
        return

    list_attendees = detail_level is RoomDetailLevel.SUMMARY_ATTENDEES
    summary_keys = (
        'shortest_meeting',
        'longest_meeting',
        'avg_meeting',
        'fewest_participants',
        'most_participants',
        'room_participants',
    )

    for room_name, room in rooms.items():
        summary = room['summary']
        (shortest_meeting,
         longest_meeting,
         avg_meeting,
         fewest_participants,
         most_participants,
         room_participants) = (summary[key] for key in summary_keys)

        print(f'{room_name}: {len(room["meetings"])} meetings')

        # a range is only shown when its ends differ
        if fewest_participants != most_participants:
            print(f'\tattended by {fewest_participants} - {most_participants} participants')
        else:
            print(f'\tattended by {fewest_participants} participants')

        if shortest_meeting != longest_meeting:
            print(f'\tlasting from {shortest_meeting:.1f} to {longest_meeting:.1f} minutes'
                  f' (avg: {avg_meeting:.1f})')
        else:
            print(f'\tlasting {shortest_meeting:.1f} minutes')

        if list_attendees:
            print('\troom participants (# of meetings)')
            for p, cnt in room_participants.items():
                print(f'\t\t{p} ({cnt})')

        print()
def drop_single_db(new_db_name):
    """Drop the database named new_db_name using the client of a new Riffdata instance."""
    Riffdata().client.drop_database(new_db_name)
def create_single_db(participantId, new_db_name):
    """
    Create the single participant database new_db_name for participantId.

    Delegates to create_single_participant_db of a new Riffdata instance.
    """
    Riffdata().create_single_participant_db(participantId, new_db_name)