Python Riffdataの例、riffdata.riffdata.Riffdata Pythonの例

コード例 #1

0

ファイルを表示

def do_analysis():
    """
    Draw a per-participant utterance timeline for one hard-coded test meeting.

    The first test meeting is fetched and printed, its utterances are
    processed, the per-participant utterance counts are printed, and the
    resulting chart is saved to 'meeting_timeline.png'.
    """
    test_meetings = ['plg-147-l2t0dt-1', 'plg-206-uzw00g-3']
    riffdata = Riffdata()
    meeting = riffdata.get_meeting(test_meetings[0])
    Riffdata.print_meeting(meeting)

    # NOTE(review): process_utterances presumably populates
    # meeting['participant_uts'], which is read right after it -- confirm.
    process_utterances(meeting)
    uts_by_participant = meeting['participant_uts']
    print_participant_utterance_counts(uts_by_participant)

    polygons, polygon_colors = get_utterances_as_polycollection(uts_by_participant)
    timeline_bars = PolyCollection(polygons, facecolors=polygon_colors)

    fig, ax = plt.subplots()
    ax.add_collection(timeline_bars)
    ax.autoscale()

    # major x ticks on every quarter hour
    quarter_hours = mdates.MinuteLocator(byminute=[0, 15, 30, 45])
    ax.xaxis.set_major_locator(quarter_hours)
    ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(quarter_hours))

    # one y tick per participant, numbered from 1 and labelled with the key
    participant_labels = list(uts_by_participant)
    ax.set_yticks(list(range(1, len(participant_labels) + 1)))
    ax.set_yticklabels(participant_labels)

    fig.savefig('meeting_timeline.png', dpi=288)

コード例 #2

0

ファイルを表示

ファイル: utterance_duration.py プロジェクト: mlippert/analyze-data

def do_analysis():
    """
    Report on the utterance durations of all meetings with participant utterances.

    Prints the number of meetings and utterances found, sample information
    about one meeting, the shortest and longest utterance and the bucketed
    duration counts, then saves one subplot per graph range to 'plot.png'.

    Returns early (after printing the zero count) when there are no meetings
    or no utterances, because there is nothing to sample, sort or plot.
    """
    riffdata = Riffdata()
    meetings = riffdata.get_meetings_with_participant_utterances()

    print(f'Found utterances from {len(meetings)} meetings')
    if len(meetings) == 0:
        return

    # Just to get an idea of what is in a meeting w/ participant utterances print 1
    first_meeting_id = next(iter(meetings))
    _print_participant_uts_info(first_meeting_id, meetings[first_meeting_id])

    durations = get_utterance_durations(meetings)
    print(f'Found {len(durations)} utterances')
    if len(durations) == 0:
        return

    durations.sort()
    print(
        f'shortest utterance was {durations[0]}ms and longest was {durations[-1]}ms'
    )

    buckets, bucket_cnt, graph_ranges = _distribute_durations(durations)
    _print_bucket_data(buckets, bucket_cnt)

    x, y = _make_xy_sets_to_plot(x_src=buckets,
                                 y_src=bucket_cnt,
                                 ranges=graph_ranges)

    # squeeze=False always yields a 2-D array of Axes; without it a single
    # subplot comes back as a bare Axes object that cannot be indexed.
    fig, axes = plt.subplots(len(x), 1, squeeze=False)
    for ax, x_vals, y_vals in zip(axes[:, 0], x, y):
        my_plotter(ax, x_vals, y_vals, {'marker': 'x'})

    fig.savefig('plot.png')

コード例 #3

0

ファイルを表示

def do_analysis():
    """
    Save a histogram of the utterance gaps found in all meetings.

    The gaps are whatever get_utterance_gaps returns for the meetings that
    have participant utterances; the histogram is written to 'plot_gap.png'.
    """
    meetings = Riffdata().get_meetings_with_participant_utterances()
    print(f'Found utterances from {len(meetings)} meetings')

    gap_values = np.array(get_utterance_gaps(meetings))

    # 50 bins restricted to the range 0-4000
    # (see example: https://matplotlib.org/gallery/statistics/histogram_features.html)
    fig, ax = plt.subplots()
    ax.hist(gap_values, bins=50, range=(0, 4000))

    fig.savefig('plot_gap.png')

コード例 #4

0

ファイルを表示

def do_analysis():
    """
    Plot the zero-length utterance distribution against percentile positions.

    The distribution returned by get_zerolen_ut_distribution is plotted with
    one x value per entry, spaced 100 // (number of entries) apart, and the
    chart is saved to 'plot_0_distrib.png'.

    Nothing is plotted when the distribution is empty.
    """
    riffdata = Riffdata()
    meetings = riffdata.get_meetings_with_participant_utterances()

    print(f'Found utterances from {len(meetings)} meetings')

    zerolen_ut_distribution = get_zerolen_ut_distribution(meetings)

    num_buckets = len(zerolen_ut_distribution)
    if num_buckets == 0:
        # avoid dividing by zero below; there is nothing to plot
        print('There was no distribution data to plot')
        return

    percentile_size = 100 // num_buckets
    # Build exactly one x value per bucket. Stepping a range up to 100 can
    # produce an extra value when the bucket count does not divide 100
    # (e.g. 13 buckets -> step 7 -> 14 values), which no longer matches y.
    x = percentile_size * np.arange(1, num_buckets + 1)
    y = np.array(zerolen_ut_distribution)

    fig, ax = plt.subplots()
    my_plotter(ax, x, y, {'marker': 'x'})

    fig.savefig('plot_0_distrib.png')

コード例 #5

0

ファイルを表示

def do_analysis(meeting_date_range=(None, None),
                room_detail: str = 'count',
                *,
                report_format: str = 'human') -> None:
    """
    Analyze the meetings in the requested date range and write a report

    :param meeting_date_range: pair passed through to MeetingsData to select
        the meetings to analyze
    :param room_detail: value used to construct the RoomDetailLevel for the
        human readable report (not used by the yaml report)
    :param report_format: 'human' or 'yaml'
    :raises ValueError: if report_format is not one of the supported formats
    """
    # Validate before doing any database work so that an unsupported format
    # is reported instead of silently producing no report at all.
    if report_format not in ('human', 'yaml'):
        raise ValueError(f"unknown report_format {report_format!r}, "
                         "expected 'human' or 'yaml'")

    riffdata = Riffdata()
    meeting_data = MeetingsData(riffdata, meeting_date_range)

    if report_format == 'human':
        write_human_meeting_report(meeting_data, RoomDetailLevel(room_detail))
    else:
        write_yaml_meeting_report(meeting_data)

コード例 #6

0

ファイルを表示

ファイル: interruptions.py プロジェクト: andrew-reilly19/analyze-data

def average_interruptions_chart(participant_Id):
    """
    Save scatter plots of a participant's interruptions and affirmations per meeting.

    A temporary database named 'riff_one_interrupt' is created for the
    participant, its utterances are loaded into a data frame and compiled
    into per-meeting values, and two charts are written: 'interruptions.png'
    and 'affirmations.png'. The temporary database is always dropped again,
    even when loading or plotting fails.

    :param participant_Id: id of the participant to chart
    """
    riffdata = Riffdata()
    db_name = 'riff_one_interrupt'

    # creating the database
    create_single_db(participant_Id, db_name)
    try:
        interrupt_db = riffdata.client[db_name]

        # putting utterances into data frame
        utterances = interrupt_db.utterances.find({})
        utteranceDF = pd.DataFrame(list(utterances))
        utteranceDF = utteranceDF.drop(['volumes', '__v'], axis=1)
        clean_interrupt_DF(utteranceDF)

        # NOTE: utterances shorter than 10 milliseconds are currently NOT removed

        # getting list of meetings to analyze
        meetingsDF = pd.DataFrame(utteranceDF.meeting.unique())
        meetingsDF.columns = ['Meeting']
        # NOTE(review): compile_interruptions presumably adds the startTime,
        # interruptions_per_min and affirmations_per_min columns used below -- confirm.
        compile_interruptions(meetingsDF, utteranceDF, participant_Id)
        meetingsDF = meetingsDF.sort_values(by=['startTime'])

        _save_scatter_plot(meetingsDF.startTime,
                           meetingsDF.interruptions_per_min,
                           'Interruptions per Min',
                           f'Interruptions over time for participant: {participant_Id}',
                           'interruptions.png')

        _save_scatter_plot(meetingsDF.startTime,
                           meetingsDF.affirmations_per_min,
                           'Affirmations per Min',
                           f'Affirmations over time for participant: {participant_Id}',
                           'affirmations.png')
    finally:
        # dropping the database again, whether or not the analysis succeeded
        drop_single_db(db_name)


def _save_scatter_plot(x, y, ylabel, title, filename):
    """Save a scatter plot of y against x (x axis labelled 'Date') to filename."""
    fig = plt.figure()
    try:
        plt.scatter(x, y)
        plt.xlabel('Date')
        plt.ylabel(ylabel)
        fig.suptitle(title)
        plt.savefig(filename)
    finally:
        # close the figure so repeated calls do not accumulate open figures
        plt.close(fig)

コード例 #7

0

ファイルを表示

ファイル: meetings.py プロジェクト: andrew-reilly19/analyze-data

def do_analysis(meeting_date_range=(None, None), room_detail: str = 'count'):
    """
    Print a report about the meetings whose startTime is in the given range.

    The report contains the number of meetings with the first and last meeting
    dates, the meetings grouped by number of participants, and -- for meetings
    with more than 1 participant -- the meeting length buckets, the longest
    meeting, the average meeting length, the number of unique participants and
    the room details.

    :param meeting_date_range: (start, end) pair; a value of None leaves that
        side unconstrained. The start is queried with '$gte' and the end
        with '$lt'.
    :param room_detail: value used to construct the RoomDetailLevel passed
        to print_room_details
    """
    riffdata = Riffdata()

    # query constraints on startTime and the matching display strings
    start_time_bounds = {}
    from_constraint = to_constraint = ''

    range_start = meeting_date_range[0]
    if range_start is not None:
        start_time_bounds['$gte'] = range_start
        from_constraint = f'from {range_start:%b %d %Y}'

    range_end = meeting_date_range[1]
    if range_end is not None:
        start_time_bounds['$lt'] = range_end
        to_constraint = f'through {range_end:%b %d %Y}'

    qry = {'startTime': start_time_bounds} if start_time_bounds else None
    meetings = riffdata.get_meetings(qry)

    if len(meetings) == 0:
        print('There were no meetings found')
        return

    start_times = [m['startTime'] for m in meetings]
    earliest_start = min(start_times)
    latest_start = max(start_times)
    print(f'There were {len(meetings)} meetings {from_constraint} {to_constraint}\n'
          f'  first meeting on: {earliest_start:%b %d %Y}\n'
          f'  last meeting on:  {latest_start:%b %d %Y}\n')

    participant_counts = [len(m['participants']) for m in meetings]
    init_partcnt_cnt: MutableMapping[int, int] = {}  # typed seed value for the reduce below
    meetings_w_num_participants = reduce(inc_cnt, participant_counts, init_partcnt_cnt)
    print('Number of meetings grouped by number of participants:')
    pprint.pprint(meetings_w_num_participants)
    print()

    # from here on only meetings with more than 1 participant are considered
    meetings = [m for m in meetings if len(m['participants']) > 1]

    if not meetings:
        print('There were no meetings with more than 1 participant')
        return

    print(f'Number of meetings with more than 1 participant was {len(meetings)}')

    # [minutes, count] pairs handed to inc_bucket as the accumulator; the
    # reduce result is discarded, so inc_bucket is relied on to update the
    # pairs in place -- NOTE(review): confirm against inc_bucket.
    meeting_duration_distribution = [[minutes, 0]
                                     for minutes in (5, 10, 20, 40, 60, 120, 180, 720)]

    meeting_durations = [m['meetingLengthMin'] for m in meetings]
    reduce(inc_bucket, meeting_durations, meeting_duration_distribution)
    print(f'The {len(meetings)} meetings grouped by meeting length in minutes:')
    print_buckets(meeting_duration_distribution)
    print()

    print('The longest meeting was:')
    Riffdata.print_meeting(max(meetings, key=lambda m: m['meetingLengthMin']))
    print()

    avg_meeting_duration = sum(meeting_durations) / len(meeting_durations)
    print(f'Average length of a meeting was {avg_meeting_duration:.1f} minutes\n')

    # the unique participants across all of these meetings
    all_meeting_participants: Set[str] = {participant
                                          for m in meetings
                                          for participant in m['participants']}
    print(f'Total number of participants in these meetings was {len(all_meeting_participants)}\n')

    # print the requested room details
    print_room_details(meetings, RoomDetailLevel(room_detail))

コード例 #8

0

ファイルを表示

ファイル: meetings.py プロジェクト: andrew-reilly19/analyze-data

def print_room_details(meetings, detail_level: RoomDetailLevel = RoomDetailLevel.COUNT) -> None:
    """
    Print details about the rooms in which the given meetings took place

    What is printed depends on the detail level:

    - NONE: nothing at all
    - COUNT: each room name with the number of meetings held in that room
    - SUMMARY: each room name with a usage summary of that room:
        - the number of meetings held in the room
        - the shortest, longest and average meeting time in the room
        - the fewest and most participants in the room
    - SUMMARY_ATTENDEES: the SUMMARY information followed by every participant
      of the room with the number of meetings in the room they attended
    - ALL_MEETINGS: each room name with its meeting count followed by the
      complete details of every meeting held in the room

    All levels except NONE start with the number of rooms used.
    """
    if detail_level is RoomDetailLevel.NONE:
        return

    # group the meetings by room name: room name -> room dict holding the
    # list of meetings (and later the summary values)
    rooms: MutableMapping[str, MutableMapping[str, Any]] = {}
    for meeting in meetings:
        rooms.setdefault(meeting['room'], {'meetings': []})['meetings'].append(meeting)

    print(f'{len(rooms)} rooms used')

    count_only = detail_level is RoomDetailLevel.COUNT
    every_meeting = detail_level is RoomDetailLevel.ALL_MEETINGS
    if count_only or every_meeting:
        if count_only:
            print('Count of the number of times a meeting room was used:')
        for room_name, room in rooms.items():
            print(f'{room_name}: {len(room["meetings"])}')
            if every_meeting:
                for meeting in room['meetings']:
                    Riffdata.print_meeting(meeting)
                    print()
        return

    # the remaining levels need the summary values of every room
    for room in rooms.values():
        set_room_summary(room)

    with_attendees = detail_level is RoomDetailLevel.SUMMARY_ATTENDEES
    if not (detail_level is RoomDetailLevel.SUMMARY or with_attendees):
        return

    summary_keys = ('shortest_meeting', 'longest_meeting', 'avg_meeting',
                    'fewest_participants', 'most_participants', 'room_participants')
    for room_name, room in rooms.items():
        summary = room['summary']
        shortest, longest, average, fewest, most, attendees = (summary[key]
                                                               for key in summary_keys)

        print(f'{room_name}: {len(room["meetings"])} meetings')

        # collapse a range to a single value when both ends are the same
        if fewest != most:
            print(f'\tattended by {fewest} - {most} participants')
        else:
            print(f'\tattended by {fewest} participants')

        if shortest != longest:
            print(f'\tlasting from {shortest:.1f} to {longest:.1f} minutes'
                  f' (avg: {average:.1f})')
        else:
            print(f'\tlasting {shortest:.1f} minutes')

        if with_attendees:
            print('\troom participants (# of meetings)')
            for participant, meeting_cnt in attendees.items():
                print(f'\t\t{participant} ({meeting_cnt})')

        print()

コード例 #9

0

ファイルを表示

ファイル: interruptions.py プロジェクト: andrew-reilly19/analyze-data

def drop_single_db(new_db_name):
    """Drop the database named new_db_name through the client of a new Riffdata instance."""
    Riffdata().client.drop_database(new_db_name)

コード例 #10

0

ファイルを表示

ファイル: interruptions.py プロジェクト: andrew-reilly19/analyze-data

def create_single_db(participantId, new_db_name):
    """
    Create the single-participant database new_db_name for participantId.

    The work is delegated to create_single_participant_db of a new Riffdata instance.
    """
    Riffdata().create_single_participant_db(participantId, new_db_name)