def __init__(self, chat_df, students_df, filter_modes, time_delta,
             start_sentence, zoom_names_to_ignore):
        """
        Split a zoom chat into sessions and keep one Session object per session.

        :param chat_df: zoom chat (df), must hold a "time" column
        :param students_df: student class raw data (df), must hold an "id" column
        :param filter_modes: filters the user picked for parsing the text file (list of str)
        :param time_delta: max time from start sentence to the last message to parse in each session in minutes (int)
        :param start_sentence: start sentence that initiate sessions for parse (str)
        :param zoom_names_to_ignore: zoom names that will not be considered (list of str)
        """
        # bundle the parsing options so they can travel together
        meta_data = AttendanceMetaData(
            filter_modes=filter_modes, time_delta=time_delta,
            start_sentence=start_sentence,
            zoom_names_to_ignore=zoom_names_to_ignore)

        # earliest timestamp in the chat: sort ascending, take the first entry
        ordered_times = chat_df["time"].sort_values()
        self.first_message_time = ordered_times.iloc[0]

        start_indices = Attendance.get_start_indices(chat_df, meta_data)

        # "id" is parked in the index while the remaining columns are cast to
        # str, so it keeps its own dtype once it is restored as a column
        students_by_id = students_df.set_index("id")
        self._df_students = students_by_id.astype(str).reset_index()

        # one Session per detected start index
        self._sessions = [
            Session(
                self._df_students,
                Attendance.get_df_of_time_segment(
                    chat_df, start_indices, position, time_delta),
                meta_data)
            for position in range(len(start_indices))
        ]
Example #2
0
 def test_get_start_indices(self, folders, student_df, chat_df_func,
                            filter_modes, file_name, time_delta,
                            start_sentence, zoom_names_to_ignore,
                            num_sessions):
     """
     Check that the number of session start indices found in a chat file
     equals the expected ``num_sessions``.
     """
     # load the chat file from the configured chat folder
     chat_path = os.path.join(folders["chat_folder"], file_name)
     chat_df = chat_df_func(chat_path)

     parse_settings = AttendanceMetaData(
         filter_modes=filter_modes, time_delta=time_delta,
         start_sentence=start_sentence,
         zoom_names_to_ignore=zoom_names_to_ignore)

     found_starts = Attendance.get_start_indices(chat_df, parse_settings)
     assert len(found_starts) == num_sessions
    def test_get_relevant_chat_in_session(self, folders, chat_df_func, df_students_func, filter_modes,
                                chat_file_name, excel_file_name, time_delta, start_sentence, zoom_names_to_ignore, counting_dict):
        """
        For every session in the chat, check the total number of rows returned by
        ``Session.get_relevant_chat_in_session`` and how many of them are flagged
        with ``relevant == 1`` against the expected counts in ``counting_dict``.
        """
        chat_path = os.path.join(folders["chat_files_folder"], chat_file_name)
        students_path = os.path.join(folders["student_list_files_folder"], excel_file_name)
        chat_df = chat_df_func(chat_path)
        student_df = df_students_func(students_path)

        parse_settings = AttendanceMetaData(
            filter_modes=filter_modes,
            time_delta=time_delta,
            start_sentence=start_sentence,
            zoom_names_to_ignore=zoom_names_to_ignore,
        )
        start_indices = Attendance.get_start_indices(chat_df, parse_settings)
        # only the prepared students frame is used below; the second value is ignored
        df_students_for_report, _unused_report_df = Attendance.prepare_chat_and_classroom(student_df, chat_df)

        for session_no in range(len(start_indices)):
            session_chat = Attendance.get_df_of_time_segment(chat_df, start_indices, session_no, time_delta)
            session_part = Session.get_relevant_chat_in_session(
                df_students_for_report, session_chat, parse_settings)

            # total messages kept for this session
            row_count = session_part.shape[0]
            assert row_count == counting_dict["total_messages"][session_no]

            # messages flagged as relevant
            relevant_mask = session_part["relevant"] == 1
            assert relevant_mask.sum() == counting_dict["relevant_messages"][session_no]
Example #4
0
 def test_get_df_of_time_segment(self, folders, student_df, chat_df_func,
                                 filter_modes, file_name, time_delta,
                                 start_sentence, zoom_names_to_ignore,
                                 list_rows_in_session):
     """
     Check that each session's time segment holds the expected number of
     rows, as listed per session in ``list_rows_in_session``.
     """
     # load the chat file from the configured chat folder
     chat_path = os.path.join(folders["chat_folder"], file_name)
     chat_df = chat_df_func(chat_path)

     parse_settings = AttendanceMetaData(
         filter_modes=filter_modes, time_delta=time_delta,
         start_sentence=start_sentence,
         zoom_names_to_ignore=zoom_names_to_ignore)
     start_indices = Attendance.get_start_indices(chat_df, parse_settings)

     for session_no in range(len(start_indices)):
         segment = Attendance.get_df_of_time_segment(
             chat_df, start_indices, session_no, time_delta)
         row_count = segment.shape[0]
         assert row_count == list_rows_in_session[session_no]
    def test_first_message_time(self, folders, chat_df_func, df_students_func, filter_modes,
                                chat_file_name, excel_file_name, time_delta, start_sentence, zoom_names_to_ignore, message_time_tuples_list):
        """
        Build a Session for every session in the chat and compare the hour, minute and
        second of its ``_first_message_time`` with the matching entry of
        ``message_time_tuples_list`` (indexed as hour, minute, second).
        """
        chat_path = os.path.join(folders["chat_files_folder"], chat_file_name)
        students_path = os.path.join(folders["student_list_files_folder"], excel_file_name)
        chat_df = chat_df_func(chat_path)
        student_df = df_students_func(students_path)

        parse_settings = AttendanceMetaData(
            filter_modes=filter_modes,
            time_delta=time_delta,
            start_sentence=start_sentence,
            zoom_names_to_ignore=zoom_names_to_ignore,
        )
        start_indices = Attendance.get_start_indices(chat_df, parse_settings)

        # cast every column except "id" to str, then restore "id" as a column
        students_by_id = student_df.set_index("id")
        df_students_for_report = students_by_id.astype(str).reset_index()

        for session_no in range(len(start_indices)):
            session_chat = Attendance.get_df_of_time_segment(chat_df, start_indices, session_no, time_delta)
            current_session = Session(df_students_for_report, session_chat, parse_settings)

            observed = current_session._first_message_time
            assert observed.hour == message_time_tuples_list[session_no][0]
            assert observed.minute == message_time_tuples_list[session_no][1]
            assert observed.second == message_time_tuples_list[session_no][2]
Example #6
0
    def __init__(self, chat_df, students_df, filter_modes, time_delta, start_sentence, zoom_names_to_ignore):
        """
        Split a zoom chat into sessions and keep one Session object per session.

        :param chat_df: zoom chat (df), must hold a "time" column
        :param students_df: student class raw data (df)
        :param filter_modes: filters the user picked for parsing the text file (list of str)
        :param time_delta: max time from start sentence to the last message to parse in each session in minutes (int)
        :param start_sentence: start sentence that initiate sessions for parse (str), stored lower-cased
        :param zoom_names_to_ignore: zoom names that will not be considered (list of str)
        """
        # bundle the parsing options; the start sentence is lower-cased first
        lowered_start = start_sentence.lower()
        meta_data = AttendanceMetaData(
            filter_modes=filter_modes,
            time_delta=time_delta,
            start_sentence=lowered_start,
            zoom_names_to_ignore=zoom_names_to_ignore,
        )

        # earliest timestamp in the chat: sort ascending, take the first entry
        ordered_times = chat_df["time"].sort_values()
        self.first_message_time = ordered_times.iloc[0]

        start_indices = Attendance.get_start_indices(chat_df, meta_data)
        prepared = Attendance.prepare_chat_and_classroom(students_df, chat_df)
        self._df_students, report_df = prepared

        # one Session per detected start index, cut out of the prepared report frame
        self._sessions = [
            Session(
                self._df_students,
                Attendance.get_df_of_time_segment(report_df, start_indices, position, time_delta),
                meta_data,
            )
            for position in range(len(start_indices))
        ]