def __init__(self, chat_df, students_df, filter_modes, time_delta, start_sentence, zoom_names_to_ignore):
    """
    Split a zoom chat into sessions and keep one ``Session`` object per session.

    :param chat_df: zoom chat (df)
    :param students_df: student class raw data (df)
    :param filter_modes: filters the user picked for parsing the text file (list of str)
    :param time_delta: max time from start sentence to the last message to parse in each session in minutes (int)
    :param start_sentence: start sentence that initiates sessions for parse (str)
    :param zoom_names_to_ignore: zoom names that will not be considered (list of str)
    """
    # Bundle the parsing options so they are handed around as one object.
    session_settings = AttendanceMetaData(
        filter_modes=filter_modes,
        time_delta=time_delta,
        start_sentence=start_sentence,
        zoom_names_to_ignore=zoom_names_to_ignore,
    )

    # Smallest value of the "time" column, i.e. the first message in the chat.
    ordered_times = chat_df["time"].sort_values()
    self.first_message_time = ordered_times.iloc[0]

    session_starts = Attendance.get_start_indices(chat_df, session_settings)

    # "id" sits in the index while the cast runs, so every column except
    # "id" is converted to str.
    students_by_id = students_df.set_index("id")
    self._df_students = students_by_id.astype(str).reset_index()

    # One Session per detected start index, built from that session's chat slice.
    self._sessions = [
        Session(
            self._df_students,
            Attendance.get_df_of_time_segment(chat_df, session_starts, position, time_delta),
            session_settings,
        )
        for position in range(len(session_starts))
    ]
def test_get_start_indices(self, folders, student_df, chat_df_func, filter_modes, file_name, time_delta,
                           start_sentence, zoom_names_to_ignore, num_sessions):
    """Check that ``Attendance.get_start_indices`` finds ``num_sessions`` start indices in the chat file."""
    chat_path = os.path.join(folders["chat_folder"], file_name)
    chat_df = chat_df_func(chat_path)

    settings = AttendanceMetaData(
        filter_modes=filter_modes,
        time_delta=time_delta,
        start_sentence=start_sentence,
        zoom_names_to_ignore=zoom_names_to_ignore,
    )

    found_starts = Attendance.get_start_indices(chat_df, settings)
    assert len(found_starts) == num_sessions
def test_get_relevant_chat_in_session(self, folders, chat_df_func, df_students_func, filter_modes, chat_file_name,
                                      excel_file_name, time_delta, start_sentence, zoom_names_to_ignore,
                                      counting_dict):
    """
    For every session in the chat, compare the output of ``Session.get_relevant_chat_in_session``
    with ``counting_dict``: the row count against ``"total_messages"`` and the number of rows
    whose ``"relevant"`` column equals 1 against ``"relevant_messages"``.
    """
    chat_path = os.path.join(folders["chat_files_folder"], chat_file_name)
    chat_df = chat_df_func(chat_path)
    roster_path = os.path.join(folders["student_list_files_folder"], excel_file_name)
    student_df = df_students_func(roster_path)

    settings = AttendanceMetaData(
        filter_modes=filter_modes,
        time_delta=time_delta,
        start_sentence=start_sentence,
        zoom_names_to_ignore=zoom_names_to_ignore,
    )
    session_starts = Attendance.get_start_indices(chat_df, settings)

    # Only the prepared students frame is used below; the second value is not.
    students_for_report, _unused_report_df = Attendance.prepare_chat_and_classroom(student_df, chat_df)

    for session_no in range(len(session_starts)):
        session_df = Attendance.get_df_of_time_segment(chat_df, session_starts, session_no, time_delta)
        relevant_chat = Session.get_relevant_chat_in_session(students_for_report, session_df, settings)

        total_rows = relevant_chat.shape[0]
        assert total_rows == counting_dict["total_messages"][session_no]

        relevant_rows = (relevant_chat["relevant"] == 1).sum()
        assert relevant_rows == counting_dict["relevant_messages"][session_no]
def test_get_df_of_time_segment(self, folders, student_df, chat_df_func, filter_modes, file_name, time_delta,
                                start_sentence, zoom_names_to_ignore, list_rows_in_session):
    """Check that each session slice from ``Attendance.get_df_of_time_segment`` has the expected row count."""
    chat_path = os.path.join(folders["chat_folder"], file_name)
    chat_df = chat_df_func(chat_path)

    settings = AttendanceMetaData(
        filter_modes=filter_modes,
        time_delta=time_delta,
        start_sentence=start_sentence,
        zoom_names_to_ignore=zoom_names_to_ignore,
    )
    session_starts = Attendance.get_start_indices(chat_df, settings)

    for session_no in range(len(session_starts)):
        session_df = Attendance.get_df_of_time_segment(chat_df, session_starts, session_no, time_delta)
        row_count = session_df.shape[0]
        assert row_count == list_rows_in_session[session_no]
def test_first_message_time(self, folders, chat_df_func, df_students_func, filter_modes, chat_file_name,
                            excel_file_name, time_delta, start_sentence, zoom_names_to_ignore,
                            message_time_tuples_list):
    """
    Build a ``Session`` for every session in the chat and compare the hour, minute and second of its
    ``_first_message_time`` with the matching entry of ``message_time_tuples_list``.
    """
    chat_path = os.path.join(folders["chat_files_folder"], chat_file_name)
    chat_df = chat_df_func(chat_path)
    roster_path = os.path.join(folders["student_list_files_folder"], excel_file_name)
    student_df = df_students_func(roster_path)

    settings = AttendanceMetaData(
        filter_modes=filter_modes,
        time_delta=time_delta,
        start_sentence=start_sentence,
        zoom_names_to_ignore=zoom_names_to_ignore,
    )
    session_starts = Attendance.get_start_indices(chat_df, settings)

    # "id" sits in the index while the cast runs, so every column except
    # "id" is converted to str.
    students_by_id = student_df.set_index("id")
    students_for_report = students_by_id.astype(str).reset_index()

    for session_no in range(len(session_starts)):
        session_df = Attendance.get_df_of_time_segment(chat_df, session_starts, session_no, time_delta)
        session = Session(students_for_report, session_df, settings)

        actual_time = session._first_message_time
        expected_time = message_time_tuples_list[session_no]  # indexed as (hour, minute, second)

        assert actual_time.hour == expected_time[0]
        assert actual_time.minute == expected_time[1]
        assert actual_time.second == expected_time[2]
def __init__(self, chat_df, students_df, filter_modes, time_delta, start_sentence, zoom_names_to_ignore):
    """
    Split a zoom chat into sessions and keep one ``Session`` object per session.

    :param chat_df: zoom chat (df)
    :param students_df: student class raw data (df)
    :param filter_modes: filters the user picked for parsing the text file (list of str)
    :param time_delta: max time from start sentence to the last message to parse in each session in minutes (int)
    :param start_sentence: start sentence that initiates sessions for parse (str)
    :param zoom_names_to_ignore: zoom names that will not be considered (list of str)
    """
    # Bundle the parsing options into one object; the start sentence is
    # stored lower-cased.
    session_settings = AttendanceMetaData(
        filter_modes=filter_modes,
        time_delta=time_delta,
        start_sentence=start_sentence.lower(),
        zoom_names_to_ignore=zoom_names_to_ignore,
    )

    # Smallest value of the "time" column, i.e. the first message in the chat.
    ordered_times = chat_df["time"].sort_values()
    self.first_message_time = ordered_times.iloc[0]

    # Start indices are located on the raw chat, while the session slices
    # below are cut from the prepared report frame.
    session_starts = Attendance.get_start_indices(chat_df, session_settings)
    self._df_students, prepared_chat = Attendance.prepare_chat_and_classroom(students_df, chat_df)

    # One Session per detected start index, built from that session's slice.
    self._sessions = [
        Session(
            self._df_students,
            Attendance.get_df_of_time_segment(prepared_chat, session_starts, position, time_delta),
            session_settings,
        )
        for position in range(len(session_starts))
    ]