def test_data_preprocessing(self, param_data_preprocessing) -> None:
    """Run the preprocessing command for the given parameters, then clean up.

    The result of ``check_output`` is not inspected.
    NOTE(review): presumably ``check_output`` raises when the command fails,
    which is what makes this a test - confirm against its import.
    """
    check_output(self.__get_args(param_data_preprocessing))

    # Delete the new folders
    preprocessed_folder = f'{BASE_FOLDER_NAME}_{PREPROCESSING_OUTPUT_PREFIX}'
    created_folders = (
        preprocessed_folder,
        f'{preprocessed_folder}_{RUNNING_TESTS_OUTPUT_DIRECTORY}',
    )
    for folder_name in created_folders:
        remove_directory(os.path.join(get_parent_folder(DATA_PATH), folder_name))
def __get_task_by_ct_file(file: str) -> Optional[TASK]:
    """Resolve the task of a file from the name of its parent folder.

    :param file: path to the file
    :return: the TASK built from the parent folder name, or None (after
        logging) when TASK rejects that name with a ValueError
    """
    task_key = get_name_from_path(get_parent_folder(file), with_extension=False)
    try:
        task = TASK(task_key)
    except ValueError:
        log.info(f'Unexpected task for the file {file}')
        return None
    return task
def create_directory_for_plots(
        path: str,
        folder: plot_consts.CHART_TYPE,
        file_name: str,
        statistics_folder_name: str = 'statistics') -> str:
    """Build the plot location next to ``path`` and return the plot file path.

    The directory is ``<parent of path>/<statistics_folder_name>/<str(folder)>``;
    it is passed to ``create_directory`` before the file path is returned.

    :param path: path whose parent folder hosts the statistics folder
    :param folder: chart type, used (as a string) for the sub-folder name
    :param file_name: name of the plot file
    :param statistics_folder_name: name of the statistics folder
    :return: ``file_name`` joined onto the plot directory
    """
    plots_directory = os.path.join(
        get_parent_folder(path),
        statistics_folder_name,
        str(folder),
    )
    create_directory(plots_directory)
    return os.path.join(plots_directory, file_name)
def plot_scoring_solutions(tt_file_path: str, name_prefix: str = 'scoring_solution') -> str:
    """Build a dot graph of the solution scores stored in a csv file.

    :param tt_file_path: path to the csv file to read
    :param name_prefix: suffix appended (after an underscore) to the source
        file name to form the graph name
    :return: the path returned by ``create_dot_graph``
    """
    ct_df = pd.read_csv(tt_file_path, encoding=ISO_ENCODING)

    # Delete incorrect fragments
    is_correct = ct_df.apply(
        lambda row: not __is_incorrect_fragment(row[TESTS_RESULTS]), axis=1)
    # Take an explicit copy: assigning a column on a filtered slice triggers
    # pandas' SettingWithCopyWarning and leaves it unclear which frame is
    # being modified.
    correct_df = ct_df[is_correct].copy()
    correct_df[TESTS_RESULTS] = calculate_current_task_rate(correct_df)

    # NOTE(review): the column is written via TESTS_RESULTS and read via
    # TASK_TRACKER_COLUMN.TESTS_RESULTS.value - presumably the same name; confirm.
    scores = correct_df[TASK_TRACKER_COLUMN.TESTS_RESULTS.value].values
    labels, graph_structure = get_labels_and_graph_structure(scores)
    solutions_representation = get_graph_representation(labels, graph_structure)

    graph_name = f'{get_name_from_path(tt_file_path, False)}_{name_prefix}'
    return create_dot_graph(get_parent_folder(tt_file_path), graph_name,
                            solutions_representation)
def get_files_from_ati(activity_tracker_data: pd.DataFrame) -> List[str]:
    """Collect the distinct file names found in the current-file column.

    :param activity_tracker_data: activity tracker data frame
    :return: the file names, in first-seen order
    :raises: via ``log_and_raise_error`` when the same file name appears
        under two different parent folders
    """
    current_file_column = consts.ACTIVITY_TRACKER_COLUMN.CURRENT_FILE.value
    unique_paths = __remove_nan(activity_tracker_data[current_file_column].unique())

    # Maps each file name to the parent folder it was first seen in
    folder_by_file = {}
    for current_path in unique_paths:
        folder = get_parent_folder(current_path)
        file_name = get_name_from_path(current_path)
        known_folder = folder_by_file.get(file_name)
        if known_folder is None:
            folder_by_file[file_name] = folder
        elif known_folder != folder:
            log_and_raise_error(
                'Activity tracker data contains several files with the same names',
                log)
    return list(folder_by_file)
def crop_data_and_create_plots(original_data_path: str,
                               column: Column,
                               start_value: Any,
                               end_value: Any = None,
                               file_name_prefix: str = 'crop_',
                               folder_name_prefix: str = 'cropped_data',
                               create_sub_folder: bool = True) -> None:
    """Crop the data, save it, and plot the original against the cropped file.

    All arguments are forwarded unchanged to ``crop_data_and_save``; the
    comparative plot is saved into the parent folder of the cropped file.
    """
    cropped_path = crop_data_and_save(original_data_path, column,
                                      start_value, end_value,
                                      file_name_prefix, folder_name_prefix,
                                      create_sub_folder)
    plots_folder = get_parent_folder(cropped_path)
    create_comparative_filtering_plot(original_data_path, cropped_path,
                                      folder_to_save=plots_folder)
def get_in_and_out_files(test_type: DIFF_HANDLER_TEST_TYPES,
                         task: TASK) -> List[Tuple[str, str, str]]:
    """List ``(src_file, dst_file, out_file)`` triples for a test type and task.

    Pairs whose expected out-file name is listed for ``task`` in
    FAILED_APPLYING_DIFFS_TO_STUDENTS_CODE_TEST are skipped.
    """
    triples = []
    for src_file, dst_file in get_src_and_dst_files(test_type, task):
        src_number = get_name_from_path(src_file, with_extension=False)
        dst_number = get_name_from_path(dst_file, with_extension=False)
        expected_out_file = os.path.join(get_parent_folder(src_file),
                                         f'out_{src_number}_{dst_number}.py')

        failed_names = FAILED_APPLYING_DIFFS_TO_STUDENTS_CODE_TEST.get(task, [])
        if get_name_from_path(expected_out_file) in failed_names:
            continue

        # If there is no such out_file, it means that out-code is the same as dst-code from dst_file
        out_file = expected_out_file if os.path.isfile(expected_out_file) else dst_file
        triples.append((src_file, dst_file, out_file))
    return triples
def plot_profile_statistics(
        file: str,
        column: STATISTICS_KEY,
        plot_type: CHART_TYPE,
        to_union_rare: bool = False,
        format: consts.EXTENSION = consts.EXTENSION.HTML,
        auto_open: bool = False,
        x_category_order: PLOTTY_CATEGORY_ORDER = PLOTTY_CATEGORY_ORDER.TOTAL_ASCENDING,
        x_axis_title: Optional[str] = None,
        y_axis_title: Optional[str] = None,
        to_add_percents: bool = False,
        to_add_title: bool = True) -> None:
    """Plot the statistics stored in ``file`` as a pie or a bar chart.

    The plot is built from the data frame loaded for ``column`` and is
    written relative to the parent folder of ``file``. The axis, ordering and
    percent options are only passed on for bar charts.

    :raises: via ``log_and_raise_error`` when ``plot_type`` is neither
        CHART_TYPE.PIE nor CHART_TYPE.BAR
    """
    statistics_df = __get_statistics_df_from_file(
        file, column, column.get_default(), to_union_rare)
    output_folder = get_parent_folder(file)
    labels = get_labels_for_freq_plots(column)
    title = None
    if to_add_title:
        title = __get_title_for_plots(column)

    if plot_type == CHART_TYPE.PIE:
        __plot_pie_chart(statistics_df, title, output_folder, column, labels,
                         plot_name=column.value,
                         format=format,
                         auto_open=auto_open)
        return

    if plot_type == CHART_TYPE.BAR:
        plot_and_save_freq_chart(statistics_df, title, output_folder, column, labels,
                                 plot_name=column.value,
                                 format=format,
                                 auto_open=auto_open,
                                 x_category_order=x_category_order,
                                 x_axis_title=x_axis_title,
                                 y_axis_title=y_axis_title,
                                 to_add_percents=to_add_percents)
        return

    log_and_raise_error(f'Plot type {plot_type} is incorrect!', log)
def crop_data_and_save(original_data_path: str,
                       column: Column,
                       start_value: Any,
                       end_value: Any = None,
                       file_name_prefix: str = 'crop_',
                       folder_name_prefix: str = 'cropped_data',
                       create_sub_folder: bool = True) -> str:
    """Crop the csv data at ``original_data_path`` and save the result.

    The cropped file is named ``file_name_prefix`` + the original file name.
    It is placed in the original file's parent folder, or in its
    ``folder_name_prefix`` sub-folder when ``create_sub_folder`` is true.

    :return: path of the saved cropped file
    """
    source_df = pd.read_csv(original_data_path, encoding=ISO_ENCODING)
    cropped_df = crop_data_by_timestamp(source_df, column, start_value, end_value)

    result_name = file_name_prefix + get_name_from_path(original_data_path)
    parent_folder = get_parent_folder(original_data_path)
    result_folder = (os.path.join(parent_folder, folder_name_prefix)
                     if create_sub_folder else parent_folder)
    result_path = os.path.join(result_folder, result_name)

    create_folder_and_write_df_to_file(result_folder, result_path, cropped_df)
    return result_path
def main(self) -> None:
    """Parse the arguments and dispatch to the plotter for the selected plot type.

    :raises NotImplementedError: when the parsed plot type has no plotter here
    """
    self.parse_args()
    if self._plot_type == PLOT_TYPE.PARTICIPANTS_DISTRIBUTION:
        # The statistics file is named after the distribution type
        path = os.path.join(
            get_profile_statistics(self._path),
            self._type_distr.value + EXTENSION.PICKLE.value)
        plot_profile_statistics(path,
                                self._type_distr,
                                self._chart_type,
                                to_union_rare=self._to_union_rare,
                                format=self._format,
                                auto_open=self._auto_open)
    elif self._plot_type == PLOT_TYPE.TASKS_DISTRIBUTION:
        plot_tasks_statistics(self._path,
                              format=self._format,
                              auto_open=self._auto_open)
    elif self._plot_type == PLOT_TYPE.ATI_PLOTS:
        create_ati_data_plot(self._path,
                             folder_to_save=get_parent_folder(self._path),
                             to_show=self._auto_open)
    elif self._plot_type == PLOT_TYPE.SCORING_SOLUTIONS:
        plot_scoring_solutions(self._path)
    else:
        # `NotImplemented` is a comparison sentinel, not an exception:
        # raising it fails with a TypeError instead of the intended error.
        raise NotImplementedError(
            f'Plot type {self._plot_type} is not supported')
def run_test(dict_data: DictData) -> None:
    """Check the parent-folder helpers against the expected values for every path.

    For each path in ``dict_data`` the parent folder name and the parent
    folder (requested with ``False`` and with ``True`` as the second
    argument) must equal the expected values stored in ``dict_data``.
    """
    for path in dict_data[PARENT_FOLDER_TEST_DATA.PATHS]:
        expected_name = dict_data[PARENT_FOLDER_TEST_DATA.PARENT_FOLDER_NAME]
        actual_name = get_parent_folder_name(path)
        assert expected_name == actual_name

        expected_without_slash = dict_data[PARENT_FOLDER_TEST_DATA.PARENT_FOLDER_WITHOUT_SLASH]
        actual_without_slash = get_parent_folder(path, False)
        assert expected_without_slash == actual_without_slash

        expected_with_slash = dict_data[PARENT_FOLDER_TEST_DATA.PARENT_FOLDER_WITH_SLASH]
        actual_with_slash = get_parent_folder(path, True)
        assert expected_with_slash == actual_with_slash
def __get_dst_path(src_file: str, output_directory: str) -> str:
    """Build the destination path ``output_directory/<language>/<task>/<file name>``.

    ``task`` is the name of the source file's parent folder and ``language``
    is the name of the folder above it.
    """
    task_folder = get_parent_folder(src_file)
    language_name = get_parent_folder_name(task_folder)
    task_name = get_name_from_path(task_folder, with_extension=False)
    source_name = get_name_from_path(src_file)
    return os.path.join(output_directory, language_name, task_name, source_name)
def __get_user_folder_name_from_path(file: str) -> str:
    """Return the name of the folder two levels above ``file``.

    That is the parent folder name of the file's own parent (task) folder.
    """
    return get_parent_folder_name(get_parent_folder(file))