def __handle_tt_files(tt_files: List[str], output_task_path: str) -> bool:
    """
    Return True if a new task-tracker file was created and False otherwise.

    We should choose either the last state of the task-tracker file for the task, or all last states,
    and create a new file that unions them. The student can submit the solution several times, while
    the history in the task-tracker file is not erased; in that case we only need to select the final
    file with the entire history. On the other hand, if a file becomes full, an additional file is sent
    and the new files contain a new history. In that case we need to find the last states of all files
    with a unique history, merge them by timestamp, and write the result to a new final file.

    For more details see
    https://github.com/JetBrains-Research/codetracker-data/wiki/Data-preprocessing:-primary-data-processing
    """
    dataframes = []
    file_name = None
    for tt_file in tt_files:
        current_df = pd.read_csv(tt_file, encoding=consts.ISO_ENCODING)
        if not is_test_mode(current_df):
            dataframes.append(current_df)
            if file_name is None:
                file_name = get_name_from_path(tt_file)
    if len(dataframes) == 0:
        return False
    new_tt_path = os.path.join(output_task_path, file_name)
    create_file('', new_tt_path)
    __merge_dataframes(dataframes, sorted_column=TASK_TRACKER_COLUMN.TIMESTAMP.value).to_csv(new_tt_path)
    return True

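# NOTE: __merge_dataframes is used above but not shown here. Below is a minimal sketch of what such a
# helper might look like, assuming it unions the collected histories and orders the rows by the given
# timestamp column (pandas is already imported as pd in this module); the real helper may differ.
def __merge_dataframes_sketch(dataframes: List[pd.DataFrame], sorted_column: str) -> pd.DataFrame:
    # Union all partial histories into one dataframe
    merged_df = pd.concat(dataframes, ignore_index=True)
    # Order the merged history chronologically by the timestamp column
    return merged_df.sort_values(by=sorted_column).reset_index(drop=True)
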
def test_changing_extension(self, param_changing_extension_test: Callable) -> None:
    (in_data, new_extension, expected_name) = param_changing_extension_test
    clear_folder(folder_with_slash)
    create_file('', in_data)
    change_extension_to(in_data, new_extension, True)
    assert os.path.isfile(expected_name)
    remove_file(expected_name)

def create_file_for_tree(self, to_overwrite: bool = False) -> str:
    if self._tree_file is not None and not to_overwrite:
        log_and_raise_error(f'File for tree {get_code_from_tree(self.tree)} already exists in files dict', log)
    if not is_file(self.tree_file):
        code = get_code_from_tree(self.tree)
        create_file(code, self.tree_file)
    self._tree_file = self.tree_file
    return self.tree_file

def __print_output(output: Optional[Any], file_name: str = 'path_finder_test_system_output',
                   to_write_to_file: bool = False) -> None:
    if output is not None:
        print(f'{output}\n')
        if to_write_to_file:
            path = os.path.join(SOLUTION_SPACE_FOLDER, 'path_finder_test_system_output', file_name)
            extension = EXTENSION.HTML.value if isinstance(output, PrettyTable) else EXTENSION.TXT.value
            path += extension
            create_file(TestSystem.__format_content(output), path)

def anonymize_cpp_code(root: str, local_gorshochek_path: str,
                       output_folder_name: str = 'anonymizerResult') -> None:
    """
    We use the gorshochek library: https://github.com/JetBrains-Research/gorshochek

    You need to clone the repo and build a docker image (see the gorshochek README).
    Note: you need to change the config.yaml file before building the docker image:

    n transformations: 1
    transformations:
      - remove comments:
          p: 1.0
      - rename entities:
          p: 1
          rename functions: true
          rename variables: true
          strategy:
            name: hash
            hash prefix: d

    You can change the 'seed', 'max tokens', and 'max token len' params if you want.
    """
    cpp_path = f'{remove_slash(root)}/{LANGUAGE.CPP.value}'
    output_path = f'{get_parent_folder(root)}/{output_folder_name}/{LANGUAGE.CPP.value}'
    task_dirs = get_all_file_system_items(cpp_path, item_condition=task_item_condition,
                                          item_type=FILE_SYSTEM_ITEM.SUBDIR)
    gorshochek_anonymizer = GorshochekAnonymizer(local_gorshochek_path)
    for task_dir in task_dirs:
        task = get_name_from_path(task_dir, with_extension=False)
        print(f'Start handling the task {task}')
        files = get_all_file_system_items(task_dir, item_condition=extension_file_condition(EXTENSION.CSV))
        for file in files:
            print(f'Start handling the file {file}')
            df = pd.read_csv(file, encoding=ISO_ENCODING)
            # Delete incorrect fragments
            df = df[df.apply(lambda row: not is_incorrect_fragment(row[TESTS_RESULTS]), axis=1)]
            df[TASK_TRACKER_COLUMN.FRAGMENT.value] = \
                df[TASK_TRACKER_COLUMN.FRAGMENT.value].apply(gorshochek_anonymizer.anonymize_code_fragment)
            current_output_path = f'{output_path}/{task}/{get_name_from_path(file)}'
            create_file('', current_output_path)
            df.to_csv(current_output_path)
    gorshochek_anonymizer.remove_directories()

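# A hypothetical invocation of anonymize_cpp_code, assuming the dataset root contains a
# <root>/cpp/<task>/*.csv structure and that the gorshochek docker image has already been built
# as described in the docstring above. Both paths below are placeholders, not real locations.
if __name__ == '__main__':
    dataset_root = '/path/to/dataset/root'
    local_gorshochek = '/path/to/local/gorshochek/clone'
    anonymize_cpp_code(dataset_root, local_gorshochek, output_folder_name='anonymizerResult')
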
def create_dot_graph(output_folder: str,
                     name_prefix: str,
                     graph_representation: str,
                     output_format: consts.EXTENSION = consts.EXTENSION.PNG) -> str:
    file_path = os.path.join(output_folder, f'{name_prefix}{consts.EXTENSION.DOT.value}')
    # Create dot file
    create_file(graph_representation, file_path)
    dst_path = os.path.join(output_folder, f'{name_prefix}{output_format.value}')
    args = ['dot', f'-T{output_format.value[1:]}', file_path, '-o', dst_path]
    # Generate graph representation
    check_call_safely(args)
    return dst_path

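# NOTE: check_call_safely is a repository helper that is not shown in this file. Below is a minimal,
# hypothetical sketch of such a wrapper, assuming it runs the command (e.g. the graphviz 'dot' call
# above) and logs a failure instead of raising; the real helper's name, signature, and behavior may differ.
def check_call_safely_sketch(args: List[str]) -> bool:
    # Local imports keep this sketch self-contained
    import logging
    import subprocess
    try:
        # Run the external command and wait for it to finish
        subprocess.check_call(args)
        return True
    except (subprocess.CalledProcessError, OSError) as e:
        logging.getLogger(__name__).error(f'Command {args} failed: {e}')
        return False
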
def anonymize_code_fragment(self, code_fragment: str) -> str:
    """
    gorshochek works only with folders of cpp files, so we create a folder with the code fragment
    and run gorshochek on it.

    Note: the default scripts/run.sh file in the gorshochek repository requires sudo access to run
    docker. We remove it for the anonymization process to avoid running sudo processes from an
    external process.

    After getting the result we delete the created folders.
    """
    create_file(code_fragment, f'{self._input_dir}/{self._input_name}.cpp')
    p = subprocess.Popen(['sh', './scripts/run.sh', self._data_name, self._out_name],
                         cwd=self._local_gorshochek_path)
    p.wait()
    return get_content_from_file(f'{self._output_dir}/{self._input_name}/transformation_1.cpp')

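# A hypothetical end-to-end usage sketch for GorshochekAnonymizer: anonymize one C++ fragment and
# clean up the temporary folders gorshochek needs. The constructor argument and remove_directories()
# mirror their use in anonymize_cpp_code above; the fragment and function name here are illustrative only.
def anonymize_single_fragment_example(local_gorshochek_path: str) -> str:
    anonymizer = GorshochekAnonymizer(local_gorshochek_path)
    try:
        fragment = 'int main() { int my_counter = 0; return my_counter; }'
        # Writes the fragment into gorshochek's input folder, runs scripts/run.sh, reads the result
        return anonymizer.anonymize_code_fragment(fragment)
    finally:
        # Remove the folders created for gorshochek's input and output
        anonymizer.remove_directories()
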
def visualize_graph(self, name_prefix: str = 'graph', to_create_vertices_content: bool = True,
                    output_format: consts.EXTENSION = consts.EXTENSION.PNG) -> str:
    graph_representation = self.__get_graph_representation()
    folder_path = os.path.join(consts.GRAPH_REPRESENTATION_PATH, f'{name_prefix}_{self._graph.id}')
    # Remove an older graph with the same name
    remove_directory(folder_path)
    file_path = os.path.join(folder_path, f'{name_prefix}{consts.EXTENSION.DOT.value}')
    # Create dot file
    create_file(graph_representation, file_path)
    dst_path = os.path.join(folder_path, f'{name_prefix}{output_format.value}')
    args = ['dot', f'-T{output_format.value[1:]}', file_path, '-o', dst_path]
    # Generate graph representation
    check_call_safely(args)
    if to_create_vertices_content:
        self.__create_vertices_content(folder_path)
    return folder_path

def generate_file_for_evaluation(self, name_suffices: List[str],
                                 evaluation_fragment_file: str = EVALUATION_FRAGMENT_PATH) -> None:
    fragment = get_content_from_file(evaluation_fragment_file, to_strip_nl=False)
    fragments = [f'{t_i[TEST_INPUT.INDEX]}.\n{fragment}' for t_i in self._test_inputs]
    evaluation_content = ''.join(fragments)
    for name_suffix in name_suffices:
        evaluation_file = os.path.join(
            EVALUATION_PATH,
            f'{self._task.value}_evaluation_file_{name_suffix}{EXTENSION.TXT.value}')
        create_file(evaluation_content, evaluation_file)

def write_candidates_info_to_file(self, user_tree: AnonTree, candidates: List[IMeasuredTree],
                                  file_prefix: str = 'candidates', path: Optional[str] = None) -> str:
    user_info = f'profile: {user_tree.code_info_list[0].user.profile},\n\n' \
                f'{get_code_from_tree(user_tree.tree)}\n\n\n\n\n'
    candidates_info = ''.join([f'Tree id: {candidate.candidate_tree.id},\n'
                               f'Distance to user: {candidate.distance_to_user}\n'
                               f'Distance info: {candidate.distance_info}\n\n\n'
                               f'{get_code_from_tree(candidate.candidate_tree.tree)}\n\n\n'
                               for candidate in candidates])
    if path is None:
        path = os.path.join(self.graph.graph_directory, 'candidates_info')
    file_path = os.path.join(path, f'{file_prefix}_info{consts.EXTENSION.TXT.value}')
    create_file(user_info + candidates_info, file_path)
    log.info(f'Candidates were written in the file {file_path}')
    return file_path

def create_source_file_with_name(self, source_code: str, name: str) -> str:
    source_code_file = os.path.join(TASKS_TESTS_PATH, SOURCE_OBJECT_NAME,
                                    name + get_extension_by_language(self.language).value)
    create_file(source_code, source_code_file)
    return source_code_file

def __create_vertices_content(self, folder_path: str) -> None:
    for vertex in self._graph.get_traversal():
        current_path = os.path.join(folder_path, f'vertex_{vertex.id}{consts.EXTENSION.TXT.value}')
        content = self.__class__.__get_vertex_info(vertex)
        create_file(content, current_path)

def __create_source_file(self, source_code: str) -> str:
    source_code_file = os.path.join(self.EXPRESSIONS_SOURCES_PATH, 'brackets_test.py')
    create_file(source_code, source_code_file)
    return source_code_file