Exemple #1
0
def __handle_tt_files(tt_files: List[str], output_task_path: str) -> bool:
    """
    Combine the task-tracker files of one task into a single output file.

    Every file from tt_files is read as a CSV; files for which is_test_mode is true are skipped.
    The remaining dataframes are passed to __merge_dataframes with the timestamp column as the
    sorted column, and the result is written into output_task_path under the name of the first
    file that was kept.

    Background: the student can submit the solution several times, while the history of the
    task-tracker file is not erased, so normally the final file already holds the entire history.
    However, if a file became full, it is sent additionally and the following files contain a new
    history. In that case the last states of all files with a unique history have to be combined
    according to their timestamps and written to a new final file.

    For more details see https://github.com/JetBrains-Research/codetracker-data/wiki/Data-preprocessing:-primary-data-processing

    Returns True if a new task-tracker file was created and False otherwise.
    """
    kept_dataframes = []
    output_file_name = None
    for tt_file in tt_files:
        tt_df = pd.read_csv(tt_file, encoding=consts.ISO_ENCODING)
        if is_test_mode(tt_df):
            continue
        kept_dataframes.append(tt_df)
        # The output file takes the name of the first file that is kept
        if output_file_name is None:
            output_file_name = get_name_from_path(tt_file)

    # Nothing to merge: no output file is created
    if not kept_dataframes:
        return False

    merged_tt_path = os.path.join(output_task_path, output_file_name)
    create_file("", merged_tt_path)
    merged_df = __merge_dataframes(kept_dataframes,
                                   sorted_column=TASK_TRACKER_COLUMN.TIMESTAMP.value)
    merged_df.to_csv(merged_tt_path)
    return True
Exemple #2
0
 def test_changing_extension(self, param_changing_extension_test: Callable) -> None:
     """
     Check that change_extension_to produces a file with the expected name.

     The fixture value is unpacked into the path of the input file, the new extension and the
     path that is expected to exist afterwards.
     """
     in_data, new_extension, expected_name = param_changing_extension_test
     # Prepare the test folder and an empty input file (clear_folder presumably empties the folder)
     clear_folder(folder_with_slash)
     create_file('', in_data)

     change_extension_to(in_data, new_extension, True)

     expected_file_exists = os.path.isfile(expected_name)
     assert expected_file_exists
     # Clean up the file produced by the test
     remove_file(expected_name)
Exemple #3
0
    def create_file_for_tree(self, to_overwrite: bool = False) -> str:
        """
        Make sure the file for the tree exists and remember it in self._tree_file.

        If a tree file is already stored in self._tree_file and to_overwrite is False, an error is
        reported through log_and_raise_error. The file is only written when is_file reports that
        self.tree_file does not exist yet; its content is the code obtained from the tree.

        Returns the value of self.tree_file.
        """
        already_stored = self._tree_file is not None
        if already_stored and not to_overwrite:
            log_and_raise_error(f'File for tree {get_code_from_tree(self.tree)} already exists in files dict', log)

        # An existing file is left untouched
        if not is_file(self.tree_file):
            create_file(get_code_from_tree(self.tree), self.tree_file)

        self._tree_file = self.tree_file
        return self.tree_file
Exemple #4
0
 def __print_output(output: Optional[Any],
                    file_name: str = 'path_finder_test_system_output',
                    to_write_to_file: bool = False) -> None:
     """
     Print the output and optionally store it in a file.

     Nothing happens when output is None. Otherwise the output is printed, and if to_write_to_file
     is set, its formatted content is written to a file inside the
     'path_finder_test_system_output' folder of SOLUTION_SPACE_FOLDER. The file gets the HTML
     extension for a PrettyTable output and the TXT extension for anything else.
     """
     if output is None:
         return

     print(f'{output}\n')
     if not to_write_to_file:
         return

     path_without_extension = os.path.join(SOLUTION_SPACE_FOLDER,
                                           'path_finder_test_system_output',
                                           file_name)
     if isinstance(output, PrettyTable):
         extension = EXTENSION.HTML.value
     else:
         extension = EXTENSION.TXT.value
     create_file(TestSystem.__format_content(output), path_without_extension + extension)
def anonymize_cpp_code(root: str,
                       local_gorshochek_path: str,
                       output_folder_name: str = 'anonymizerResult') -> None:
    """
    Anonymize the code fragments of all C++ task-tracker files that are stored under root.

    For every task folder inside the C++ folder of root, each CSV file is read, rows whose tests
    results are recognised by is_incorrect_fragment are dropped, the fragment column is replaced
    with its anonymized version and the result is written to
    <parent of root>/<output_folder_name>/<cpp folder>/<task>/<file name>.

    The directories created by the anonymizer are removed at the end, even if the processing of
    some file fails.

    We use gorshochek library: https://github.com/JetBrains-Research/gorshochek
    You need to clone the repo and build a docker image (see gorshochek README).

    Note: you need to change the config.yaml file before building the docker image:

    n transformations: 1
    transformations:
      - remove comments:
          p: 1.0
      - rename entities:
          p: 1
          rename functions: true
          rename variables: true
          strategy:
              name: hash
              hash prefix: d

    You can change 'seed', 'max tokens', 'max token len' params if you want.

    :param root: folder that contains the folder with the C++ tasks
    :param local_gorshochek_path: path to the local copy of the gorshochek repository
    :param output_folder_name: name of the folder (next to root) where the results are stored
    """
    cpp_path = f'{remove_slash(root)}/{LANGUAGE.CPP.value}'
    output_path = f'{get_parent_folder(root)}/{output_folder_name}/{LANGUAGE.CPP.value}'

    task_dirs = get_all_file_system_items(cpp_path,
                                          item_condition=task_item_condition,
                                          item_type=FILE_SYSTEM_ITEM.SUBDIR)
    gorshochek_anonymizer = GorshochekAnonymizer(local_gorshochek_path)
    try:
        for task_dir in task_dirs:
            task = get_name_from_path(task_dir, with_extension=False)
            print(f'Start handling the task {task}')
            files = get_all_file_system_items(
                task_dir, item_condition=extension_file_condition(EXTENSION.CSV))
            for file in files:
                print(f'Start handling the file {file}')
                df = pd.read_csv(file, encoding=ISO_ENCODING)
                # Delete incorrect fragments. The mask is built from the single column that is
                # actually inspected; astype(bool) keeps it a boolean mask for an empty file too
                is_correct = df[TESTS_RESULTS].apply(
                    lambda tests_results: not is_incorrect_fragment(tests_results)).astype(bool)
                # Work on an independent copy so the assignment below does not target a slice
                df = df[is_correct].copy()
                df[TASK_TRACKER_COLUMN.FRAGMENT.value] = \
                    df[TASK_TRACKER_COLUMN.FRAGMENT.value].apply(gorshochek_anonymizer.anonymize_code_fragment)
                current_output_path = f'{output_path}/{task}/{get_name_from_path(file)}'
                create_file('', current_output_path)
                df.to_csv(current_output_path)
    finally:
        # Always clean up the anonymizer's directories, also when a file could not be handled
        gorshochek_anonymizer.remove_directories()
Exemple #6
0
def create_dot_graph(
        output_folder: str,
        name_prefix: str,
        graph_representation: str,
        output_format: consts.EXTENSION = consts.EXTENSION.PNG) -> str:
    """
    Store the graph representation as a dot file and render it with the 'dot' command.

    Both the dot file and the rendered file are placed in output_folder and are named after
    name_prefix; the rendered file gets the extension of output_format.

    Returns the path to the rendered file.
    """
    dot_file_name = f'{name_prefix}{consts.EXTENSION.DOT.value}'
    dot_file_path = os.path.join(output_folder, dot_file_name)
    create_file(graph_representation, dot_file_path)

    rendered_file_name = f'{name_prefix}{output_format.value}'
    rendered_file_path = os.path.join(output_folder, rendered_file_name)
    # The first character of the extension (presumably the leading dot) is not part of the -T value
    format_name = output_format.value[1:]
    check_call_safely(['dot', f'-T{format_name}', dot_file_path, '-o', rendered_file_path])
    return rendered_file_path
    def anonymize_code_fragment(self, code_fragment: str):
        """
        Run gorshochek on a single code fragment and return the transformed code.

        gorshochek works only with folders with cpp files, so the fragment is first written to a
        cpp file inside the input directory. Then scripts/run.sh is executed from the local
        gorshochek repository and the content of the produced transformation_1.cpp file is returned.

        Note: the default scripts/run.sh file in the gorshochek repository requires sudo access for
        docker running. We remove it for the anonymization process to avoid running sudo processes
        from an external process.

        The created folders are not removed by this method.
        """
        input_file = f'{self._input_dir}/{self._input_name}.cpp'
        create_file(code_fragment, input_file)

        run_command = ['sh', './scripts/run.sh', self._data_name, self._out_name]
        gorshochek_process = subprocess.Popen(run_command, cwd=self._local_gorshochek_path)
        # The result file can only be read after the script has finished
        gorshochek_process.wait()

        transformed_file = f'{self._output_dir}/{self._input_name}/transformation_1.cpp'
        return get_content_from_file(transformed_file)
 def visualize_graph(self, name_prefix: str = 'graph',
                     to_create_vertices_content: bool = True,
                     output_format: consts.EXTENSION = consts.EXTENSION.PNG) -> str:
     """
     Render the graph with the 'dot' command into its own folder.

     The folder is located in consts.GRAPH_REPRESENTATION_PATH and is named after name_prefix and
     the graph id. It receives the dot file, the rendered file in output_format and, if
     to_create_vertices_content is set, the files with the vertices content.

     Returns the path to the folder.
     """
     dot_content = self.__get_graph_representation()
     graph_folder = os.path.join(consts.GRAPH_REPRESENTATION_PATH, f'{name_prefix}_{self._graph.id}')
     # Get rid of an older graph with the same name
     remove_directory(graph_folder)

     dot_file = os.path.join(graph_folder, f'{name_prefix}{consts.EXTENSION.DOT.value}')
     create_file(dot_content, dot_file)

     rendered_file = os.path.join(graph_folder, f'{name_prefix}{output_format.value}')
     # Build the picture from the dot file
     check_call_safely(['dot', f'-T{output_format.value[1:]}', dot_file, '-o', rendered_file])

     if to_create_vertices_content:
         self.__create_vertices_content(graph_folder)
     return graph_folder
 def generate_file_for_evaluation(
         self,
         name_suffices: List[str],
         evaluation_fragment_file: str = EVALUATION_FRAGMENT_PATH) -> None:
     """
     Create one evaluation file per name suffix in EVALUATION_PATH.

     The content is the same for all files: for every test input its index followed by a dot, a new
     line and the fragment that is read (without stripping) from evaluation_fragment_file.
     """
     fragment = get_content_from_file(evaluation_fragment_file, to_strip_nl=False)
     evaluation_content = ''.join(
         f'{test_input[TEST_INPUT.INDEX]}.\n{fragment}' for test_input in self._test_inputs)

     for name_suffix in name_suffices:
         file_name = f'{self._task.value}_evaluation_file_{name_suffix}{EXTENSION.TXT.value}'
         create_file(evaluation_content, os.path.join(EVALUATION_PATH, file_name))
Exemple #10
0
 def write_candidates_info_to_file(self,
                                   user_tree: AnonTree,
                                   candidates: List[IMeasuredTree],
                                   file_prefix: str = 'candidates',
                                   path: Optional[str] = None) -> str:
     """
     Write the information about the user tree and its candidates into a text file.

     The file starts with the profile and the code of the user tree. It is followed by one section
     per candidate that holds the tree id, the distance to the user, the distance info and the code
     of the candidate tree. If path is not passed, the 'candidates_info' folder inside the graph
     directory is used.

     Returns the path to the written file.
     """
     user_profile = user_tree.code_info_list[0].user.profile
     content_parts = [
         f'profile: {user_profile},\n\n'
         f'{get_code_from_tree(user_tree.tree)}\n\n\n\n\n'
     ]
     for candidate in candidates:
         content_parts.append(
             f'Tree id: {candidate.candidate_tree.id},\n'
             f'Distance to user: {candidate.distance_to_user}\n'
             f'Distance info: {candidate.distance_info}\n\n\n'
             f'{get_code_from_tree(candidate.candidate_tree.tree)}\n\n\n')

     output_folder = path
     if output_folder is None:
         output_folder = os.path.join(self.graph.graph_directory, 'candidates_info')
     file_path = os.path.join(output_folder, f'{file_prefix}_info{consts.EXTENSION.TXT.value}')

     create_file(''.join(content_parts), file_path)
     log.info(f'Candidates were written in the file {file_path}')
     return file_path
Exemple #11
0
 def create_source_file_with_name(self, source_code: str, name: str) -> str:
     """
     Store the source code in a file called name plus the extension of self.language.

     The file is placed in the SOURCE_OBJECT_NAME folder of TASKS_TESTS_PATH.
     Returns the path to the file.
     """
     extension = get_extension_by_language(self.language).value
     source_code_file = os.path.join(TASKS_TESTS_PATH, SOURCE_OBJECT_NAME, name + extension)
     create_file(source_code, source_code_file)
     return source_code_file
 def __create_vertices_content(self, folder_path: str) -> None:
     """
     Write one text file per vertex of the graph traversal into folder_path.

     A file is named 'vertex_<vertex id>' plus the TXT extension and holds the vertex info.
     """
     for vertex in self._graph.get_traversal():
         vertex_file_name = f'vertex_{vertex.id}{consts.EXTENSION.TXT.value}'
         vertex_info = self.__class__.__get_vertex_info(vertex)
         create_file(vertex_info, os.path.join(folder_path, vertex_file_name))
 def __create_source_file(self, source_code: str) -> str:
     """
     Store the source code in 'brackets_test.py' inside self.EXPRESSIONS_SOURCES_PATH.

     Returns the path to the file.
     """
     target_file = os.path.join(self.EXPRESSIONS_SOURCES_PATH, 'brackets_test.py')
     create_file(source_code, target_file)
     return target_file