def unpack_tests_results(tests_results: str, tasks: List[TASK]) -> List[float]:
    """Parse a stringified list of test results and validate it against ``tasks``.

    ``tests_results`` is a Python-literal list serialized to a string; it must
    contain exactly one entry per task, otherwise an error is raised via
    ``log_and_raise_error``.
    """
    parsed = ast.literal_eval(tests_results)
    if len(parsed) != len(tasks):
        log_and_raise_error(
            f'Cannot identify tasks because of'
            f' unexpected tests_results length: {len(parsed)}', log)
    return parsed
def check_task(self, task: TASK, in_and_out_files_dict: FilesDict, source_file: str,
               stop_after_first_false=True) -> float:
    """Run every in/out test pair for ``task`` against ``source_file``.

    Returns the fraction of passed tests. When ``stop_after_first_false`` is
    True, checking stops at the first failing test, keeping the rate
    accumulated so far (so information about partly passed tests survives).
    Raises via ``log_and_raise_error`` when no test data exists for the task.
    """
    log.info(f'Start checking task {task.value}')
    test_pairs = in_and_out_files_dict.get(task)
    if not test_pairs:
        log_and_raise_error(
            f'Task data for the {task.value} does not exist', log)
    passed = 0
    for in_file, out_file in test_pairs:
        is_passed = self.run_test(get_content_from_file(in_file),
                                  get_content_from_file(out_file),
                                  source_file)
        log.info(f'Test {in_file} for task {task.value} is passed: {str(is_passed)}')
        if is_passed:
            passed += 1
        elif stop_after_first_false:
            # keep existing rate, even if it's not 0, to save the information about partly passed tests
            log.info('Stop after first false')
            break
    rate = passed / len(test_pairs)
    log.info(f'Finish checking task {task.value}, rate: {str(rate)}')
    return rate
def get_diffs_number(self, anon_dst_tree: Optional[ast.AST], canon_dst_tree: Optional[ast.AST]) -> int:
    """Return the number of diffs between the anon and canon destination trees.

    Raises via ``log_and_raise_error`` when either tree is None.
    """
    if anon_dst_tree is None or canon_dst_tree is None:
        log_and_raise_error(
            f'Trees can not be empty!\nAnon tree:\n{get_code_from_tree(anon_dst_tree)}\n'
            f'Canon tree:\n{get_code_from_tree(canon_dst_tree)}', log)
    diffs = self.get_diffs(anon_dst_tree, canon_dst_tree)[0]
    return len(diffs)
def set_const_attrib(xml_node: ET.Element, py_node: ast.AST, py_node_attrib_name: str) -> None:
    """Restore a constant-valued attribute on ``py_node`` from an xml node.

    The value type is merged into the xml tag after a dash (e.g. ``Constant-int``)
    and the textual value lives in the ``value`` attribute. Raises RuntimeError
    (via log_and_raise_error) when the tag carries no type or the type cannot
    be located.
    """
    try:
        str_val_type_repr = xml_node.tag.split('-')[1]
    except IndexError:
        log_and_raise_error(f'missing value_type merged in tag of {xml_node.tag} node', logger, RuntimeError)
    str_val_repr = xml_node.attrib['value']
    value_type = locate(str_val_type_repr)
    if str_val_type_repr == 'ellipsis':
        value = ...
    elif str_val_type_repr == 'NoneType':
        value = None
    elif not value_type:
        # Bug fix: the old message interpolated xml_node.attrib["value_type"], a key
        # that is never set (the type is merged into the tag), so building the error
        # itself raised KeyError. Report the type repr parsed from the tag instead.
        log_and_raise_error(f'failed to locate Constant.value type: {str_val_type_repr}', logger, RuntimeError)
    elif value_type == bytes:
        # str_val_repr looks like "b'...'": strip the b-prefix and quotes before encoding
        value = value_type(str_val_repr[2:-1].encode(DEFAULT_ENCODING))
    elif value_type == bool:
        # bool('False') is truthy, so compare against the literal text instead
        value = str_val_repr == 'True'
    else:
        value = value_type(str_val_repr)
    setattr(py_node, py_node_attrib_name, value)
def get_trees(source: str, tree_types_to_get: Set[TREE_TYPE], to_simplify: bool = True) -> Tuple[ast.AST, ...]:
    """Build orig/anon/canon trees for ``source`` and return the requested ones.

    Trees are built in dependency order (orig -> anon -> canon) and building
    stops as soon as all requested types are satisfied.
    NOTE(review): this relies on __update_gotten_trees removing handled types
    from ``tree_types_to_get`` — confirm that helper's contract.
    """
    gotten_trees = ()
    # Nothing requested -> nothing to build.
    if not tree_types_to_get:
        return gotten_trees

    orig_tree = __get_orig_tree_from_source(source)
    gotten_trees = __update_gotten_trees(orig_tree, TREE_TYPE.ORIG, gotten_trees, tree_types_to_get)
    if not tree_types_to_get:
        return gotten_trees

    imports = get_imports(orig_tree)
    anon_tree = __get_anon_tree_from_orig_tree(orig_tree, imports, to_simplify=to_simplify)
    gotten_trees = __update_gotten_trees(anon_tree, TREE_TYPE.ANON, gotten_trees, tree_types_to_get)
    if not tree_types_to_get:
        return gotten_trees

    canon_tree = get_canon_tree_from_anon_tree(anon_tree, imports)
    gotten_trees = __update_gotten_trees(canon_tree, TREE_TYPE.CANON, gotten_trees, tree_types_to_get)
    if tree_types_to_get:
        # All known tree types were built, yet requests remain -> unknown types.
        log_and_raise_error(f'There are still tree types to get {tree_types_to_get}, but trees getting is finished', log)
    return gotten_trees
def get_rate(tests_results: str, task_index: int) -> float:
    """Return the test result for the task at ``task_index``.

    Raises via ``log_and_raise_error`` when the index is out of range for
    either the task list or the unpacked results.
    """
    tasks = TASK.tasks()
    results = unpack_tests_results(tests_results, tasks)
    out_of_range = task_index >= len(tasks) or task_index >= len(results)
    if out_of_range:
        log_and_raise_error(
            f'Task index {task_index} is more than length of tasks list', log)
    return results[task_index]
def __get_dict_lists_size(res: Dict[str, List[Any]]) -> int:
    """Return the common length of the lists stored in ``res``.

    Returns 0 for an empty dict. Raises via ``log_and_raise_error`` when the
    lists have differing (non-zero) sizes.
    """
    size = 0
    for values in res.values():
        if size != 0 and len(values) != size:
            log_and_raise_error('Lists in the res dict have different sizes', log)
        size = len(values)
    return size
def __parse_file_with_statistics(self, file: str, max_number_of_fragments: int = 100) -> List[FragmentStatistics]:
    """Split a statistics file into fragments and parse the first ``max_number_of_fragments``.

    Requires the file to contain at least ``max_number_of_fragments`` fragments
    (separated by four separator characters), then keeps exactly the first
    ``max_number_of_fragments`` of them.
    NOTE(review): the error message reads like a "too many fragments" check,
    but the condition enforces a minimum — confirm the intended contract with
    the statistics-file format owner.
    """
    content = get_content_from_file(file, encoding=UTF_ENCODING)
    fragments = content.split(self.separator * 4)
    if len(fragments) < max_number_of_fragments:
        log_and_raise_error(f'Fragments number is {len(fragments)}, but max number of fragments is '
                            f'{max_number_of_fragments}', log)
    fragments = fragments[:max_number_of_fragments]
    return [self.__get_statistics_from_fragment(fragment) for fragment in fragments]
def from_source(cls, source: str, rate: Optional[float], task: Optional[TASK] = None,
                language: consts.LANGUAGE = consts.LANGUAGE.PYTHON) -> Code:
    """Build a ``Code`` object from raw source.

    When ``rate`` is None it is computed by running the task's tests, which
    requires ``task`` to be provided; otherwise an error is raised via
    ``log_and_raise_error``.
    """
    anon_tree, canon_tree = get_trees(source, {TREE_TYPE.ANON, TREE_TYPE.CANON})
    if rate is None:
        if task is None:
            log_and_raise_error('Cannot find rate without task: both are None', log)
        tasks = [task]
        rate = check_tasks(tasks, source, create_in_and_out_dict(tasks), language)[0]
    return Code(anon_tree, canon_tree, rate, language)
def get_diffs_and_delete_edits_numbers(src_file: str, dst_file: str) -> Tuple[int, int]:
    """Run GumTreeDiff in 'deln' mode and return ``(diffs, delete_edits)``.

    Raises via ``log_and_raise_error`` when the external process fails.
    """
    log.info(f'Calling GumTreeDiff. Src file {src_file}, dst file: {dst_file}')
    command = [consts.GUMTREE_PATH, 'deln', src_file, dst_file]
    try:
        output = check_output(command, text=True, stderr=STDOUT).strip('\n')
        delete_edits, diffs = output.split()
        return int(diffs), int(delete_edits)
    except CalledProcessError as e:
        log_and_raise_error(f'Error during GumTreeDiff running: {e}, src: {src_file}, dst: {dst_file}', log)
        # Unreachable when log_and_raise_error raises; kept as a defensive hard stop.
        exit(1)
def get_diffs_number(src_file: str, dst_file: str) -> int:
    """Run GumTreeDiff in 'diffn' mode and return the number of diffs.

    Raises via ``log_and_raise_error`` when the external process fails.
    """
    log.info('Calling GumTreeDiff')
    command = [consts.GUMTREE_PATH, 'diffn', src_file, dst_file]
    try:
        return int(check_output(command, text=True, stderr=STDOUT).strip('\n'))
    except CalledProcessError as e:
        log_and_raise_error(f'Error during GumTreeDiff running: {e}, src: {src_file}, dst: {dst_file}', log)
        # Unreachable when log_and_raise_error raises; kept as a defensive hard stop.
        exit(1)
def get_hint_solution(is_solution: bool, is_not_solution: bool) -> 'HINT_SOLUTION':
    """Map a pair of mutually exclusive flags onto a HINT_SOLUTION value.

    Raises via ``log_and_raise_error`` when both flags are True or both are False.
    """
    flags = (is_solution, is_not_solution)
    if contains_at_least_n_true(*flags):
        log_and_raise_error(f'Incorrect value for the hint solution. At least two values are True', log)
    if is_solution:
        return HINT_SOLUTION.SOLUTION
    if is_not_solution:
        return HINT_SOLUTION.NOT_SOLUTION
    log_and_raise_error(f'Undefined hint solution value. Passed values:\nis_solution {is_solution}\n'
                        f'is_not_solution {is_not_solution}', log)
def get_src_and_dst_files(test_type: DIFF_HANDLER_TEST_TYPES, task: TASK) -> List[Tuple[str, str]]:
    """Collect the numbered test files for ``task`` and return all ordered (src, dst) pairs.

    Raises via ``log_and_raise_error`` when no test files are found under the
    computed root directory.
    """
    root = os.path.join(CANONICALIZATION_TESTS.DATA_PATH.value, ADDITIONAL_FOLDER,
                        test_type.value, task.value)
    # Bug fix: the dot in the pattern was unescaped (r'\d+.py'), so it matched any
    # character in that position (e.g. '12apy'); escape it to require a literal '.py'.
    files = get_all_file_system_items(root, match_condition(r'\d+\.py'))
    if not files:
        log_and_raise_error(
            f'Number of test files is zero! Root for files is {root}', log)
    return list(itertools.product(files, repeat=2))
def __handle_not_solution_piece(self, not_solution_piece: str) -> Tuple[str, bool, Optional[HINT_SIZE]]:
    """Parse the 'not solution' piece of a fragment.

    Returns ``(fragment_id, is_not_solution, hint_size)``. Raises via
    ``log_and_raise_error`` when the piece does not have the expected
    number of separator-delimited parts.
    """
    log.info(f'Start handling not solution piece of statistics')
    parts = not_solution_piece.lstrip(self.separator).split(self.separator)
    if len(parts) != self.number_of_lines_for_not_solution_piece_part:
        log_and_raise_error(f'Not solution piece in statistics has an incorrect structure', log)
    fragment_id = parts[0].rstrip('.')
    is_not_solution = self.labeled_symbol in parts[1]
    hint_size = HINT_SIZE.get_hint_size(*self.__does_contain_labeled_symbol(parts[2:]))
    return fragment_id, is_not_solution, hint_size
def __lt__(self, o: object):
    """Order MeasuredTreeV7 instances by their distance to the user.

    Raises (via log_and_raise_error) when ``o`` is not a MeasuredTreeV7.
    """
    if isinstance(o, MeasuredTreeV7):
        return self._distance_to_user < o._distance_to_user
    log_and_raise_error(
        f'The object {o} is not {self.__class__} class', log)
def get_apply_diffs_quality(is_correct: bool, is_incorrect: bool) -> Optional['QUALITY_AFTER_DIFFS_APPLIED']:
    """Map mutually exclusive flags to QUALITY_AFTER_DIFFS_APPLIED.

    Returns None when both flags are False; raises via ``log_and_raise_error``
    when more than one flag is True.
    """
    if contains_at_least_n_true(is_correct, is_incorrect):
        log_and_raise_error(f'Incorrect value for the apply diffs quality. At least two values are True. '
                            f'Passed values:\nis_correct {is_correct}\nis_incorrect {is_incorrect}', log)
    if is_correct:
        return QUALITY_AFTER_DIFFS_APPLIED.CORRECT
    return QUALITY_AFTER_DIFFS_APPLIED.INCORRECT if is_incorrect else None
def get_hint_structure(is_similar: bool, is_dissimilar: bool) -> Optional['HINT_STRUCTURE']:
    """Map mutually exclusive flags to HINT_STRUCTURE.

    Returns None when both flags are False; raises via ``log_and_raise_error``
    when more than one flag is True.
    """
    if contains_at_least_n_true(is_similar, is_dissimilar):
        # Bug fix: the message was missing the space after 'is_similar'
        # ('is_similar{is_similar}'), unlike its sibling functions.
        log_and_raise_error(f'Incorrect value for the hint structure. At least two values are True. '
                            f'Passed values:\nis_similar {is_similar}\nis_dissimilar {is_dissimilar}', log)
    if is_similar:
        return HINT_STRUCTURE.SIMILAR
    if is_dissimilar:
        return HINT_STRUCTURE.DISSIMILAR
    return None
def get_hint_size(is_small: bool, is_big: bool) -> Optional['HINT_SIZE']:
    """Map mutually exclusive flags to HINT_SIZE.

    Returns None when both flags are False; raises via ``log_and_raise_error``
    when more than one flag is True.
    """
    if contains_at_least_n_true(is_small, is_big):
        log_and_raise_error(f'Incorrect value for the hint size. At least two values are True. Passed values:\n'
                            f'is_small {is_small}\nis_big {is_big}', log)
    if is_small:
        return HINT_SIZE.SMALL
    return HINT_SIZE.BIG if is_big else None
def __get_language(df: pd.DataFrame) -> consts.LANGUAGE:
    """Return the single LANGUAGE used in the dataframe.

    Raises via ``log_and_raise_error`` when the language column does not hold
    exactly one unique value, or when that value is not a known LANGUAGE key.
    """
    languages = df[LANGUAGE].unique()
    if len(languages) != 1:
        log_and_raise_error(f'Dataframe has {len(languages)} unique values!', log)
    language_key = languages[0]
    try:
        return consts.LANGUAGE(language_key)
    except ValueError:
        log_and_raise_error(f'Unknown language key: {language_key}', log)
def __get_statistics_from_fragment(self, fragment: str) -> FragmentStatistics:
    """Parse one fragment of the statistics file into a FragmentStatistics record.

    A fragment consists of a 'not solution' piece and a 'solution' piece
    separated by three separator characters; anything else raises via
    ``log_and_raise_error``.
    """
    log.info(f'Start parsing the fragment\n{fragment}')
    pieces = fragment.split(self.separator * 3)
    if len(pieces) != 2:
        log_and_raise_error(f'The fragment has an incorrect structure', log)
    fragment_id, is_not_solution, hint_size = self.__handle_not_solution_piece(pieces[0])
    (is_solution, hint_structure, hint_to_solution_distance,
     hint_step, hint_quality, apply_diffs_quality) = self.__handle_solution_piece(pieces[1])
    return FragmentStatistics(fragment_id, is_not_solution, is_solution, hint_size, hint_structure,
                              hint_to_solution_distance, hint_step, hint_quality, apply_diffs_quality)
def __get_real_ati_file_index(files: List[str]) -> int:
    """Return the index of the single real activity-tracker file in ``files``.

    Returns -1 when no such file exists. Raises via ``log_and_raise_error`` as
    soon as a second activity-tracker file is seen.
    """
    ati_index = -1
    ati_count = 0
    for index, file in enumerate(files):
        if consts.ACTIVITY_TRACKER_FILE_NAME in file and not is_ct_file(file):
            ati_count += 1
            ati_index = index
            if ati_count >= 2:
                log_and_raise_error('The number of activity tracker files is more than 1', log)
    return ati_index
def get_unique_child(xml_node, with_tag=None):
    """Return the only child of ``xml_node`` matching ``with_tag``.

    Returns None when there is no such child; raises RuntimeError (via
    log_and_raise_error) when more than one child matches.
    """
    children = _XmlNodeChildrenGetter.get_children(xml_node, with_tag)
    if not children:
        return None
    if len(children) > 1:
        log_and_raise_error(
            f'{len(children)} children of {xml_node.tag} with tag={with_tag} found.'
            f' But {with_tag} expected to be unique', logger, RuntimeError)
    return children[0]
def create_file_for_tree(self, to_overwrite: bool = False) -> str:
    """Materialize ``self.tree`` as a source file and remember its path.

    Raises via ``log_and_raise_error`` when a file is already registered and
    ``to_overwrite`` is False. The file is only written when it does not yet
    exist on disk.
    """
    if self._tree_file is not None and not to_overwrite:
        log_and_raise_error(f'File for tree {get_code_from_tree(self.tree)} already exists in files dict', log)
    if not is_file(self.tree_file):
        create_file(get_code_from_tree(self.tree), self.tree_file)
    self._tree_file = self.tree_file
    return self.tree_file
def __insert_row(df: pd.DataFrame, row_number: int, row_value: list) -> pd.DataFrame:
    """Return a new dataframe with ``row_value`` inserted at position ``row_number``.

    The result gets a fresh 0..n-1 index. Raises via ``log_and_raise_error``
    when ``row_number`` is beyond one past the current maximum index.
    """
    if row_number > df.index.max() + 1:
        log_and_raise_error('Invalid row_number in the method __insert_row', log)
    # Bug fix: df[0:row_number] is a slice of df; assigning into it via .loc
    # triggers SettingWithCopyWarning and may write through to the caller's
    # frame. Take an explicit copy before mutating.
    upper = df[0:row_number].copy()
    lower = df[row_number:]
    upper.loc[row_number] = row_value
    df_result = pd.concat([upper, lower])
    df_result.index = [*range(df_result.shape[0])]
    return df_result
def get_hint_step(is_normal: bool, is_big: bool, is_small: bool) -> Optional['HINT_STEP']:
    """Map mutually exclusive flags to HINT_STEP.

    Returns None when all flags are False; raises via ``log_and_raise_error``
    when more than one flag is True.
    """
    if contains_at_least_n_true(is_normal, is_big, is_small):
        log_and_raise_error(f'Incorrect value for the hint step. At least two values are True. Passed values:\n'
                            f'is_normal {is_normal}\nis_big {is_big}\nis_small {is_small}', log)
    for flag, step in ((is_normal, HINT_STEP.NORMAL),
                       (is_big, HINT_STEP.BIG),
                       (is_small, HINT_STEP.SMALL)):
        if flag:
            return step
    return None
def __get_profile_info(ct_df: pd.DataFrame, column: STATISTICS_KEY) -> Profile:
    """Return the single unique value of a profile column.

    Default sentinel values are mapped through ``column.get_default()``.
    Raises via ``log_and_raise_error`` when the column does not hold exactly
    one unique value.
    """
    values = ct_df[column.value].unique()
    if len(values) != 1:
        log_and_raise_error(
            f'Have found {len(values)} unique value in profile column {column.value}', log)
    value = values[0]
    # If it's a default value, return consts.DEFAULT_VALUE
    if is_statistics_key_default_value(value, column):
        return column.get_default()
    return value
def get_profile_info(tt_df: pd.DataFrame, column: STATISTICS_KEY) -> Profile:
    """Return the single non-default unique value of a profile column.

    Falls back to ``column.get_default().value`` when only default values are
    present; raises via ``log_and_raise_error`` when several distinct
    non-default values exist.
    """
    values = delete_default_values(tt_df[column.value].unique())
    if len(values) == 0:
        # Only default values were present in the column.
        return column.get_default().value
    if len(values) == 1:
        return values[0]
    log_and_raise_error(
        f'Have found {len(values)}: {values} unique value in profile column {column.value}', log)
def get_hint_quality(is_good: bool, is_normal: bool, is_bad: bool) -> Optional['HINT_QUALITY']:
    """Map mutually exclusive flags to HINT_QUALITY.

    Returns None when all flags are False; raises via ``log_and_raise_error``
    when more than one flag is True.
    """
    if contains_at_least_n_true(is_good, is_normal, is_bad):
        log_and_raise_error(f'Incorrect value for the hint quality. At least two values are True. Passed values:\n'
                            f'is_good {is_good}\nis_normal {is_normal}\nis_bad {is_bad}', log)
    for flag, quality in ((is_good, HINT_QUALITY.GOOD),
                          (is_normal, HINT_QUALITY.NORMAL),
                          (is_bad, HINT_QUALITY.BAD)):
        if flag:
            return quality
    return None
def __is_correct_fragment(tests_results: str) -> bool:
    """Return True when every task in the fragment compiled.

    A mixed state — some tasks compiled, others not (e.g. [-1,1,0.5,0.5,-1,-1])
    — is treated as an error and raised via ``log_and_raise_error``.
    """
    tasks = consts.TASK.tasks()
    results = unpack_tests_results(tests_results, tasks)
    compiled_task_count = sum(1 for result in results if __is_compiled(result))
    if 0 < compiled_task_count < len(tasks):
        log_and_raise_error(
            f'A part of the tasks is incorrect, but another part is correct: {results}', log)
    return compiled_task_count == len(tasks)
def __get_column_unique_value(solutions: pd.DataFrame, column: Column, default: Any) -> Any:
    """Return the single unique value in ``column`` of ``solutions``.

    Returns ``default`` when the column has no unique values; raises via
    ``log_and_raise_error`` when there is more than one.
    """
    column_name = column.value
    unique_values = solutions[column_name].unique()
    if len(unique_values) == 0:
        log.info(f'Unique values not found')
        return default
    if len(unique_values) > 1:
        log_and_raise_error(
            f'There is more than 1 unique value in column {column_name}: {unique_values}', log)
    return unique_values[0]