def __find_closest_vertex(self,
                          user_vertex: Vertex,
                          goal: Vertex,
                          to_add_empty: bool = False) -> Optional[Vertex]:
        """
        Choose the graph vertex, which can be the next step from user_vertex to the goal.

        1. Go over the graph traversal and consider a vertex as candidate if:
            1.1 Its canon_tree differs from the user's canon_tree
            1.2 It is not the empty vertex (this check is skipped if to_add_empty is True)
            1.3 Its dist to goal is not greater than the user's dist to goal
        2. Choose the best vertex from candidates using __choose_best_vertex()
        """
        user_diffs_to_goal = goal.get_dist(user_vertex)
        log.info(f'User diff to goal is {user_diffs_to_goal}')
        skip_empty = not to_add_empty

        def is_candidate(graph_vertex) -> bool:
            # We don't want to add to result the same vertex
            if are_asts_equal(user_vertex.serialized_code.canon_tree,
                              graph_vertex.serialized_code.canon_tree):
                return False
            if skip_empty and self._graph.is_empty_vertex(graph_vertex):
                return False

            # Todo: calculate diffs to the nearest goal from each vertex or not???
            # Todo: think about empty tree
            diffs_to_goal = self._graph.get_dist_between_vertices(
                graph_vertex, goal)
            return diffs_to_goal <= user_diffs_to_goal

        candidates = [
            graph_vertex for graph_vertex in self._graph.get_traversal()
            if is_candidate(graph_vertex)
        ]
        return self.__choose_best_vertex(user_vertex, candidates)
Ejemplo n.º 2
0
    def __find_closest_tree(self,
                            user_anon_tree: AnonTree,
                            user_canon_nodes_number: int,
                            canon_nodes_numbers_dict: Dict[int, list],
                            candidates_file_name: str,
                            can_stop_earlier: bool = True,
                            to_use_lower_bound=True) -> Optional[AnonTree]:
        """
        Find the anon tree in the graph, which suits the user's anon tree best.

        1. Consider each vertex with similar nodes number as candidate (chose at least TOP_N_CANON = {1} candidates)
        2. Choose at least TOP_N_ANON = {0} anon trees from canon candidates and run __choose_best_anon_tree

        Returns None if no canon candidates were found.
        """

        # Ids of the vertices with canon trees, which have nodes number similar to user_canon_nodes_number
        canon_candidates_ids = self.__get_top_n_candidates(
            self.canon_top_n, user_canon_nodes_number,
            canon_nodes_numbers_dict, can_stop_earlier, to_use_lower_bound)
        log.info(f'CANON_TOP_N vertices ids are {canon_candidates_ids}')
        if not canon_candidates_ids:
            return None

        candidate_vertices = list(
            map(Vertex.get_item_by_id, canon_candidates_ids))

        # Flatten the anon trees of all candidate vertices into a single list
        candidate_anon_trees = sum(
            (vertex.serialized_code.anon_trees
             for vertex in candidate_vertices), [])
        nodes_number_to_anon_trees = self.__get_items_nodes_number_dict(
            candidate_anon_trees)
        anon_candidates = self.__get_top_n_candidates(
            self.anon_top_n, user_anon_tree.nodes_number,
            nodes_number_to_anon_trees, can_stop_earlier, to_use_lower_bound)

        return self.__choose_best_anon_tree(user_anon_tree, anon_candidates,
                                            candidates_file_name)
Ejemplo n.º 3
0
    def __go_through_graph(self, user_anon: AnonTree, graph_anon: AnonTree,
                           goal_anon: AnonTree) -> bool:
        """
        Decide if the user should go through graph_anon instead of going directly to goal_anon.

        1. If __is_most_of_path_is_done, return False
        2. Return not __is_far_from_graph

        All the diffs numbers are calculated by GumTreeDiff on the tree files.
        The rate check (is_rate_worse) is not implemented yet, see the Todo below.
        """
        empty_tree_file = self._graph.empty_vertex.serialized_code.anon_trees[
            0].tree_file
        user_tree_file = user_anon.tree_file

        empty_to_user = GumTreeDiff.get_diffs_number(empty_tree_file,
                                                     user_tree_file)
        user_to_goal = GumTreeDiff.get_diffs_number(user_tree_file,
                                                    goal_anon.tree_file)

        # The whole path is taken as empty -> user -> goal
        whole_path = empty_to_user + user_to_goal
        if self.__is_most_of_path_is_done(whole_path, user_to_goal):
            log.info('Most of path is done')
            return False

        # Todo: add is_rate_worse

        user_to_graph = GumTreeDiff.get_diffs_number(user_tree_file,
                                                     graph_anon.tree_file)
        is_far = self.__is_far_from_graph(user_to_goal, user_to_graph)
        return not is_far
    def __find_closest_tree(self, user_anon_tree: AnonTree,
                            user_canon_nodes_number: int,
                            canon_nodes_numbers_dict: Dict[int, list],
                            candidates_file_name: str) -> Optional[AnonTree]:
        """
        Find the anon tree in the graph, which suits the user's anon tree best.

        1. Consider each vertex with similar nodes number as candidate (chose at least TOP_N_CANON = {1} candidates)
        2. Choose at least TOP_N_ANON = {0} anon trees from canon candidates
        3. Pass the anon candidates to write_candidates_info_to_file
        4. Run __choose_best_anon_tree on the anon candidates
        """

        # Ids of the vertices with canon trees, which have nodes number similar to user_canon_nodes_number
        canon_candidates_ids = self.__get_top_n_candidates(
            self.canon_top_n, user_canon_nodes_number,
            canon_nodes_numbers_dict)
        log.info(f'CANON_TOP_N vertices ids are {canon_candidates_ids}')
        candidate_vertices = list(
            map(Vertex.get_item_by_id, canon_candidates_ids))

        # Flatten the anon trees of all candidate vertices into a single list
        candidate_anon_trees = sum(
            (vertex.serialized_code.anon_trees
             for vertex in candidate_vertices), [])
        nodes_number_to_anon_trees = self.__get_items_nodes_number_dict(
            candidate_anon_trees)
        anon_candidates = self.__get_top_n_candidates(
            self.anon_top_n, user_anon_tree.nodes_number,
            nodes_number_to_anon_trees)

        candidates_file = f'{self.candidates_file_prefix}_{candidates_file_name}'
        self.write_candidates_info_to_file(user_anon_tree, anon_candidates,
                                           candidates_file)
        return self.__choose_best_anon_tree(user_anon_tree, anon_candidates)
 def __find_closest_goal(self, user_vertex: Vertex) -> Vertex:
     """
     Find the goal, which suits the user's vertex best.

     1. Get list of all goals (the parents of the graph end vertex)
     2. Find the closest using __choose_best_vertex()
     """
     goal_vertices = self._graph.end_vertex.parents
     goals_ids = [goal.id for goal in goal_vertices]
     log.info(f'Number of goals: {len(goal_vertices)}\nGoals ids are {goals_ids}')
     return self.__choose_best_vertex(user_vertex, goal_vertices)
 def __choose_best_vertex(self, user_vertex: Vertex, vertices: List[Vertex]) -> Optional[Vertex]:
     """
     Choose the vertex from the given ones, which suits the user's vertex best.

     1. Sort candidates using MeasuredVertex
     2. Return the first candidate (or None, if there are no candidates)
     """
     vertices_ids = [vertex.id for vertex in vertices]
     log.info(f'Number of candidates: {len(vertices)}\nCandidates ids are {vertices_ids}')
     if not vertices:
         return None
     # The measured items are comparable, the best one goes first after sorting
     measured = sorted(self.get_measured_tree(user_vertex, vertex) for vertex in vertices)
     best_vertex = measured[0].vertex
     log.info(f'The best vertex id is {best_vertex.id}')
     return best_vertex
    def _is_close_to_goals(self, closest_tree: AnonTree) -> bool:
        """
        Check if closest_tree is close to the goals.

        1. Use only nodes number info.
        2. Returns False, if the goals median is empty
        3. Returns True, if closest_tree nodes number is not less, than {0} * median for goals nodes number
        """
        graph = self.graph
        if graph.is_goals_median_empty():
            log.info(
                'Cannot check if close to goals because goals median is empty')
            return False

        # Todo: don't we want to use 0.8 here instead of 0.2??
        nodes_number_threshold = graph.goals_median * self.nodes_number_percent_close_to_goals
        return closest_tree.nodes_number >= nodes_number_threshold
    def __find_closest_tree(
            self,
            user_anon_tree: AnonTree,
            user_canon_nodes_number: int,
            canon_nodes_numbers_dict: Dict[int, list],
            candidates_file_name: str,
            can_stop_earlier: bool = True,
            to_use_lower_bound: bool = True,
            to_add_empty_tree: bool = False,
            to_add_same_structure_trees: bool = False) -> Optional[AnonTree]:
        """
        Find the anon tree in the graph, which suits the user's anon tree best.

        1. Consider each vertex with similar nodes number as candidate (chose at least TOP_N_CANON = {1} candidates)
        2. Choose at least TOP_N_ANON = {0} anon trees from canon candidates
        3. Add empty tree if needed
        4. Consider each anon tree with same structure as candidate, if needed
        5. Choose at least {2} trees according to nodes number from same tree candidates
        6. Run __choose_best_anon_tree on all candidates
        """

        # Ids of the vertices with canon trees, which have nodes number similar to user_canon_nodes_number
        canon_candidates_ids = self.__get_top_n_candidates(
            self.canon_top_n, user_canon_nodes_number,
            canon_nodes_numbers_dict, can_stop_earlier, to_use_lower_bound)
        log.info(f'CANON_TOP_N vertices ids are {canon_candidates_ids}')

        anon_candidates = []
        if canon_candidates_ids:
            candidate_vertices = list(
                map(Vertex.get_item_by_id, canon_candidates_ids))
            # Flatten the anon trees of all candidate vertices into a single list
            candidate_anon_trees = sum(
                (vertex.serialized_code.anon_trees
                 for vertex in candidate_vertices), [])
            nodes_number_to_anon_trees = self.__get_items_nodes_number_dict(
                candidate_anon_trees)
            anon_candidates = self.__get_top_n_candidates(
                self.anon_top_n, user_anon_tree.nodes_number,
                nodes_number_to_anon_trees, can_stop_earlier,
                to_use_lower_bound)

        if to_add_empty_tree:
            empty_anon_tree = self._graph.empty_vertex.serialized_code.anon_trees[0]
            anon_candidates.append(empty_anon_tree)

        if to_add_same_structure_trees:
            anon_candidates.extend(
                self.__get_same_structure_trees(user_anon_tree,
                                                self.same_structure_top_n))

        return self.__choose_best_anon_tree(user_anon_tree, anon_candidates,
                                            candidates_file_name)
    def __go_through_graph(self, user_vertex: Vertex, graph_vertex: Vertex,
                           goal: Vertex) -> bool:
        """
        Decide if the user should go through graph_vertex instead of going directly to the goal.

        1. If __is_most_of_path_is_done, return False
        2. Return not __is_far_from_graph
        """
        user_to_goal = user_vertex.get_dist(goal)
        empty_to_user = self._graph.empty_vertex.get_dist(user_vertex)

        # The whole path is taken as empty -> user -> goal
        whole_path = empty_to_user + user_to_goal
        if self.__is_most_of_path_is_done(whole_path, user_to_goal):
            log.info('Most of path is done')
            return False

        user_to_graph = user_vertex.get_dist(graph_vertex)
        is_far = self.__is_far_from_graph(user_to_goal, user_to_graph)
        return not is_far
 def __get_same_structure_trees(self, user_anon_tree: AnonTree,
                                trees_number: int) -> List[AnonTree]:
     """
     Get the anon trees, which have the same ast structure as the user's anon tree.

     1. Take the anon trees ids from graph.anon_structure_dict by the user's ast_structure
     2. Choose candidates among them according to nodes number using __get_top_n_candidates
        (trees_number is passed as top_n, can_stop_earlier is off, lower bound is on)
     """
     same_structure_ids = self.graph.anon_structure_dict[
         user_anon_tree.ast_structure]
     same_structure_trees = list(
         map(AnonTree.get_item_by_id, same_structure_ids))
     nodes_number_to_trees = self.__get_items_nodes_number_dict(
         same_structure_trees)
     candidates = self.__get_top_n_candidates(trees_number,
                                              user_anon_tree.nodes_number,
                                              nodes_number_to_trees,
                                              can_stop_earlier=False,
                                              to_use_lower_bound=True)
     candidates_ids = [candidate.id for candidate in candidates]
     log.info(f'Found trees with same structure: {candidates_ids}')
     return candidates
 def __choose_best_anon_tree(
         self, user_anon_tree: AnonTree,
         anon_trees: List[AnonTree]) -> Optional[AnonTree]:
     """
     Choose the anon tree from the given ones, which suits the user's anon tree best.

     1. Sort candidates using MeasuredTree
     2. Return the first candidate (or None, if there are no candidates)
     """
     anon_trees_ids = [anon_tree.id for anon_tree in anon_trees]
     log.info(
         f'Number of candidates: {len(anon_trees)}\nCandidates ids are {anon_trees_ids}'
     )
     if not anon_trees:
         return None
     # The measured items are comparable, the best one goes first after sorting
     measured = sorted(
         self.get_measured_tree(user_anon_tree, anon_tree)
         for anon_tree in anon_trees)
     best_tree = measured[0].candidate_tree
     log.info(f'The best vertex id is {best_tree.id}')
     return best_tree
Ejemplo n.º 12
0
 def _is_close_to_goals(self, closest_tree: AnonTree) -> bool:
     """
     Check if closest_tree is close to the goals.

     1. Use only nodes number info.
     2. Returns True if percent of goals with similar nodes number (with indent no more than {1}) is more than {0}

     Returns False, if the goals median is empty or there are no goals in goals_nodes_number_dict.
     """
     if self.graph.is_goals_median_empty():
         log.info(
             'Cannot check if close to goals because goals median is empty')
         return False

     # Each key of the dict is a nodes number, each value holds the goals with this nodes number,
     # so the goals can be counted by the values sizes without building a flat list
     goals_by_nodes_number = self.graph.goals_nodes_number_dict
     goals_count = sum(len(goals) for goals in goals_by_nodes_number.values())
     if goals_count == 0:
         # Avoid ZeroDivisionError: without goals nothing can be close to them
         log.info(
             'Cannot check if close to goals because there are no goals nodes numbers')
         return False

     tree_nodes_number = closest_tree.nodes_number
     max_indent = self.max_goal_nodes_number_indent
     similar_goals_count = sum(
         len(goals) for goal_nodes_number, goals in goals_by_nodes_number.items()
         if abs(goal_nodes_number - tree_nodes_number) <= max_indent)
     return similar_goals_count / goals_count >= 1 - self.nodes_number_percent_close_to_goals
    def find_next_anon_tree(self, user_vertex: Vertex) -> Vertex:
        """
        Find the next code state (vertex) for the user's vertex.

        1. Find the closest goal (__find_closest_goal)
        2. Find the closest graph_vertex (__find_closest_vertex), it can be None
        3. If graph_vertex is found, choose between them using __go_through_graph,
           otherwise go directly to the goal
        """

        log.info(f'{self.__class__.__name__}\n'
                 f'Start finding the next code state for '
                 f'the user code:\n{get_code_from_tree(user_vertex.serialized_code.anon_trees[0])}\nand '
                 f'the user:\n{user_vertex.code_info_list[0].user}')
        # NOTE(review): the goal is used below without a None check — presumably the graph
        # always has at least one goal; confirm against __find_closest_goal
        goal = self.__find_closest_goal(user_vertex)
        log.info(f'Chosen goal is vertex {goal.id}')
        graph_vertex = self.__find_closest_vertex(user_vertex, goal)
        # We can have graph_vertex = None, so its id can be logged only after the check
        if graph_vertex:
            log.info(f'Chosen graph_vertex is vertex {graph_vertex.id}')
        else:
            log.info('Graph_vertex is not found')

        if graph_vertex and self.__go_through_graph(user_vertex, graph_vertex, goal):
            log.info('We are going through graph')
            return graph_vertex

        log.info('We are going directly to the goal')
        return goal
    def __find_closest_vertex(self, user_vertex: Vertex, goal: Vertex) -> Optional[Vertex]:
        """
        Choose the graph vertex, which can be the next step for the user's vertex.

        1. If there is a vertex in the graph with same canon_tree:
            1.1 Return __choose_best_vertex on vertex children
        2. Consider each vertex with small __get_rollback_probability as candidate
        3. Choose the best vertex from candidates using __choose_best_vertex()

        The goal is not used by the current implementation.
        """
        # Todo: 14/04 test vertex from graph
        user_canon_tree = user_vertex.canon_tree
        same_vertex = self._graph.find_vertex(user_canon_tree)
        if same_vertex:
            log.info('Choosing best vertex from found vertex children')
            return self.__choose_best_vertex(user_vertex, same_vertex.children)

        # Only the rollback probability is taken into account here,
        # the diffs number to the goal is not compared
        candidates = [
            vertex for vertex in self._graph.get_traversal()
            if self.__get_rollback_probability(user_canon_tree, vertex.canon_tree) <= ROLLBACK_PROBABILITY
        ]
        return self.__choose_best_vertex(user_vertex, candidates)
Ejemplo n.º 15
0
    def find_next_anon_tree(
            self,
            user_anon_tree: AnonTree,
            user_canon_tree: ast.AST,
            candidates_file_id: Optional[str] = None) -> AnonTree:
        """
        Find the next code state (anon tree) for the user's anon tree.

        1. Find the same tree SAME_TREE in the graph and get the best tree from next trees (__find_same_tree_in_graph)
        2. If SAME_TREE is not None, return SAME_TREE
        3. Find the closest tree CLOSEST_TREE in graph (__find_closest_tree with graph.canon_trees_nodes_number,
        can_stop_earlier={0}, to_use_lower_bound={1})
        4. If CLOSEST_TREE is found and not _is_close_to_goals, return CLOSEST_TREE
        5. Find the closest goal CLOSEST_GOAL in graph (__find_closest_goal_tree, can_stop_earlier={2},
         to_use_lower_bound={3})
        6. Choose between CLOSEST_TREE and CLOSEST_GOAL using __go_through_graph
        (CLOSEST_GOAL is returned, if CLOSEST_TREE is not found)
        """

        user_code = get_code_from_tree(user_anon_tree.tree)
        user = user_anon_tree.code_info_list[0].user
        log.info(f'{self.__class__.__name__}\n'
                 f'Start finding the next code state for '
                 f'the user code:\n{user_code}\nand '
                 f'the user:\n{user}')

        self.candidates_file_prefix = f'{self.get_file_prefix_by_user_tree(candidates_file_id)}'

        same_tree = self.__find_same_tree_in_graph(user_anon_tree,
                                                   user_canon_tree)
        if same_tree is not None:
            same_tree_code = get_code_from_tree(same_tree.tree)
            log.info(
                f'Found the same tree. Chosen anon tree:\n{same_tree_code}')
            return same_tree

        log.info('Same tree not found')

        canon_nodes_number = AstStructure.get_nodes_number_in_ast(
            user_canon_tree)
        graph_anon_tree = self.__find_closest_tree(
            user_anon_tree,
            canon_nodes_number,
            self.graph.canon_nodes_number_dict,
            candidates_file_name='graph_candidates',
            can_stop_earlier=self.graph_tree_stop_earlier,
            to_use_lower_bound=self.graph_tree_lower_bound)

        # We can have graph_anon_tree = None
        if graph_anon_tree:
            graph_tree_code = get_code_from_tree(graph_anon_tree.tree)
            log.info(f'Chosen anon tree in graph:\n{graph_tree_code}')
            if not self._is_close_to_goals(graph_anon_tree):
                log.info('The most of path is not done. Go through graph')
                return graph_anon_tree

        goal_anon_tree = self.__find_closest_goal_tree(user_anon_tree,
                                                       canon_nodes_number)
        goal_tree_code = get_code_from_tree(goal_anon_tree.tree)
        log.info(f'Chosen goal anon tree:\n{goal_tree_code}')

        # Without graph_anon_tree there is nothing to choose from, so the goal is returned
        if not graph_anon_tree or not self.__go_through_graph(
                user_anon_tree, graph_anon_tree, goal_anon_tree):
            log.info('We are going directly to the goal')
            return goal_anon_tree

        log.info('We are going through graph')
        return graph_anon_tree
    def __get_top_n_candidates(
            top_n: int, nodes_number: int,
            nodes_numbers_dict: Dict[int, List[Any]]) -> List[Any]:
        """
        We want to have top_n trees with nodes number, that is close to the given nodes_number.
        So we consequently add vertices with node numbers equal:
        1. nodes_number
        2. nodes_number - 1, nodes_number + 1
        3. nodes_number - 2, nodes_number + 2
        4. ....
        until we reach top_n or have no more node_numbers to add

        All items with the same nodes number are added together, so the result can be longer than top_n.
        An empty list is returned for an empty nodes_numbers_dict.

        NOTE(review): the method has no self, presumably it is declared as a staticmethod
        outside of the visible code — confirm.
        """
        log.info(
            f'Start getting top_n candidates, top_n is {top_n}, nodes number is {nodes_number}'
        )
        candidates = []

        # max() and min() raise ValueError on an empty dict, and there is nothing to choose from anyway
        if not nodes_numbers_dict:
            log.info(
                f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}'
            )
            return candidates

        nodes_numbers_queue = collections.deque([nodes_number])

        lower_bound = nodes_number
        upper_bound = nodes_number
        max_nodes_number = max(nodes_numbers_dict)
        min_nodes_number = min(nodes_numbers_dict)

        while len(candidates) < top_n and nodes_numbers_queue:
            log.info(
                f'Start adding candidates.\n'
                f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers'
            )
            # The queue is popped from the right, so the upper bound is handled before the lower one
            while nodes_numbers_queue:
                current_nodes_number = nodes_numbers_queue.pop()
                candidates += nodes_numbers_dict.get(current_nodes_number, [])

            log.info(
                f'Finish adding candidates.\n'
                f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers'
            )

            # Widen the window by one in both directions, but don't leave the range of existing nodes numbers
            lower_bound -= 1
            if lower_bound >= min_nodes_number:
                log.info(
                    f'Append lower_bound to queue: {lower_bound}, min nodes number is {min_nodes_number}'
                )
                nodes_numbers_queue.append(lower_bound)

            upper_bound += 1
            if upper_bound <= max_nodes_number:
                log.info(
                    f'Append upper_bound to queue: {upper_bound}, max nodes number is {max_nodes_number}'
                )
                nodes_numbers_queue.append(upper_bound)

        log.info(
            f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}'
        )
        return candidates
Ejemplo n.º 17
0
    def __get_top_n_candidates(top_n: int,
                               nodes_number: int,
                               nodes_numbers_dict: Dict[int, List[Any]],
                               can_stop_earlier: bool = True,
                               to_use_lower_bound: bool = False) -> List[Any]:
        """
        We want to have top_n trees with nodes number, that is close to the given nodes_number.
        So we consequently add vertices with node numbers equal:
        1. nodes_number
        2. nodes_number - 1 (if lower bound is on), nodes_number + 1
        3. nodes_number - 2 (if lower bound is on), nodes_number + 2
        4. ....
        until we reach top_n or have no more node_numbers to add.
        If can_stop_earlier is True, we stop as soon as we far  from user nodes number at {0} nodes numbers

        All items with the same nodes number are added together, so the result can be longer than top_n.
        An empty list is returned for an empty nodes_numbers_dict.

        NOTE(review): the method has no self, presumably it is declared as a staticmethod
        outside of the visible code — confirm.
        """
        log.info(
            f'Start getting top_n candidates, top_n is {top_n}, nodes number is {nodes_number}'
        )
        candidates = []

        # max() and min() raise ValueError on an empty dict, and there is nothing to choose from anyway
        if not nodes_numbers_dict:
            log.info(
                f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}'
            )
            return candidates

        nodes_numbers_queue = collections.deque([nodes_number])
        indent = 0
        max_nodes_number = max(nodes_numbers_dict)
        min_nodes_number = min(nodes_numbers_dict)

        while len(candidates) < top_n and nodes_numbers_queue and \
                (not can_stop_earlier or indent <= PathFinderV4.max_tree_nodes_number_indent):
            log.info(
                f'Start adding candidates.\n'
                f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers'
            )
            # The queue is popped from the right, so the upper bound is handled before the lower one
            while nodes_numbers_queue:
                new_nodes_number = nodes_numbers_queue.pop()
                candidates += nodes_numbers_dict.get(new_nodes_number, [])

            log.info(
                f'Finish adding candidates.\n'
                f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers'
            )

            # Widen the window by one, but don't leave the range of existing nodes numbers
            indent += 1

            if to_use_lower_bound:
                lower_bound = nodes_number - indent
                if lower_bound >= min_nodes_number:
                    log.info(
                        f'Append lower_bound to queue: {lower_bound}, min nodes number is {min_nodes_number}'
                    )
                    nodes_numbers_queue.append(lower_bound)

            upper_bound = nodes_number + indent
            if upper_bound <= max_nodes_number:
                log.info(
                    f'Append upper_bound to queue: {upper_bound}, max nodes number is {max_nodes_number}'
                )
                nodes_numbers_queue.append(upper_bound)

        log.info(
            f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}'
        )
        return candidates