예제 #1
0
 def test_canon_distance_correctness(self, src_vertex, subtests):
     """Compare computed canon-tree distances against ``canon_distance``.

     The canon tree of the first fragment registered for ``src_vertex`` is
     diffed against the canon tree of the first fragment of every vertex in
     ``VERTEX``; each comparison runs in its own subtest.
     """
     src_fragment = all_fragments[INDICES_BY_VERTEX[src_vertex][0]]
     [src_tree] = get_trees(src_fragment, {TREE_TYPE.CANON})
     for dst_vertex in VERTEX:
         with subtests.test():
             dst_fragment = all_fragments[INDICES_BY_VERTEX[dst_vertex][0]]
             [dst_tree] = get_trees(dst_fragment, {TREE_TYPE.CANON})
             actual_dist = (
                 GumTreeDiff.create_tmp_files_and_get_diffs_number(
                     src_tree, dst_tree))
             # The expected value comes from the precomputed table.
             assert actual_dist == canon_distance[src_vertex][dst_vertex]
예제 #2
0
 def test_anon_distance_correctness(self, subtests):
     """Compare computed anon-tree distances against ``anon_distance``.

     Every ordered pair of the six module-level fragments is diffed, and the
     result is checked against ``anon_distance[i][j]`` in its own subtest.
     """
     fragments = [
         fragment_0,
         fragment_1,
         fragment_2,
         fragment_3,
         fragment_4,
         fragment_5,
     ]
     for i, source_fragment in enumerate(fragments):
         # The source tree is built once per row of the distance table.
         [source_tree] = get_trees(source_fragment, {TREE_TYPE.ANON})
         for j, target_fragment in enumerate(fragments):
             with subtests.test():
                 [target_tree] = get_trees(target_fragment, {TREE_TYPE.ANON})
                 actual_dist = (
                     GumTreeDiff.create_tmp_files_and_get_diffs_number(
                         source_tree, target_tree))
                 assert actual_dist == anon_distance[i][j], \
                     f'Dists are not equal: {i}, {j}'
def drop_same_anon_trees(df: pd.DataFrame) -> pd.DataFrame:
    """Drop rows whose anon tree equals the anon tree of an earlier row.

    The frame is modified in place: its index is reset to ``0..len(df)-1``,
    duplicate rows are dropped, and the index is reset again.

    :param df: frame with a ``CODE_TRACKER_COLUMN.FRAGMENT.value`` column
        holding the source fragments.
    :return: the same ``df`` object, with duplicate rows removed.
    """
    log.info(f'Start dropping same anon trees, df size is {len(df)}')
    df.index = np.arange(0, len(df))
    df_anon_trees = df[CODE_TRACKER_COLUMN.FRAGMENT.value].apply(
        lambda f: get_trees(f, {TREE_TYPE.ANON})[0]).to_list()

    # Each tree travels together with its original row position.  Deriving
    # the position from "current index + number of drops so far" is wrong
    # whenever an earlier drop happened *after* the current position
    # (e.g. trees [A, B, B, A] would yield [3, 3] instead of [3, 2]).
    remaining = list(enumerate(df_anon_trees))
    indices_to_drop = []
    i = 0
    while i < len(remaining):
        log.info(f'Handling {i}/{len(remaining)} anon tree')
        current_anon_tree = remaining[i][1]
        # Everything up to and including the current tree is already unique.
        survivors = remaining[:i + 1]
        for row_position, next_anon_tree in remaining[i + 1:]:
            if are_asts_equal(current_anon_tree, next_anon_tree):
                log.info(f'Dropping {row_position} anon tree')
                indices_to_drop.append(row_position)
            else:
                survivors.append((row_position, next_anon_tree))
        remaining = survivors
        i += 1

    df.drop(df.index[indices_to_drop], inplace=True)
    df.index = np.arange(0, len(df))
    log.info(f'Stop dropping same anon trees, df size is {len(df)}')
    return df
예제 #4
0
 def from_source(cls, source: str, rate: Optional[float], task: Optional[TASK] = None,
                 language: consts.LANGUAGE = consts.LANGUAGE.PYTHON) -> Code:
     """Create a ``Code`` from raw source text.

     The anon and canon trees are built from ``source``.  When ``rate`` is
     ``None`` it is taken from the first element of the ``check_tasks``
     result for ``task``; if ``task`` is ``None`` as well, the problem is
     reported through ``log_and_raise_error``.
     """
     requested_trees = {TREE_TYPE.ANON, TREE_TYPE.CANON}
     anon_tree, canon_tree = get_trees(source, requested_trees)
     rate_is_missing = rate is None
     if rate_is_missing and task is None:
         log_and_raise_error('Cannot find rate without task: both are None', log)
     if rate_is_missing:
         in_and_out = create_in_and_out_dict([task])
         task_rates = check_tasks([task], source, in_and_out, language)
         rate = task_rates[0]
     return Code(anon_tree, canon_tree, rate, language)
 def __init__(self,
              source_code: Optional[str] = None,
              anon_tree: Optional[ast.AST] = None,
              canon_tree: Optional[ast.AST] = None):
     """Store the original, anon and canon trees on the instance.

     If ``source_code`` is given, all three trees come from ``get_trees``
     called with every tree type.  Otherwise the original tree is ``None``
     and the supplied ``anon_tree`` / ``canon_tree`` are stored as they are.
     """
     if source_code is None:
         trees = (None, anon_tree, canon_tree)
     else:
         trees = get_trees(source_code, TREE_TYPE.get_all_types_set())
     self._orig_tree, self._anon_tree, self._canon_tree = trees
예제 #6
0
 def test_same_canon_trees_in_same_vertices(self, vertex: VERTEX, subtests):
     """Check that all fragments of one vertex share an equal canon tree.

     Every ordered pair of canon trees (including a tree paired with
     itself) is compared with ``are_asts_equal`` in its own subtest.
     """
     canon_trees = []
     for fragment_index in INDICES_BY_VERTEX[vertex]:
         fragment = all_fragments[fragment_index]
         canon_trees.append(get_trees(fragment, {TREE_TYPE.CANON})[0])

     for first_tree in canon_trees:
         for second_tree in canon_trees:
             with subtests.test():
                 assert are_asts_equal(first_tree, second_tree)
예제 #7
0
def get_expected_out(
        solutions: pd.DataFrame, start_index: int,
        end_index: int) -> Tuple[int, List[AtiItem], ast.AST, ast.AST]:
    """Build the expected output for rows ``start_index..end_index - 1``.

    The anon and canon trees are built from the fragment stored at
    ``start_index``; one ATI item is collected per row in the range.

    :return: ``(end_index, ati_elements, anon_tree, canon_tree)``.
    """
    start_fragment = __get_column_value(
        solutions, start_index, CODE_TRACKER_COLUMN.FRAGMENT)
    requested_trees = {TREE_TYPE.ANON, TREE_TYPE.CANON}
    anon_tree, canon_tree = get_trees(start_fragment, requested_trees)
    ati_elements = [
        __get_ati_data(solutions, row)
        for row in range(start_index, end_index)
    ]
    return end_index, ati_elements, anon_tree, canon_tree
예제 #8
0
 def test_different_canon_trees_in_different_vertices(self, subtests):
     """Check that neighbouring vertices have different canon trees.

     The first fragment of each vertex is canonicalized, and every tree is
     compared with the tree one position earlier in the list (the first
     tree is compared with the last one), each in its own subtest.
     """
     canon_trees = []
     for vertex in VERTEX:
         # The first fragment of a vertex represents that vertex.
         representative = all_fragments[INDICES_BY_VERTEX[vertex][0]]
         canon_trees.append(get_trees(representative, {TREE_TYPE.CANON})[0])

     shifted_trees = np.roll(canon_trees, 1)
     for tree, neighbour in zip(canon_trees, shifted_trees):
         with subtests.test():
             assert not are_asts_equal(tree, neighbour)
예제 #9
0
def __find_same_fragments(
        solutions: pd.DataFrame,
        start_index: int) -> Tuple[int, List[AtiItem], ast.AST, ast.AST]:
    """Collect the run of consecutive rows sharing one anon tree.

    Starting from ``start_index``, following rows are consumed for as long
    as ``__are_same_fragments`` reports a match with the anon tree of the
    starting row; every consumed row is passed to ``__handle_current_ati``.

    :return: ``(index of the first row not consumed, ati_elements,
        anon tree of the starting row, canon tree of the starting row)``.
    """
    ati_elements = []
    __handle_current_ati(ati_elements, solutions, start_index)
    start_fragment = __get_column_value(
        solutions, start_index, consts.CODE_TRACKER_COLUMN.FRAGMENT)
    requested_trees = {TREE_TYPE.ANON, TREE_TYPE.CANON}
    start_anon_tree, start_canon_tree = get_trees(start_fragment,
                                                  requested_trees)

    next_index = start_index + 1
    while next_index < solutions.shape[0]:
        if not __are_same_fragments(start_anon_tree, solutions, next_index):
            break
        __handle_current_ati(ati_elements, solutions, next_index)
        next_index += 1
    return next_index, ati_elements, start_anon_tree, start_canon_tree
예제 #10
0
def __are_same_fragments(current_anon_tree: ast.AST, solutions: pd.DataFrame,
                         next_index: int) -> bool:
    """Tell whether the row at ``next_index`` has an equal anon tree.

    The fragment stored at ``next_index`` is turned into an anon tree, which
    is then compared with ``current_anon_tree`` via ``are_asts_equal``.
    """
    next_fragment = __get_column_value(
        solutions, next_index, consts.CODE_TRACKER_COLUMN.FRAGMENT)
    [candidate_tree] = get_trees(next_fragment, {TREE_TYPE.ANON})
    return are_asts_equal(current_anon_tree, candidate_tree)
예제 #11
0
import sys

sys.path.append('.')

from src.main.canonicalization.consts import TREE_TYPE
from src.main.canonicalization.canonicalization import get_trees
from src.main.canonicalization.diffs.gumtree import GumTreeDiff

# Minimal usage example: build anon trees for two one-line programs and
# run the GumTree diff on them.
src_source, dst_source = 'a = 5', 'a = 6'
[src_anon] = get_trees(src_source, {TREE_TYPE.ANON})
[dst_anon] = get_trees(dst_source, {TREE_TYPE.ANON})
# The value returned by the diff call is neither stored nor printed.
GumTreeDiff.create_tmp_files_and_get_diffs_number(src_anon, dst_anon)
예제 #12
0
def __get_code_by_source(source: str, is_goal: bool = False) -> Code:
    """Create a ``Code`` from ``source``.

    The rate is ``TEST_RESULT.FULL_SOLUTION.value`` when ``is_goal`` is
    truthy and ``0`` otherwise.
    """
    requested_trees = {TREE_TYPE.ANON, TREE_TYPE.CANON}
    anon_tree, canon_tree = get_trees(source, requested_trees)
    if is_goal:
        rate = TEST_RESULT.FULL_SOLUTION.value
    else:
        rate = 0
    return Code(anon_tree=anon_tree, canon_tree=canon_tree, rate=rate)
예제 #13
0
def create_code_from_source(source: str,
                            rate: float = TEST_RESULT.CORRECT_CODE.value
                            ) -> Code:
    """Create a ``Code`` from ``source`` with the given ``rate``.

    ``rate`` defaults to ``TEST_RESULT.CORRECT_CODE.value``.
    """
    requested_trees = {TREE_TYPE.ANON, TREE_TYPE.CANON}
    trees = get_trees(source, requested_trees)
    anon_tree, canon_tree = trees
    return Code(anon_tree, canon_tree, rate)
def get_canonicalized_code_from_file(file: str) -> str:
    """Return the code generated from the canon tree of ``file``'s content.

    Trailing newline characters are stripped from the generated code.
    """
    content = get_content_from_file(file)
    [canon_tree] = get_trees(content, {TREE_TYPE.CANON})
    generated_code = get_code_from_tree(canon_tree)
    return generated_code.rstrip('\n')
def get_anonymized_code_from_file(file: str) -> str:
    """Return the code generated from the anon tree of ``file``'s content.

    ``get_trees`` receives ``False`` as its third positional argument; the
    meaning of that flag is defined by ``get_trees`` itself.  Trailing
    newline characters are stripped from the generated code.
    """
    content = get_content_from_file(file)
    [anon_tree] = get_trees(content, {TREE_TYPE.ANON}, False)
    generated_code = get_code_from_tree(anon_tree)
    return generated_code.rstrip('\n')