コード例 #1
0
def test_vector_hash():
    """Check the vector hash of a fragment extended with one extra node pair.

    Four operation nodes are created (an 'M1' call and an '=' assignment
    with 0 as the fifth constructor argument, an 'M2' call and an '='
    assignment with 1), linked with MAP and PARAMETER edges, and added to a
    change graph. A fragment is built from the two call nodes, extended with
    the two assignment nodes, and the hash of its vector is compared with a
    fixed, normalized reference value.
    """
    graph = ChangeGraph()

    operation = ChangeNode.Kind.OPERATION_NODE
    func_call = ChangeNode.SubKind.OP_FUNC_CALL
    assignment = ChangeNode.SubKind.OP_ASSIGNMENT

    # Nodes are created in the same order as before (call, assignment, call,
    # assignment) in case construction order matters to the node objects.
    m1_call = ChangeNode(None, None, 'M1', operation, 0, sub_kind=func_call)
    m1_assign = ChangeNode(None, None, '=', operation, 0, sub_kind=assignment)
    m2_call = ChangeNode(None, None, 'M2', operation, 1, sub_kind=func_call)
    m2_assign = ChangeNode(None, None, '=', operation, 1, sub_kind=assignment)

    edge_specs = (
        (LinkType.MAP, m1_call, m2_call),
        (LinkType.MAP, m1_assign, m2_assign),
        (LinkType.PARAMETER, m1_call, m1_assign),
        (LinkType.PARAMETER, m2_call, m2_assign),
    )
    for link_type, source, target in edge_specs:
        ChangeEdge.create(link_type, source, target)

    graph.nodes.update([m1_call, m1_assign, m2_call, m2_assign])

    base_fragment = Fragment.create_from_node_pair([m1_call, m2_call])
    extended_fragment = Fragment.create_extended(base_fragment, ext_nodes=(m1_assign, m2_assign))

    assert extended_fragment.vector.get_hash() == normalize(27320942899360)
コード例 #2
0
    def _mine(self, graphs):
        """Mine change patterns from the given change graphs.

        Steps, in order:

        1. Collect (node, node.mapped) pairs for every mapped function-call
           operation node of the BEFORE_CHANGES version, bucketed by the
           string '<label>~<mapped label>'. Graphs whose commit date is
           earlier than ``self.MIN_DATE`` (when it is set) are skipped.
        2. For each bucket holding at least ``Pattern.MIN_FREQUENCY`` pairs,
           build a pattern from the pairs' fragments, extend it, and register
           it if it is a change and has at least ``self.MIN_PATTERN_SIZE``
           size.
        3. Filter the registered patterns.
        4. If ``self.HIDE_OVERLAPPED_FRAGMENTS`` is truthy, remove from each
           pattern the fragments reported as overlapped.

        :param graphs: iterable of change graphs exposing ``repo_info`` and
            ``nodes``.
        """
        # TODO: delete assign nodes?
        # TODO: collapse literals?

        pairs_by_label = {}
        for change_graph in graphs:
            if self.MIN_DATE and change_graph.repo_info.commit_dtm < self.MIN_DATE:
                continue

            for source in change_graph.nodes:
                if source.version != ChangeNode.Version.BEFORE_CHANGES or not source.mapped:
                    continue

                # Only function-call operation nodes seed a pattern; control
                # nodes are deliberately not considered here.
                is_func_call = (source.kind == ChangeNode.Kind.OPERATION_NODE
                                and source.sub_kind == ChangeNode.SubKind.OP_FUNC_CALL)
                if not is_func_call:
                    continue

                bucket_key = f'{source.label}~{source.mapped.label}'
                pairs_by_label.setdefault(bucket_key, []).append((source, source.mapped))

        logger.warning(f'Total pairs after the first step = {len(pairs_by_label)}')

        for ordinal, node_pairs in enumerate(pairs_by_label.values(), start=1):
            logger.warning(f'Looking at node pair #{ordinal}')

            if len(node_pairs) < Pattern.MIN_FREQUENCY:
                logger.warning('Skipping...')
                continue

            seed_fragments = {Fragment.create_from_node_pair(node_pair) for node_pair in node_pairs}
            candidate = Pattern(seed_fragments, len(seed_fragments)).extend()

            if candidate.is_change() and candidate.size >= self.MIN_PATTERN_SIZE:
                self.add_pattern(candidate)
                logger.warning(f'Pattern #{candidate.id} with size {candidate.size} was added')

            logger.warning(f'Done looking at node pair #{ordinal}')

        logger.warning(f'Done patterns\' mining, total count = {self._patterns_cnt}')

        self._filter_patterns()
        logger.warning(f'Done filtering, total count = {self._patterns_cnt}')

        if not self.HIDE_OVERLAPPED_FRAGMENTS:
            return

        logger.info('Removing overlapped fragments from patterns')
        for same_size_patterns in self._size_to_patterns.values():
            for mined in same_size_patterns:
                for overlapped in Pattern.get_graph_overlapped_fragments(mined.fragments):
                    mined.fragments.remove(overlapped)
        logger.info('Done removing overlapped fragments from patterns')
コード例 #3
0
def _get_freq_group(fr):
    """Return the largest group found among the one-step extensions of ``fr``.

    For every label returned by ``fr.get_label_to_ext_list()`` the listed
    extensions are applied to ``fr`` one at a time; after each addition the
    accumulated extended fragments are grouped and the group with the most
    elements seen so far (across all labels) is remembered.

    :param fr: fragment to extend.
    :return: the group with the greatest length, or ``None`` when no
        non-empty group was produced.
    """
    best_group = None
    best_size = 0

    for _label, extensions in fr.get_label_to_ext_list().items():
        extended = set()
        for extension in extensions:
            extended.add(Fragment.create_extended(fr, extension))

            # NOTE(review): the grouping is recomputed after every added
            # extension, not once per label -- confirm this is intended.
            for position, candidate in enumerate(Fragment.create_groups(extended)):
                size = len(candidate)
                # Strict comparison: on ties the earliest group wins.
                if size > best_size:
                    best_size, best_group = size, candidate

                logger.log(logger.DEBUG, f'Elements in group #{position + 1} -> {len(candidate)}')

    return best_group
コード例 #4
0
def test_fragment_label_to_ext_list():
    """Build a two-sided change graph and print the most frequent extension group.

    Seven nodes (cn1..cn7) are created with 0 as the fifth constructor
    argument and six nodes (c2n1..c2n6) with 1; the listing further below
    refers to these as v0 and v1. Three node pairs are connected with MAP
    edges and cross-referenced through ``mapped``. A fragment is then created
    from the (getZoneByName, get_fw_zone_settings) pair, passed to
    ``_get_freq_group``, and the resulting group is wrapped in a ``Pattern``
    whose fragments are printed.

    NOTE(review): this test contains no assertions; it only fails if one of
    the calls raises.
    """
    cg = ChangeGraph()
    # Nodes created with 0 as the fifth argument.
    cn1 = ChangeNode(None, None, 'getZoneByName', ChangeNode.Kind.OPERATION_NODE, 0, sub_kind=ChangeNode.SubKind.OP_FUNC_CALL)
    cn2 = ChangeNode(None, None, '=', ChangeNode.Kind.OPERATION_NODE, 0, sub_kind=ChangeNode.SubKind.OP_ASSIGNMENT)
    cn3 = ChangeNode(None, None, 'var', ChangeNode.Kind.DATA_NODE, 0, sub_kind=ChangeNode.SubKind.DATA_VARIABLE_DECL)
    cn4 = ChangeNode(None, None, 'getSettings', ChangeNode.Kind.OPERATION_NODE, 0, sub_kind=ChangeNode.SubKind.OP_FUNC_CALL)
    cn5 = ChangeNode(None, None, '=', ChangeNode.Kind.OPERATION_NODE, 0, sub_kind=ChangeNode.SubKind.OP_ASSIGNMENT)
    cn6 = ChangeNode(None, None, 'var', ChangeNode.Kind.DATA_NODE, 0, sub_kind=ChangeNode.SubKind.DATA_VARIABLE_DECL)
    cn7 = ChangeNode(None, None, 'update', ChangeNode.Kind.OPERATION_NODE, 0, sub_kind=ChangeNode.SubKind.OP_FUNC_CALL)

    ChangeEdge.create(LinkType.PARAMETER, cn1, cn2)     # getZoneByName -para> =
    ChangeEdge.create(LinkType.DEFINITION, cn2, cn3)    # = -def> var
    ChangeEdge.create(LinkType.DEFINITION, cn1, cn3)    # getZoneByName -def> var
    ChangeEdge.create(LinkType.PARAMETER, cn4, cn5)     # getSettings -para> =
    ChangeEdge.create(LinkType.DEFINITION, cn4, cn6)    # getSettings -def> var
    ChangeEdge.create(LinkType.DEFINITION, cn5, cn6)    # = -def> var
    ChangeEdge.create(LinkType.PARAMETER, cn4, cn7)     # getSettings -para> update
    ChangeEdge.create(LinkType.PARAMETER, cn5, cn7)     # = -para> update
    ChangeEdge.create(LinkType.PARAMETER, cn6, cn7)     # var -para> update

    # Nodes created with 1 as the fifth argument.
    c2n1 = ChangeNode(None, None, 'get_fw_zone_settings', ChangeNode.Kind.OPERATION_NODE, 1, sub_kind=ChangeNode.SubKind.OP_FUNC_CALL)
    c2n2 = ChangeNode(None, None, '=', ChangeNode.Kind.OPERATION_NODE, 1, sub_kind=ChangeNode.SubKind.OP_ASSIGNMENT)
    c2n3 = ChangeNode(None, None, 'var', ChangeNode.Kind.DATA_NODE, 1, sub_kind=ChangeNode.SubKind.DATA_VARIABLE_DECL)
    c2n4 = ChangeNode(None, None, 'var', ChangeNode.Kind.DATA_NODE, 1, sub_kind=ChangeNode.SubKind.DATA_VARIABLE_USAGE)
    c2n5 = ChangeNode(None, None, 'var', ChangeNode.Kind.DATA_NODE, 1, sub_kind=ChangeNode.SubKind.DATA_VARIABLE_DECL)
    c2n6 = ChangeNode(None, None, 'update_fw_settings', ChangeNode.Kind.OPERATION_NODE, 1, sub_kind=ChangeNode.SubKind.OP_FUNC_CALL)

    ChangeEdge.create(LinkType.PARAMETER, c2n1, c2n2)   # get_fw_zone_settings -para> =
    ChangeEdge.create(LinkType.PARAMETER, c2n2, c2n6)   # = -para> update_fw_settings
    ChangeEdge.create(LinkType.DEFINITION, c2n2, c2n3)  # = -def> var
    ChangeEdge.create(LinkType.DEFINITION, c2n2, c2n5)  # = -def> var
    ChangeEdge.create(LinkType.REFERENCE, c2n3, c2n4)   # var -ref> var
    ChangeEdge.create(LinkType.PARAMETER, c2n3, c2n6)   # var -para> update_fw_settings
    ChangeEdge.create(LinkType.PARAMETER, c2n4, c2n6)   # var -para> update_fw_settings
    ChangeEdge.create(LinkType.PARAMETER, c2n5, c2n6)   # var -para> update_fw_settings

    # ---

    # Connect the two sides: a MAP edge per pair plus symmetric `mapped`
    # attributes on both nodes of the pair.
    ChangeEdge.create(LinkType.MAP, cn1, c2n1)          # getZoneByName -> get_fw_zone_settings
    ChangeEdge.create(LinkType.MAP, cn2, c2n2)          # = -> =
    ChangeEdge.create(LinkType.MAP, cn7, c2n6)          # update -> update_fw_settings
    cn1.mapped = c2n1
    c2n1.mapped = cn1
    cn2.mapped = c2n2
    c2n2.mapped = cn2
    cn7.mapped = c2n6
    c2n6.mapped = cn7

    cg.nodes.update([cn1, cn2, cn3, cn4, cn5, cn6, cn7])
    cg.nodes.update([c2n1, c2n2, c2n3, c2n4, c2n5, c2n6])

    # Node listing, presumably captured from a debug run of the loop below
    # (columns look like: id, version, label, kind) -- TODO confirm.
    # 4 v1 get_fw_zone_settings operation.method-call
    # 5 v0 getZoneByName operation.method-call
    # 6 v0 getSettings operation.method-call
    # 7 v1 var data.variable-decl
    # 8 v1 = operation.assignment
    # 9 v0 = operation.assignment
    # 10 v0 = operation.assignment
    # 11 v1 var data.variable-decl
    # 1 v0 update operation.method-call
    # 12 v0 var data.variable-decl
    # 2 v1 update_fw_settings operation.method-call
    # 13 v1 var data.variable-usage
    # 3 v0 var data.variable-decl

    # Debug helpers, disabled:
    # for node in cg.nodes:
    #     logger.log(logger.WARNING, node)
    #
    # export_graph_image(cg)

    # --- --- --- --- ---

    # Seed fragment from the first mapped call pair, then pick the largest
    # group of its extensions.
    fr = Fragment.create_from_node_pair([cn1, c2n1])
    group = _get_freq_group(fr)

    p = Pattern(group, freq=None)
    print(p.fragments)
コード例 #5
0
def main():
    """Command-line entry point: run the tool in the mode given as first argument.

    The positional ``mode`` argument selects one of the ``RunModes`` values:

    * ``BUILD_PY_FLOW_GRAPH`` -- build a flow graph from ``--input`` and
      export it to ``--output``.
    * ``BUILD_CHANGE_GRAPH`` -- build a change graph from ``--src`` and
      ``--dest`` and export it to ``--output``.
    * ``COLLECT_CHANGE_GRAPHS`` -- run ``GitAnalyzer().build_change_graphs()``.
    * ``MINE_PATTERNS`` -- mine patterns either from explicitly given
      ``--src``/``--dest`` file pairs (optionally with ``--fake-mining``) or,
      when none of these options is given, from the pickled change graphs
      found in the ``change_graphs_storage_dir`` setting.

    :raises ValueError: if the mode is unknown, or if ``--src``/``--dest``
        are unset or of different length in file-pair mining.
    """
    logger.info('------------------------------ Starting ------------------------------')

    if settings.get('use_stackimpact', required=False):
        _ = stackimpact.start(
            agent_key=settings.get('stackimpact_agent_key'),
            app_name='CodeChangesMiner',
            debug=True,
            app_version=str(datetime.datetime.now())
        )

    sys.setrecursionlimit(2**31-1)
    multiprocessing.set_start_method('spawn', force=True)

    parser = argparse.ArgumentParser()
    parser.add_argument('mode', help=f'One of {RunModes.ALL}', type=str)
    # First pass only extracts the mode; mode-specific options are registered
    # below and parsed strictly with parse_args().
    args, _ = parser.parse_known_args()

    current_mode = args.mode

    if current_mode == RunModes.BUILD_PY_FLOW_GRAPH:
        parser.add_argument('-i', '--input', help='Path to source code file', type=str, required=True)
        parser.add_argument('-o', '--output', help='Path to output file', type=str, default='pyflowgraph.dot')
        parser.add_argument('--no-closure', action='store_true')
        parser.add_argument('--show-deps', action='store_true')
        parser.add_argument('--hide-op-kinds', action='store_true')
        parser.add_argument('--show-data-keys', action='store_true')
        args = parser.parse_args()

        fg = pyflowgraph.build_from_file(
            args.input, show_dependencies=args.show_deps, build_closure=not args.no_closure)
        pyflowgraph.export_graph_image(
            fg, args.output, show_op_kinds=not args.hide_op_kinds, show_data_keys=args.show_data_keys)
    elif current_mode == RunModes.BUILD_CHANGE_GRAPH:
        parser.add_argument('-s', '--src', help='Path to source code before changes', type=str, required=True)
        parser.add_argument('-d', '--dest', help='Path to source code after changes', type=str, required=True)
        parser.add_argument('-o', '--output', help='Path to output file', type=str, default='changegraph.dot')
        args = parser.parse_args()

        fg = changegraph.build_from_files(args.src, args.dest)
        changegraph.export_graph_image(fg, args.output)
    elif current_mode == RunModes.COLLECT_CHANGE_GRAPHS:
        GitAnalyzer().build_change_graphs()
    elif current_mode == RunModes.MINE_PATTERNS:
        parser.add_argument('-s', '--src', help='Path to source code before changes', type=str, nargs='+')
        parser.add_argument('-d', '--dest', help='Path to source code after changes', type=str, nargs='+')
        parser.add_argument('--fake-mining', action='store_true')
        args = parser.parse_args()

        if args.src or args.dest or args.fake_mining:
            # Both lists must be present before their lengths are compared;
            # otherwise a missing --dest would surface as a TypeError.
            if not args.src or not args.dest or len(args.src) != len(args.dest):
                raise ValueError('src and dest have different size or unset')

            change_graphs = []
            for old_path, new_path in zip(args.src, args.dest):
                methods = []
                for path in (old_path, new_path):
                    # The files are only read, so do not request write access.
                    with open(path, 'r') as f:
                        src = f.read()
                    # Only the first top-level statement of each file is used.
                    methods.append(Method(path, 'test_name', ast.parse(src, mode='exec').body[0], src))

                mock_commit_dtm = datetime.datetime.now(tz=datetime.timezone.utc)
                repo_info = RepoInfo(
                    'mock repo path', 'mock repo name', 'mock repo url', 'mock hash', mock_commit_dtm,
                    'mock old file path', 'mock new file path', methods[0], methods[1])

                cg = changegraph.build_from_files(old_path, new_path, repo_info=repo_info)
                change_graphs.append(cg)

            miner = Miner()
            if args.fake_mining:
                # Skip mining: register each whole change graph as a
                # single-fragment pattern.
                for cg in change_graphs:
                    fragment = Fragment()
                    fragment.graph = cg
                    fragment.nodes = cg.nodes
                    pattern = Pattern([fragment])
                    miner.add_pattern(pattern)
            else:
                miner.mine_patterns(change_graphs)
            miner.print_patterns()
        else:
            storage_dir = settings.get('change_graphs_storage_dir')
            file_names = os.listdir(storage_dir)

            logger.warning(f'Found {len(file_names)} files in storage directory')

            change_graphs = []
            for file_num, file_name in enumerate(file_names):
                file_path = os.path.join(storage_dir, file_name)
                # SECURITY NOTE: unpickling executes arbitrary code from the
                # file; the storage directory must contain trusted data only.
                try:
                    with open(file_path, 'rb') as f:
                        graphs = pickle.load(f)

                    for graph in graphs:
                        change_graphs.append(pickle.loads(graph))
                except Exception:
                    # Best effort: unreadable files are skipped. Exception
                    # (not a bare except) keeps Ctrl-C and SystemExit working
                    # during a long load.
                    logger.warning(f'Incorrect file {file_path}')

                if file_num % 1000 == 0:
                    logger.warning(f'Loaded [{1+file_num}/{len(file_names)}] files')
            logger.warning('Pattern mining has started')

            miner = Miner()
            try:
                miner.mine_patterns(change_graphs)
            except KeyboardInterrupt:
                logger.warning('KeyboardInterrupt: mined patterns will be stored before exit')

            miner.print_patterns()
    else:
        raise ValueError(f'Unknown mode {current_mode!r}, expected one of {RunModes.ALL}')