Esempio n. 1
0
    def _is_pmatch_reach_threshold(self, lib):
        """Tell whether the pre-matched app classes of ``lib`` reach the shrink threshold.

        Args:
            lib (str): Library key made of ``|``-separated fields; the fourth
                field (index 3) is parsed as the library's signature count.

        Returns:
            bool: False when the shrink percentage computed from
                ``self._pmatch_app_classes[lib]`` is below
                ``self.shrink_threshold``, True otherwise.
        """
        lib_signature_num = int(lib.split("|")[3])

        # Compute the percentage once and reuse it for both the log line and
        # the comparison instead of calling the helper twice.
        shrink_percentage = self._get_shrink_percentage(
            self._pmatch_app_classes[lib], lib_signature_num)

        LOGGER.debug("shrink percentage (before matching): %s, %f",
                     lib, shrink_percentage)

        return shrink_percentage >= self.shrink_threshold
Esempio n. 2
0
    def _check_if_library_match(self, lib, lib_name, class_num, signature_num, assume_flattened_package=False):
        """Run graph matching for one library and evaluate the resulting match.

        Args:
            lib (str): Library key used to look up the pre-matched classes.
            lib_name (str): Name of the library.
            class_num (int or str): Number of classes in the library.
            signature_num (int or str): Number of signatures in the library.
            assume_flattened_package (bool, optional): Defaults to False.
                Forwarded unchanged to the relationship-graph matcher.

        Returns:
            False when the shrink percentage of the matched app classes does
            not exceed ``self.shrink_threshold``; otherwise whatever
            ``_check_package_lib_match`` returns.
        """
        # The counts may arrive as raw string fields split out of the library
        # key, so normalise them once; every helper below then receives ints.
        class_num = int(class_num)
        signature_num = int(signature_num)

        start_time = time.time()
        weight, matched_classes_pairs = self._match_relationship_graph_for_lib(
            lib, lib_name, class_num, assume_flattened_package)
        end_time = time.time()

        LOGGER.debug("graph matching time: %fs", end_time - start_time)

        # Each pair is (lib_class, app_class); keep the app side only.
        matched_app_classes = set(pair[1] for pair in matched_classes_pairs)

        shrink_percentage = self._get_shrink_percentage(
            matched_app_classes, signature_num)

        LOGGER.debug("matched weight: %f", weight)
        LOGGER.debug("shrink percentage: %f", shrink_percentage)
        LOGGER.debug("matched classes pairs: %s", matched_classes_pairs)

        # NOTE(review): this comparison is strict, while the other threshold
        # checks in this file only reject values strictly below the
        # threshold -- confirm which behaviour is wanted at equality.
        if shrink_percentage > self.shrink_threshold:
            matched_root_package = self._get_root_package(matched_app_classes)
            return self._check_package_lib_match(
                lib_name, matched_root_package, matched_classes_pairs,
                class_num, signature_num)

        return False
Esempio n. 3
0
    def _get_lib_match_similarity(self, matched_classes_pairs, lib_name, lib_class_num, lib_signature_num):
        """Compute how similar the matched app classes are to a library.

        Args:
            matched_classes_pairs: Matched class pairs produced by the graph
                matcher.
            lib_name (str): Name of the library.
            lib_class_num (int): Number of classes in the library.
            lib_signature_num (int): Number of signatures in the library.

        Returns:
            0 when the shrink percentage of the matched app classes is below
            ``self.shrink_threshold`` or when there is nothing to divide by;
            otherwise the number of matched app classes divided by the smaller
            of the library class count and the count of package classes that
            have signatures.
        """
        LOGGER.debug("matched_app_classes: %d", len(matched_classes_pairs))
        LOGGER.debug(matched_classes_pairs)

        matched_app_classes, package_classes = self._get_package_classes_within_call_graph(
            matched_classes_pairs, lib_name)

        shrink_after = self._get_shrink_percentage(
            matched_app_classes, lib_signature_num)
        LOGGER.debug("shrink percentage (after): %f", shrink_after)

        if shrink_after < self.shrink_threshold:
            return 0

        # Only classes that actually carry signatures count towards the size.
        signed_classes = [
            name for name in package_classes if self._classes_signatures[name]]

        denominator = min(len(signed_classes), lib_class_num)
        if denominator:
            similarity = len(matched_app_classes) / float(denominator)
        else:
            similarity = 0

        # Remember the shrink percentage of the whole package for this library.
        self._lib_shrink_percentage[lib_name] = self._get_shrink_percentage(
            signed_classes, lib_signature_num)

        report = (self.filename, lib_name, len(matched_app_classes),
                  lib_class_num, len(signed_classes), similarity)
        LOGGER.debug("matching info: %s -> %s: %d, %d, %d, %f", *report)
        FILE_LOGGER.debug("%s -> %s: %d, %d, %d, %f", *report)

        return similarity
Esempio n. 4
0
    def _match_libraries(self):
        """Try to match every candidate library against the app.

        Collects the possible matches first, then checks each library key in
        ``self._pmatch_app_classes``. When a library does not match and
        ``self.consider_classes_repackaging`` is set, the check is repeated
        with the flattened-package assumption switched on.
        """
        self._get_possible_matches()

        started_at = time.time()
        LOGGER.info("Start matching libraries ...")

        for lib in tqdm(self._pmatch_app_classes):
            # lib = "lib_name|root_package|class_num|sig_num|category|"
            lib_name, root_package, class_num, signature_num, category, _ = lib.split("|")
            self._lib_info[lib_name] = [root_package, category]

            if self._check_if_library_match(lib, lib_name, class_num, signature_num):
                continue
            if not self.consider_classes_repackaging:
                continue

            # Second attempt: allow the library classes to have been repackaged.
            LOGGER.debug("Try matching considering class repackaging")
            LOGGER.debug("---------------------------------------------------")
            self._check_if_library_match(lib, lib_name, class_num, signature_num, True)

        finished_at = time.time()

        LOGGER.info("Libraries matching finished. Duration: %fs", finished_at - started_at)
Esempio n. 5
0
    def _match_relationship_graph_for_lib(self, lib, lib_name, lib_class_num, assume_flattened_package=False):
        """Collect the matcher inputs for one library and run ``match`` on them.

        Args:
            lib (str): Library key used to look up the pre-matched classes.
            lib_name (str): Name of the library.
            lib_class_num (int): Number of classes in the library.
            assume_flattened_package (bool, optional): Defaults to False.
                Forwarded unchanged to ``match``.

        Returns:
            Whatever ``match`` returns for the collected inputs.
        """
        LOGGER.debug("lib_name: %s", lib_name)

        lib_class_names = set(self._pmatch_lib_classes[lib])
        app_class_names = set(self._pmatch_app_classes[lib])
        candidate_pairs = set(self._pmatch_lib_app_classes[lib])

        lib_method_calls, lib_interfaces, lib_superclasses = self._get_relationship_between_classes(
            lib_class_names, lib_name)
        app_method_calls, app_interfaces, app_superclasses = self._get_relationship_between_classes(
            app_class_names)

        # Every app class gets the same base weight plus a small bonus per
        # signature it has.
        app_class_weights = {
            name: 1.0 / lib_class_num + 0.0001 * len(self._classes_signatures[name])
            for name in app_class_names
        }

        # Packages without sub-packages are only collected when repackaging
        # is taken into account.
        childless_packages = set()
        if self.consider_classes_repackaging:
            for name in app_class_names:
                if not self._check_package_has_subpackage(os.path.dirname(name)):
                    childless_packages.add(os.path.dirname(name))

        LOGGER.debug("potential matches: %d, lib calls: %d, method_calls: %d",
                     len(candidate_pairs), len(lib_method_calls), len(app_method_calls))

        return match(
            lib_classnames=lib_class_names,
            app_classnames=app_class_names,
            potential_class_matches=candidate_pairs,
            lib_method_calls=lib_method_calls,
            app_method_calls=app_method_calls,
            app_class_weights=app_class_weights,
            lib_class_parents=lib_superclasses,
            app_class_parents=app_superclasses,
            lib_class_interfaces=lib_interfaces,
            app_class_interfaces=app_interfaces,
            use_pkg_hierarchy=not self.consider_classes_repackaging,
            assume_flattened_package=assume_flattened_package,
            flattened_app_pkgs_allowed=childless_packages,
        )
Esempio n. 6
0
    def get_formatted_method_descriptor(self, encoded_method, class_descriptor, method_descriptor=None):
        """Replace all obfuscatable names with X

        Every parameter or return type that ends with ``;`` and is not listed
        in ``config.ANDROID_SDK_CLASSES`` is replaced by ``X`` in the
        descriptor.

        Args:
            encoded_method (dvm.EncodedMethod): The encoded method parsed by Androguard.
                Only consulted when ``method_descriptor`` is empty.
            class_descriptor (str): The class descriptor.
            method_descriptor (str, optional): Defaults to None. The method descriptor.

        Returns:
            str: Formatted method descriptor (``class_descriptor`` followed by
                the rewritten descriptor).
        """
        descriptor = method_descriptor if method_descriptor else encoded_method.get_descriptor()
        LOGGER.debug("descriptor: %s", descriptor)
        # splits[1] holds the parameter list, splits[2] the return type.
        splits = re.split(r"\(|\)", descriptor)
        input_types = splits[1].split(' ')
        return_types = splits[2].split(' ')
        types = filter(None, set(input_types).union(return_types))

        # str.replace works on substrings, so when one type is contained in
        # another (e.g. "La;" and "[La;") the outcome depends on which one is
        # replaced first. Iterating the raw set made that order -- and hence
        # the result -- depend on string hashing. Use a fixed order instead:
        # longest type first, ties broken alphabetically.
        for _type in sorted(types, key=lambda name: (-len(name), name)):
            if _type[-1] == ";" and _type not in config.ANDROID_SDK_CLASSES:
                descriptor = descriptor.replace(_type, "X")

        return "%s%s" % (class_descriptor, descriptor)
Esempio n. 7
0
    def _check_package_lib_match(self, lib_name, package, matched_classes_pairs, lib_class_num, lib_signature_num):
        """Decide whether a library matches a package and record the binding.

        Args:
            lib_name (str): Name of the library.
            package: Package the matched app classes belong to.
            matched_classes_pairs: Matched class pairs produced by the graph
                matcher.
            lib_class_num (int): Number of classes in the library.
            lib_signature_num (int): Number of signatures in the library.

        Returns:
            bool: True when the similarity exceeds
                ``self.similarity_threshold`` (even if an already bound
                library with the same name prefix keeps the package),
                False otherwise.
        """
        similarity = self._get_lib_match_similarity(
            matched_classes_pairs, lib_name, lib_class_num, lib_signature_num)

        LOGGER.debug("similarity: %s : %f", lib_name, similarity)

        if not similarity > self.similarity_threshold:
            return False

        # Libraries sharing the part of the name before the first "_" compete
        # for the same package.
        name_prefix = lib_name.split("_")[0] + "_"

        rivals = []
        if package in self._package_libs_matches:
            rivals = [
                bound for bound in self._package_libs_matches[package]
                if bound.startswith(name_prefix)]

        if not rivals:
            # Nothing with the same name is bound to this package yet.
            self._bind_lib_to_package(lib_name, similarity, package)
            return True

        rival_similarity = self._libs_matches[rivals[0]]
        is_tie = abs(similarity - rival_similarity) < 0.0001
        is_better = not is_tie and similarity > rival_similarity

        if is_better:
            # The new library wins: drop every rival from all bookkeeping maps.
            for stale in rivals:
                del self._libs_matches[stale]
                for owner in self._lib_packages_matches[stale]:
                    self._package_libs_matches[owner].remove(stale)
                del self._lib_packages_matches[stale]

        if is_tie or is_better:
            self._bind_lib_to_package(lib_name, similarity, package)

        return True
Esempio n. 8
0
def match(lib_classnames,
          app_classnames,
          potential_class_matches,
          lib_method_calls,
          app_method_calls,
          app_class_weights,
          lib_class_parents=None,
          app_class_parents=None,
          lib_class_interfaces=None,
          app_class_interfaces=None,
          use_pkg_hierarchy=True,
          assume_flattened_package=False,
          flattened_app_pkgs_allowed=None,
          use_call_graph_constraints=True):
    """Match library classes to app classes by solving a binary program.

    One binary variable is created per potential class match. Constraints
    enforce a one-to-one class assignment and, depending on the flags,
    consistency with method calls, the package hierarchy (or a single
    flattened package), superclasses and interfaces. The objective maximises
    the weighted number of used app classes plus small bonuses for matched
    method calls and parent/interface relations.

    Args:
        lib_classnames: Iterable of library class names.
        app_classnames: Iterable of app class names.
        potential_class_matches: Iterable of ``(lib_class, app_class)`` pairs
            that are allowed to match.
        lib_method_calls: Library method calls; each item exposes ``class1``
            and ``class2`` and is indexable (used in the debug dump).
        app_method_calls: App method calls, same shape as ``lib_method_calls``.
        app_class_weights: Mapping from app class to its objective weight.
        lib_class_parents: Optional mapping from library class to superclass.
            Superclass constraints are only added when this is non-empty.
        app_class_parents: Optional mapping from app class to superclass;
            ``None`` is treated as an empty mapping.
        lib_class_interfaces: Optional mapping from library class to its
            interfaces. Interface constraints are only added when this is
            non-empty.
        app_class_interfaces: Optional mapping from app class to its
            interfaces; ``None`` is treated as an empty mapping.
        use_pkg_hierarchy: When true, classes may only match if their packages
            (and, recursively, the parent packages) match.
        assume_flattened_package: Only used for package constraints when
            ``use_pkg_hierarchy`` is false: all matched app classes must then
            live in one single allowed app package.
        flattened_app_pkgs_allowed: Packages allowed in the flattened case;
            each gets a ``'/'`` prefix. ``None`` allows every package found by
            ``process_class_hierarchy``.
        use_call_graph_constraints: When true, method-call matching variables
            and constraints are added.

    Returns:
        tuple: ``(objective value, set of matched (lib_class, app_class) pairs)``.
    """
    # The app-side mappings default to None but are consulted whenever the
    # library-side mapping is present; fall back to "no relations".
    if app_class_parents is None:
        app_class_parents = {}
    if app_class_interfaces is None:
        app_class_interfaces = {}

    m = Model("")

    # If the log level is DEBUG (10 is the numeric value of logging.DEBUG)
    if LOGGER.getEffectiveLevel() == 10:
        LOGGER.debug('%d lib classes, %d app classes', len(lib_classnames),
                     len(app_classnames))
        LOGGER.debug('%d lib methods, %d app methods', len(lib_method_calls),
                     len(app_method_calls))
    else:
        m.setParam('OutputFlag', False)

    # One binary variable per potential class match, plus per-class sums used
    # to limit every lib/app class to at most one partner.
    class_match_vars = {}
    lib_class_match_count_exprs = {}
    app_class_match_count_exprs = {}
    for pcm in potential_class_matches:
        class_match_vars[pcm] = m.addVar(vtype=GRB.BINARY)
        (lib_class, app_class) = pcm

        if lib_class not in lib_class_match_count_exprs:
            lib_class_match_count_exprs[lib_class] = LinExpr(0)
        lib_class_match_count_exprs[lib_class] += class_match_vars[pcm]

        if app_class not in app_class_match_count_exprs:
            app_class_match_count_exprs[app_class] = LinExpr(0)
        app_class_match_count_exprs[app_class] += class_match_vars[pcm]

    for expr in lib_class_match_count_exprs.itervalues():
        m.addConstr(expr <= 1)

    for expr in app_class_match_count_exprs.itervalues():
        m.addConstr(expr <= 1)

    # app_class_used_vars[c] is 1 exactly when app class c is matched.
    app_class_used_vars = {}
    for app_class in app_classnames:
        app_class_used_vars[app_class] = m.addVar(vtype=GRB.BINARY)
        if app_class in app_class_match_count_exprs:
            m.addConstr(app_class_used_vars[app_class] ==
                        app_class_match_count_exprs[app_class])
        else:
            m.addConstr(app_class_used_vars[app_class] == 0)

    LOGGER.debug('Method matching...')

    methods_matched_total_expr = LinExpr(0)
    if use_call_graph_constraints:
        method_matching_candidates = list(
            get_method_matching_candidates(lib_method_calls, app_method_calls))

        lib_method_match_count_exprs = {}
        app_method_match_count_exprs = {}

        for lib_method_call in lib_method_calls:
            lib_method_match_count_exprs[lib_method_call] = LinExpr(0)

        for app_method_call in app_method_calls:
            app_method_match_count_exprs[app_method_call] = LinExpr(0)

        method_matching_vars = {}
        for mm in method_matching_candidates:
            lib_method_call = mm[0]
            app_method_call = mm[1]
            lib_app_class1 = (lib_method_call.class1, app_method_call.class1)
            lib_app_class2 = (lib_method_call.class2, app_method_call.class2)
            if lib_app_class1 in class_match_vars and lib_app_class2 in class_match_vars:
                # A method call pair can only match if both endpoint class
                # pairs match.
                method_matching_vars[mm] = m.addVar(vtype=GRB.BINARY)
                m.addConstr(method_matching_vars[mm] <=
                            class_match_vars[lib_app_class1])
                m.addConstr(method_matching_vars[mm] <=
                            class_match_vars[lib_app_class2])

                lib_method_match_count_exprs[
                    lib_method_call] += method_matching_vars[mm]
                app_method_match_count_exprs[
                    app_method_call] += method_matching_vars[mm]

                methods_matched_total_expr += 1 * method_matching_vars[mm]

        LOGGER.debug('Done')

        # Every lib method call is matched at most once.
        for expr in lib_method_match_count_exprs.itervalues():
            m.addConstr(expr <= 1)

        # An app method call must be matched exactly when both of its
        # endpoint classes are used.
        for app_method_call, expr in app_method_match_count_exprs.iteritems():
            app_method_class1 = app_method_call.class1
            app_method_class2 = app_method_call.class2
            tmp = m.addVar(vtype=GRB.BINARY)
            m.addConstr(tmp == and_(app_class_used_vars[app_method_class1],
                                    app_class_used_vars[app_method_class2]))
            m.addConstr(expr == tmp)

    if use_pkg_hierarchy:
        lib_pkg_parent_dict = {}
        lib_class_pkg_dict = {}
        app_pkg_parent_dict = {}
        app_class_pkg_dict = {}
        process_class_hierarchy(lib_classnames, lib_pkg_parent_dict,
                                lib_class_pkg_dict, ROOT_PKG)
        process_class_hierarchy(app_classnames, app_pkg_parent_dict,
                                app_class_pkg_dict, ROOT_PKG)

        LOGGER.debug(lib_pkg_parent_dict)
        LOGGER.debug(app_pkg_parent_dict)
        LOGGER.debug(lib_class_pkg_dict)
        LOGGER.debug(app_class_pkg_dict)

        lib_pkg_match_cnt_exprs = {}
        app_pkg_match_cnt_exprs = {}

        all_lib_pkgs = list(lib_pkg_parent_dict.keys()) + [ROOT_PKG]
        all_app_pkgs = list(app_pkg_parent_dict.keys()) + [ROOT_PKG]

        LOGGER.debug('All lib packages: %s', all_lib_pkgs)
        LOGGER.debug('All app packages: %s', all_app_pkgs)

        potential_package_matches = list(
            itertools.product(all_lib_pkgs, all_app_pkgs))

        package_matches_vars = {}
        for (lib_pkg, app_pkg) in potential_package_matches:
            match_var = m.addVar(vtype=GRB.BINARY,
                                 name=('%s/%s' % (lib_pkg, app_pkg)))
            package_matches_vars[(lib_pkg, app_pkg)] = match_var

            if lib_pkg not in lib_pkg_match_cnt_exprs:
                lib_pkg_match_cnt_exprs[lib_pkg] = LinExpr(0)
            lib_pkg_match_cnt_exprs[lib_pkg] += match_var

            if app_pkg not in app_pkg_match_cnt_exprs:
                app_pkg_match_cnt_exprs[app_pkg] = LinExpr(0)
            app_pkg_match_cnt_exprs[app_pkg] += match_var

        # Every lib package can be matched to at most one app package
        for expr in lib_pkg_match_cnt_exprs.itervalues():
            m.addConstr(expr <= 1)

        # Every app package can be matched to at most one lib package
        for expr in app_pkg_match_cnt_exprs.itervalues():
            m.addConstr(expr <= 1)

        # Packages can only match if their parent packages match too
        for (lib_pkg, app_pkg) in potential_package_matches:
            if lib_pkg == ROOT_PKG or app_pkg == ROOT_PKG:
                continue
            lib_parent_pkg = lib_pkg_parent_dict[lib_pkg]
            app_parent_pkg = app_pkg_parent_dict[app_pkg]
            match_var = package_matches_vars[(lib_pkg, app_pkg)]
            if (lib_parent_pkg, app_parent_pkg) in package_matches_vars:
                parent_match_var = package_matches_vars[(lib_parent_pkg,
                                                         app_parent_pkg)]
                m.addConstr(match_var <= parent_match_var)
            else:
                m.addConstr(match_var == 0)

        # Classes can only match if their packages also match
        for pcm in potential_class_matches:
            (lib_class, app_class) = pcm
            lib_class_pkg = lib_class_pkg_dict[lib_class]
            app_class_pkg = app_class_pkg_dict[app_class]
            ppm = (lib_class_pkg, app_class_pkg)

            # package_matches_vars has exactly the pairs of
            # potential_package_matches as keys; testing the dict avoids a
            # linear scan of the list for every class pair.
            if ppm in package_matches_vars:
                m.addConstr(class_match_vars[pcm] <= package_matches_vars[ppm])
            else:
                m.addConstr(class_match_vars[pcm] == 0)

    elif assume_flattened_package:

        app_pkg_parent_dict = {}
        app_class_pkg_dict = {}
        process_class_hierarchy(app_classnames, app_pkg_parent_dict,
                                app_class_pkg_dict, ROOT_PKG)

        app_pkg_active_vars = {}
        active_pkgs_cnt_expr = LinExpr(0)

        if flattened_app_pkgs_allowed is None:
            flattened_app_pkgs_allowed = app_pkg_parent_dict.keys()
        else:
            flattened_app_pkgs_allowed = [
                '/' + pkg for pkg in flattened_app_pkgs_allowed
            ]

        for pkg in flattened_app_pkgs_allowed:
            app_pkg_active_vars[pkg] = m.addVar(vtype=GRB.BINARY,
                                                name=('%s' % pkg))
            active_pkgs_cnt_expr += app_pkg_active_vars[pkg]

        # At most one app package may be active.
        m.addConstr(active_pkgs_cnt_expr <= 1)

        # Classes can only match when their app package is the active one.
        for pcm in potential_class_matches:
            (lib_class, app_class) = pcm
            app_class_pkg = app_class_pkg_dict[app_class]

            if app_class_pkg in app_pkg_active_vars:
                m.addConstr(
                    class_match_vars[pcm] <= app_pkg_active_vars[app_class_pkg]
                )
            else:
                m.addConstr(class_match_vars[pcm] == 0)

    app_parents_and_interf_matched_expr = LinExpr(0)

    # Superclass matching
    if lib_class_parents:
        for pcm in potential_class_matches:
            (lib_class, app_class) = pcm
            parent_lib = lib_class_parents[
                lib_class] if lib_class in lib_class_parents else None
            parent_app = app_class_parents[
                app_class] if app_class in app_class_parents else None
            if parent_lib:
                if parent_app:
                    parents_match = (parent_lib, parent_app)
                    if parents_match in class_match_vars:
                        if not assume_flattened_package or (
                                basename(lib_class) == basename(parent_lib) and
                                basename(app_class) == basename(parent_app)):
                            m.addConstr(class_match_vars[pcm] <=
                                        class_match_vars[parents_match])
                    else:
                        m.addConstr(class_match_vars[pcm] == 0)
                else:
                    m.addConstr(class_match_vars[pcm] == 0)
            else:
                # The lib class has no parent: the pair may only match if the
                # app parent stays unmatched. A parent without any potential
                # match can never be matched, so no constraint is needed.
                if parent_app and parent_app in app_class_match_count_exprs:
                    m.addConstr(1 - class_match_vars[pcm] >=
                                app_class_match_count_exprs[parent_app])

        for app_class, app_class_parent in app_class_parents.iteritems():
            if app_class in app_class_used_vars and app_class_parent in app_class_used_vars:
                app_class_and_parent_matched = m.addVar(vtype=GRB.BINARY)
                m.addConstr(app_class_used_vars[app_class] >=
                            app_class_and_parent_matched)
                m.addConstr(app_class_used_vars[app_class_parent] >=
                            app_class_and_parent_matched)
                app_parents_and_interf_matched_expr += app_class_and_parent_matched

    # Interface matching
    if lib_class_interfaces:
        for pcm in potential_class_matches:
            (lib_class, app_class) = pcm
            interfaces_lib_class = lib_class_interfaces[
                lib_class] if lib_class in lib_class_interfaces else []
            interfaces_app_class = app_class_interfaces[
                app_class] if app_class in app_class_interfaces else []

            matched_interfaces_expr = LinExpr(0)
            for lib_interface in interfaces_lib_class:
                for app_interface in interfaces_app_class:
                    interfaces_match = (lib_interface, app_interface)
                    if interfaces_match in class_match_vars:
                        if not assume_flattened_package or (
                                basename(lib_class) == basename(lib_interface)
                                and basename(app_class)
                                == basename(app_interface)):
                            matched_interfaces_expr += class_match_vars[
                                interfaces_match]

            matched_lib_interfaces_expr = LinExpr(0)
            matched_app_interfaces_expr = LinExpr(0)
            for lib_interface in interfaces_lib_class:
                if lib_interface in lib_class_match_count_exprs:
                    matched_lib_interfaces_expr += lib_class_match_count_exprs[
                        lib_interface]
            for app_interface in interfaces_app_class:
                if app_interface in app_class_match_count_exprs:
                    matched_app_interfaces_expr += app_class_match_count_exprs[
                        app_interface]

            # Twice the interface pairs matched to each other must equal the
            # total number of matched lib interfaces plus matched app
            # interfaces of this class pair.
            m.addConstr(
                2 * matched_interfaces_expr == matched_app_interfaces_expr +
                matched_lib_interfaces_expr)

        # A separate loop variable keeps the app_class_interfaces parameter
        # from being rebound while it is iterated.
        for app_class, interfaces in app_class_interfaces.iteritems():
            for interface in interfaces:
                if app_class in app_class_used_vars and interface in app_class_used_vars:
                    app_class_and_interface_matched = m.addVar(
                        vtype=GRB.BINARY)
                    m.addConstr(app_class_used_vars[app_class] >=
                                app_class_and_interface_matched)
                    m.addConstr(app_class_used_vars[interface] >=
                                app_class_and_interface_matched)
                    app_parents_and_interf_matched_expr += app_class_and_interface_matched

    objective_expr = LinExpr(0)

    if use_call_graph_constraints:
        objective_expr += 0.0001 * methods_matched_total_expr + 0.0001 * app_parents_and_interf_matched_expr

    for app_class in app_classnames:
        weight = app_class_weights[app_class]
        objective_expr += weight * app_class_used_vars[app_class]
    m.setObjective(objective_expr, GRB.MAXIMIZE)

    LOGGER.debug('Optimizing...')

    m.optimize()

    # Binary variables come back as floats; treat > 0.5 as "selected".
    matched_app_classes = set()
    class_matches = set()
    for pcm in potential_class_matches:
        if class_match_vars[pcm].x > 0.5:
            class_matches.add(pcm)
            matched_app_classes.add(pcm[1])

    LOGGER.debug('Done')
    LOGGER.debug('Class matches: %s', class_matches)

    # If the log level is DEBUG
    if LOGGER.getEffectiveLevel() == 10:

        unmatched_lib_classes = set(lib_classnames)
        unmatched_app_classes = set(app_classnames)

        class_match_cnt = 0
        for pcm in potential_class_matches:
            if class_match_vars[pcm].x > 0.5:
                class_match_cnt += 1
                if pcm[0] != pcm[1]:
                    LOGGER.debug('Potentially wrong match: %s / %s' % pcm)
                    LOGGER.debug('Lib class methods: ')
                    for lm in lib_method_calls:
                        if lm[0] == pcm[0] or lm[1] == pcm[0]:
                            LOGGER.debug(lm)
                    LOGGER.debug('App class methods: ')
                    for am in app_method_calls:
                        if am[0] == pcm[1] or am[1] == pcm[1]:
                            LOGGER.debug(am)

                if pcm[0] in lib_classnames:
                    unmatched_lib_classes.remove(pcm[0])
                else:
                    LOGGER.debug('Missing lib class: %s' % pcm[0])
                if pcm[1] in app_classnames:
                    unmatched_app_classes.remove(pcm[1])
                else:
                    LOGGER.debug('Missing app class: %s' % pcm[1])
        LOGGER.debug('%d classes matched', class_match_cnt)
        LOGGER.debug('Unmatched lib classes:')
        for cl in unmatched_lib_classes:
            LOGGER.debug(cl)

        LOGGER.debug('Unmatched app classes:')
        for cl in unmatched_app_classes:
            LOGGER.debug(cl)

        if use_call_graph_constraints:
            LOGGER.debug('Method matches:')
            method_match_cnt = 0
            for mm in method_matching_vars.keys():
                if method_matching_vars[mm].x > 0.5:
                    LOGGER.debug(mm)
                    method_match_cnt += 1

            LOGGER.debug('%d methods matched', method_match_cnt)

        if use_pkg_hierarchy:
            LOGGER.debug('Package matches:')
            package_match_cnt = 0
            for pm in package_matches_vars.keys():
                if package_matches_vars[pm].x > 0.5:
                    LOGGER.debug(pm)
                    package_match_cnt += 1
            LOGGER.debug('%d packages matched', package_match_cnt)

        LOGGER.debug('Active packages:')
        # The per-package activity variables only exist when the flattened
        # branch above actually ran, i.e. when use_pkg_hierarchy is false.
        if assume_flattened_package and not use_pkg_hierarchy:
            for pkg in flattened_app_pkgs_allowed:
                LOGGER.debug('%s: %s', pkg, app_pkg_active_vars[pkg].x)

        LOGGER.debug('Objective value: %0.4f', m.objval)

    return (m.objval, class_matches)
Esempio n. 9
0
                       type=str,
                       help='the folder that contains library profiles')

    return parser.parse_args()


if __name__ == '__main__':

    args = parse_arguments()

    if args.v:
        LOGGER.setLevel('DEBUG')
    else:
        LOGGER.setLevel('INFO')

    LOGGER.debug("args: %s", args)

    if args.subparser_name == 'profile':
        profile_binaries(base_path=args.d,
                         file_paths=args.f,
                         output_folder=args.o,
                         processes=args.p,
                         overwrite=args.w)
    else:
        search_libs_in_apps(lib_folder=args.ld,
                            lib_profiles=args.lf,
                            app_folder=args.ad,
                            app_profiles=args.af,
                            mode=MODE.ACCURATE if args.A else MODE.SCALABLE,
                            overwrite=args.w,
                            output_folder=args.o,
Esempio n. 10
0
    def _get_package_classes_within_call_graph(self, matched_classes_pairs, lib_name):
        """Collect the app classes that belong to the matched library.

        Starts from all classes living in the packages of the matched app
        classes. In ``MODE.ACCURATE`` the candidates are narrowed down to the
        connected components (over call, interface and superclass relations)
        that contain at least one matched class, after removing "ghost"
        classes derived from ``self.LIB_RELATIONSHIP_GRAPHS[lib_name][3]``.

        Args:
            matched_classes_pairs: Iterable of ``(lib_class, app_class)``
                pairs.
            lib_name (str): Name of the library.

        Returns:
            tuple: ``(matched_app_classes, classes)`` where ``classes`` is the
                in-graph class set in accurate mode and the plain package
                class set otherwise.
        """
        # package could be '' if the root package is /
        package_classes = set()
        matched_app_classes = set(pair[1] for pair in matched_classes_pairs)

        for class_name in matched_app_classes:
            package_name = os.path.dirname(class_name)
            if package_name:
                package_classes.update(self._package_classes[package_name])
            else:
                # add(), not update(): update() on a string would insert every
                # single character of the class name instead of the name.
                package_classes.add(class_name)

        if self.mode != MODE.ACCURATE:
            return matched_app_classes, package_classes

        # Index in this list == relation type: 0 call, 1 interface,
        # 2 superclass.
        graphs = [self._call_graph.subgraph(package_classes), self._interface_graph.subgraph(
            package_classes), self._superclass_graph.subgraph(package_classes)]
        USG = nx.compose_all(graphs).to_undirected()

        LOGGER.debug("Before removing ghost: %d", len(USG.nodes()))

        lib_ghost_graph = self.LIB_RELATIONSHIP_GRAPHS[lib_name][3]
        for pair in matched_classes_pairs:
            (lib_class, app_class) = pair

            if lib_class not in lib_ghost_graph:
                continue

            ghost_relations = lib_ghost_graph.out_edges(lib_class, data=True)

            for _, ghost_lib_class, info in ghost_relations:
                relation_type = info["type"]
                if app_class not in graphs[relation_type]:
                    continue

                # Unmatched neighbours of the app class are ghost candidates.
                ghost_app_classes = set(graphs[relation_type].neighbors(
                    app_class)) - matched_app_classes

                if not self.consider_classes_repackaging:
                    # Keep candidates whose package depth relative to the app
                    # class equals that of the ghost lib class relative to
                    # the lib class.
                    ghost_app_classes = set(c for c in ghost_app_classes if c.count(
                        "/") - app_class.count("/") == ghost_lib_class.count("/") - lib_class.count("/"))

                if info["type"] == 0:
                    # Call graph
                    for ghost_app_class in ghost_app_classes:
                        app_call_descriptors = set(
                            m[:2] for m in graphs[0][app_class][ghost_app_class]["method"])
                        lib_call_descriptors = set(info["method"])

                        if ghost_app_class in USG and app_call_descriptors <= lib_call_descriptors:
                            LOGGER.debug("Ghost app class found: [%d] %s, %s, %s, %s", 0, lib_class, app_class, ghost_lib_class, ghost_app_class)
                            USG.remove_node(ghost_app_class)
                else:
                    # Inheritance/Interface graph
                    if ghost_app_classes:
                        LOGGER.debug("Ghost app classes found: [%d] %s, %s, %s, %s", info["type"], lib_class, app_class, ghost_lib_class, ghost_app_classes)
                        USG.remove_nodes_from(ghost_app_classes)

        LOGGER.debug("After removing ghost: %d", len(USG.nodes()))

        ingraph_classes = set()
        # Only the node sets of the components are needed, so iterate
        # connected_components directly; connected_component_subgraphs no
        # longer exists in recent networkx releases.
        for nodes in nx.connected_components(USG):
            matched_nodes = set(nodes).intersection(matched_app_classes)

            # If classes repackaging is considered, it is very possible to mismatch other classes inside the package
            # We set a threshold in this case to remove the influence
            # threshold = 0.05 if self.consider_classes_repackaging else 0

            threshold = 0

            if len(matched_nodes) > len(nodes) * threshold:
                ingraph_classes.update(nodes)
            else:
                matched_app_classes -= matched_nodes

        # Some matched_app_classes may not exist in call graph
        ingraph_classes.update(matched_app_classes)

        LOGGER.debug("matched_app_classes (after): %d", len(matched_app_classes))

        return matched_app_classes, ingraph_classes