Ejemplo n.º 1
0
    def __init__(self, apk, core_library="androguard"):
        """
        Pick the analysis backend for the APK and start an empty analysis.

        :param apk: the filename of the apk.
        :param core_library: backend name, compared case-insensitively;
            either "androguard" or "rizin".
        :raises ValueError: if the backend name is not one of the two above.
        """
        core_library = core_library.lower()
        if core_library not in ("rizin", "androguard"):
            raise ValueError(
                f"Unsupported core library for Quark: {core_library}")

        if core_library == "rizin":
            self.apkinfo = RizinImp(apk)
        else:
            self.apkinfo = AndroguardImp(apk)

        self.quark_analysis = QuarkAnalysis()
Ejemplo n.º 2
0
    def _worker_analysis(rule_obj):
        """
        Run one rule on ``_quark`` and return its results as plain tuples.

        :param rule_obj: the rule to evaluate.
        :return: a 4-tuple of (number of passed stages, level 4 methods,
            behavior triples, parent/wrapper mapping).
        """
        # Start from a fresh result container before running the rule.
        _quark.quark_analysis = QuarkAnalysis()
        _quark.run(rule_obj)

        def as_triple(method):
            # Reduce a method object to (class name, name, descriptor).
            return (method.class_name, method.name, method.descriptor)

        analysis = _quark.quark_analysis

        stage_count = rule_obj.check_item.count(True)
        sequence_methods = tuple(map(as_triple, analysis.level_4_result))

        behaviors = []
        for entry in analysis.call_graph_analysis_list:
            behaviors.append(
                tuple(
                    as_triple(entry[key])
                    for key in ("parent", "first_call", "second_call")
                )
            )

        return (
            stage_count,
            sequence_methods,
            behaviors,
            analysis.parent_wrapper_mapping,
        )
Ejemplo n.º 3
0
    def test_clean_result(self):
        """clean_result() must reset every level result list to empty."""
        level_attrs = [f"level_{stage}_result" for stage in range(1, 6)]

        analysis = QuarkAnalysis()
        for attr in level_attrs:
            setattr(analysis, attr, ["123"])

        analysis.clean_result()

        for attr in level_attrs:
            assert getattr(analysis, attr) == []
Ejemplo n.º 4
0
    def test_init(self):
        """A fresh QuarkAnalysis starts with empty results and prepared tables."""
        analysis = QuarkAnalysis()
        summary_table_field_list = [
            "Filename",
            "Rule",
            "Confidence",
            "Score",
            "Weight",
        ]
        label_table_field_list = [
            "Label",
            "Description",
            "Number of rules",
            "MAX Confidence %",
        ]

        assert analysis.crime_description == ""
        assert analysis.first_api is None
        assert analysis.second_api is None
        assert analysis.level_1_result == []
        assert analysis.level_2_result == []
        assert analysis.level_3_result == []
        assert analysis.level_4_result == []
        assert analysis.level_5_result == []

        assert analysis.json_report == []
        assert analysis.weight_sum == 0
        assert analysis.score_sum == 0

        # Each expected column must actually be a table header. Filtering
        # with "if label in table" and then testing the label's truthiness
        # would pass even when no column matches.
        # NOTE(review): assumes both tables expose their headers through
        # `field_names` (PrettyTable-style) -- confirm.
        summary_fields = analysis.summary_report_table.field_names
        label_fields = analysis.label_report_table.field_names
        assert all(label in summary_fields
                   for label in summary_table_field_list)
        assert all(label in label_fields
                   for label in label_table_field_list)

        assert analysis.call_graph_analysis_list == []
        assert isinstance(analysis.parent_wrapper_mapping, defaultdict)
        assert len(analysis.parent_wrapper_mapping.items()) == 0
Ejemplo n.º 5
0
class Quark:
    """Quark module is used to check quark's five-stage theory"""

    def __init__(self, apk, core_library="androguard"):
        """

        :param apk: the filename of the apk.
        """
        core_library = core_library.lower()
        if core_library == "rizin":
            self.apkinfo = RizinImp(apk)
        elif core_library == "androguard":
            self.apkinfo = AndroguardImp(apk)
        else:
            raise ValueError(
                f"Unsupported core library for Quark: {core_library}"
            )

        self.quark_analysis = QuarkAnalysis()

    def find_previous_method(
        self, base_method, parent_function, wrapper, visited_methods=None
    ):
        """
        Find the method under the parent function, based on base_method before to parent_function.
        This will append the method into wrapper.

        :param base_method: the base function which needs to be searched.
        :param parent_function: the top-level function which calls the basic function.
        :param wrapper: list is used to track each function.
        :param visited_methods: set with tested method.
        :return: None
        """
        if visited_methods is None:
            visited_methods = set()

        method_set = self.apkinfo.upperfunc(base_method)
        visited_methods.add(base_method)

        if method_set is not None:

            if parent_function in method_set:
                wrapper.append(base_method)
            else:
                for item in method_set:
                    # prevent to test the tested methods.
                    if item in visited_methods:
                        continue
                    self.find_previous_method(
                        item, parent_function, wrapper, visited_methods
                    )

    def find_intersection(self, first_method_set, second_method_set, depth=1):
        """
        Find the first_method_list ∩ second_method_list.
        [MethodAnalysis, MethodAnalysis,...]

        :param first_method_set: first list that contains each MethodAnalysis.
        :param second_method_set: second list that contains each MethodAnalysis.
        :param depth: maximum number of recursive search functions.
        :return: a set of first_method_list ∩ second_method_list or None.
        """
        # Check both lists are not null

        if not first_method_set or not second_method_set:
            raise ValueError("Set is Null")
        # find ∩
        result = first_method_set & second_method_set
        if result:
            return result
        else:
            return self.method_recursive_search(
                depth, first_method_set, second_method_set
            )

    def method_recursive_search(
        self, depth, first_method_set, second_method_set
    ):
        """
        Extend both method sets by one layer of callers and retry the
        intersection.

        :param depth: the layer that was just searched without success.
        :param first_method_set: first set of method objects.
        :param second_method_set: second set of method objects.
        :return: the result of find_intersection on the extended sets, or
            None once the next layer would exceed MAX_SEARCH_LAYER.
        """
        # Not found same method usage, try to find the next layer.
        depth += 1
        if depth > MAX_SEARCH_LAYER:
            return None

        # Each next layer keeps the current layer and adds its callers.
        next_level_set_1 = first_method_set.copy()
        next_level_set_2 = second_method_set.copy()

        # Look every method's callers up once and reuse the result.
        for method in first_method_set:
            callers = self.apkinfo.upperfunc(method)
            if callers:
                next_level_set_1 = callers | next_level_set_1
        for method in second_method_set:
            callers = self.apkinfo.upperfunc(method)
            if callers:
                next_level_set_2 = callers | next_level_set_2

        return self.find_intersection(
            next_level_set_1, next_level_set_2, depth
        )

    def check_sequence(
        self, mutual_parent, first_method_list, second_method_list
    ):
        """
        Check if the first function appeared before the second function.

        :param mutual_parent: function that call the first function and second functions at the same time.
        :param first_method_list: the first show up function, which is a MethodAnalysis
        :param second_method_list: the second show up function, which is a MethodAnalysis
        :return: True or False
        """
        state = False

        for first_call_method in first_method_list:
            for second_call_method in second_method_list:

                seq_table = [
                    (call, number)
                    for call, number in self.apkinfo.lowerfunc(mutual_parent)
                    if call in (first_call_method, second_call_method)
                ]

                # sorting based on the value of the number
                if len(seq_table) < 2:
                    # Not Found sequence in same_method
                    continue
                seq_table.sort(key=operator.itemgetter(1))
                # seq_table would look like: [(getLocation, 1256), (sendSms, 1566), (sendSms, 2398)]

                method_list_need_check = [x[0] for x in seq_table]
                sequence_pattern_method = [
                    first_call_method,
                    second_call_method,
                ]

                if tools.contains(
                    sequence_pattern_method, method_list_need_check
                ):
                    state = True

                    # Record the mapping between the parent function and the wrapper method
                    self.quark_analysis.parent_wrapper_mapping[
                        mutual_parent.full_name
                    ] = self.apkinfo.get_wrapper_smali(
                        mutual_parent, first_call_method, second_call_method
                    )

        return state

    def check_parameter(
        self,
        parent_function,
        first_method_list,
        second_method_list,
        keyword_item_list=None,
    ):
        """
        Check the usage of the same parameter between two methods.

        The bytecode of parent_function is replayed through PyEval; a pair
        matches when one recorded call expression contains both method
        patterns and, if keywords were given, check_parameter_values
        accepts that expression.

        :param parent_function: function that calls both methods.
        :param first_method_list: methods called before the second method.
        :param second_method_list: methods called after the first method.
        :param keyword_item_list: optional iterable with one keyword
            collection (or None) per method, first method first.
        :return: True or False

        NOTE(review): the result flag is not reset between pairs, so once a
        pair matched, every later pair is appended to the call graph list
        as well -- confirm this is intended.
        """
        state = False

        # Materialise once: the caller may pass a one-shot iterator, which
        # any() would partly consume and later pairs would find exhausted.
        if keyword_item_list is not None:
            keyword_item_list = tuple(keyword_item_list)
        has_keywords = bool(keyword_item_list) and any(keyword_item_list)

        for first_call_method in first_method_list:
            for second_call_method in second_method_list:

                first_method_pattern = f"{first_call_method.class_name}->{first_call_method.name}{first_call_method.descriptor}"
                second_method_pattern = f"{second_call_method.class_name}->{second_call_method.name}{second_call_method.descriptor}"

                pyeval = PyEval(self.apkinfo)
                # Check if there is an operation of the same register

                for bytecode_obj in self.apkinfo.get_method_bytecode(
                    parent_function
                ):
                    # ['new-instance', 'v4', Lcom/google/progress/SMSHelper;]
                    instruction = [bytecode_obj.mnemonic]
                    if bytecode_obj.registers is not None:
                        instruction.extend(bytecode_obj.registers)
                    if bytecode_obj.parameter is not None:
                        instruction.append(bytecode_obj.parameter)

                    # for the case of MUTF8String
                    instruction = [str(x) for x in instruction]

                    if instruction[0] in pyeval.eval.keys():
                        pyeval.eval[instruction[0]](instruction)

                for table in pyeval.show_table():
                    for val_obj in table:
                        for c_func in val_obj.called_by_func:

                            if (
                                first_method_pattern not in c_func
                                or second_method_pattern not in c_func
                            ):
                                continue

                            # Honour the keyword filter: an expression whose
                            # arguments lack the keywords is not a match.
                            if has_keywords and not self.check_parameter_values(
                                c_func,
                                (
                                    first_method_pattern,
                                    second_method_pattern,
                                ),
                                keyword_item_list,
                            ):
                                continue

                            state = True

                            # Record the mapping between the parent function and the wrapper method
                            self.quark_analysis.parent_wrapper_mapping[
                                parent_function.full_name
                            ] = self.apkinfo.get_wrapper_smali(
                                parent_function,
                                first_call_method,
                                second_call_method,
                            )

                # Build for the call graph
                if state:
                    call_graph_analysis = {
                        "parent": parent_function,
                        "first_call": first_call_method,
                        "second_call": second_call_method,
                        "apkinfo": self.apkinfo,
                        "first_api": self.quark_analysis.first_api,
                        "second_api": self.quark_analysis.second_api,
                        "crime": self.quark_analysis.crime_description,
                    }
                    self.quark_analysis.call_graph_analysis_list.append(
                        call_graph_analysis
                    )

        return state

    @staticmethod
    def check_parameter_values(source_str, pattern_list, keyword_item_list):
        for pattern, keyword_item in zip(pattern_list, keyword_item_list):
            if keyword_item is None:
                continue

            start_index = source_str.index(pattern) + len(pattern)

            end_index = -1
            brackets_count = 1
            for idx, char in enumerate(source_str[start_index:]):
                if char == "(":
                    brackets_count += 1
                elif char == ")":
                    brackets_count -= 1

                if brackets_count == 0:
                    end_index = idx + start_index
                    break

            parameter_str = source_str[start_index:end_index]

            for keyword in keyword_item:
                if str(keyword) not in parameter_str:
                    return False

        return True

    def find_api_usage(self, class_name, method_name, descriptor_name):
        method_list = []

        # Source method
        source_method = self.apkinfo.find_method(
            class_name, method_name, descriptor_name
        )
        if source_method:
            return [source_method]

        # Potential Method
        potential_method_list = [
            method
            for method in self.apkinfo.all_methods
            if method.name == method_name
            and method.descriptor == descriptor_name
        ]

        potential_method_list = [
            method
            for method in potential_method_list
            if not next(self.apkinfo.get_method_bytecode(method), None)
        ]

        # Check if each method's class is a subclass of the given class
        for method in potential_method_list:
            current_class_set = {method.class_name}

            while not current_class_set.intersection(
                {class_name, "Ljava/lang/Object;"}
            ):
                next_class_set = set()
                for clazz in current_class_set:
                    next_class_set.update(
                        self.apkinfo.superclass_relationships[clazz]
                    )

                current_class_set = next_class_set

            current_class_set.discard("Ljava/lang/Object;")
            if current_class_set:
                method_list.append(method)

        return method_list

    def run(self, rule_obj):
        """
        Run the five levels check to get the y_score.

        :param rule_obj: the instance of the RuleObject.
        :return: None
        """
        self.quark_analysis.clean_result()
        self.quark_analysis.crime_description = rule_obj.crime

        # Level 1: Permission Check
        if self.apkinfo.ret_type == "DEX":
            rule_obj.check_item[0] = True
        elif set(rule_obj.permission).issubset(set(self.apkinfo.permissions)):
            rule_obj.check_item[0] = True
        else:
            # Exit if the level 1 stage check fails.
            return

        # Level 2: Single Native API Check
        api_1_method_name = rule_obj.api[0]["method"]
        api_1_class_name = rule_obj.api[0]["class"]
        api_1_descriptor = rule_obj.api[0]["descriptor"]

        api_2_method_name = rule_obj.api[1]["method"]
        api_2_class_name = rule_obj.api[1]["class"]
        api_2_descriptor = rule_obj.api[1]["descriptor"]

        first_api_list = self.find_api_usage(
            api_1_class_name, api_1_method_name, api_1_descriptor
        )
        second_api_list = self.find_api_usage(
            api_2_class_name, api_2_method_name, api_2_descriptor
        )

        if not first_api_list and not second_api_list:
            # Exit if the level 2 stage check fails.
            return

        else:
            rule_obj.check_item[1] = True

        if first_api_list:
            self.quark_analysis.level_2_result.append(first_api_list[0])
        if second_api_list:
            self.quark_analysis.level_2_result.append(second_api_list[0])

        # Level 3: Both Native API Check
        if not (first_api_list and second_api_list):
            # Exit if the level 3 stage check fails.
            return

        self.quark_analysis.first_api = first_api_list[0]
        self.quark_analysis.second_api = second_api_list[0]
        rule_obj.check_item[2] = True

        self.quark_analysis.level_3_result = [set(), set()]

        # Level 4: Sequence Check
        for first_api in first_api_list:
            for second_api in second_api_list:
                # Looking for the first layer of the upper function
                first_api_xref_from = self.apkinfo.upperfunc(first_api)
                second_api_xref_from = self.apkinfo.upperfunc(second_api)

                self.quark_analysis.level_3_result[0].update(
                    first_api_xref_from
                )
                self.quark_analysis.level_3_result[1].update(
                    second_api_xref_from
                )

                mutual_parent_function_list = self.find_intersection(
                    first_api_xref_from, second_api_xref_from
                )

                if mutual_parent_function_list is None:
                    # Exit if the level 4 stage check fails.
                    return
                for parent_function in mutual_parent_function_list:
                    first_wrapper = []
                    second_wrapper = []

                    self.find_previous_method(
                        first_api, parent_function, first_wrapper
                    )
                    self.find_previous_method(
                        second_api, parent_function, second_wrapper
                    )

                    if self.check_sequence(
                        parent_function, first_wrapper, second_wrapper
                    ):
                        rule_obj.check_item[3] = True
                        self.quark_analysis.level_4_result.append(
                            parent_function
                        )

                        keyword_item_list = (
                            rule_obj.api[i].get("keyword", None)
                            for i in range(2)
                        )

                        # Level 5: Handling The Same Register Check
                        if self.check_parameter(
                            parent_function,
                            first_wrapper,
                            second_wrapper,
                            keyword_item_list=keyword_item_list,
                        ):
                            rule_obj.check_item[4] = True
                            self.quark_analysis.level_5_result.append(
                                parent_function
                            )

    def get_json_report(self):
        """
        Build the quark report (summary and details) as a dictionary that
        is ready for json output.

        :return: json report
        """
        analysis = self.quark_analysis
        apk = self.apkinfo

        warning = Weight(analysis.score_sum, analysis.weight_sum).calculate()

        # Keep only the plain risk label, dropping the color code around it.
        known_levels = ("Low Risk", "Moderate Risk", "High Risk")
        warning = next(
            (level for level in known_levels if level in warning), warning
        )

        report = {}
        report["md5"] = apk.md5
        report["apk_filename"] = apk.filename
        report["size_bytes"] = apk.filesize
        report["threat_level"] = warning
        report["total_score"] = analysis.score_sum
        report["crimes"] = analysis.json_report
        return report

    def generate_json_report(self, rule_obj):
        """
        Show the json report.

        :param rule_obj: the instance of the RuleObject
        :return: None
        """
        # Count the confidence
        confidence = str(rule_obj.check_item.count(True) * 20) + "%"
        conf = rule_obj.check_item.count(True)
        weight = rule_obj.get_score(conf)
        score = rule_obj.score

        # Assign level 1 examine result
        permissions = rule_obj.permission if rule_obj.check_item[0] else []

        # Assign level 2 examine result
        api = []
        if rule_obj.check_item[1]:
            for item2 in self.quark_analysis.level_2_result:
                api.append(
                    {
                        "class": str(item2.class_name),
                        "method": str(item2.name),
                        "descriptor": str(item2.descriptor),
                    }
                )

        # Assign level 3 examine result
        combination = []
        if rule_obj.check_item[2]:
            combination = rule_obj.api

        # Assign level 4 - 5 examine result if exist
        sequnce_show_up = []
        same_operation_show_up = []

        # Check examination has passed level 4
        if self.quark_analysis.level_4_result and rule_obj.check_item[3]:
            for item4 in self.quark_analysis.level_4_result:
                sequnce_show_up.append(
                    {
                        item4.full_name: self.quark_analysis.parent_wrapper_mapping[
                            item4.full_name
                        ]
                    }
                )

            # Check examination has passed level 5
            if self.quark_analysis.level_5_result and rule_obj.check_item[4]:
                for item5 in self.quark_analysis.level_5_result:
                    same_operation_show_up.append(
                        {
                            item5.full_name: self.quark_analysis.parent_wrapper_mapping[
                                item5.full_name
                            ]
                        }
                    )

        crime = {
            "crime": rule_obj.crime,
            "score": score,
            "weight": weight,
            "confidence": confidence,
            "permissions": permissions,
            "native_api": api,
            "combination": combination,
            "sequence": sequnce_show_up,
            "register": same_operation_show_up,
        }
        self.quark_analysis.json_report.append(crime)

        # add the weight
        self.quark_analysis.weight_sum += weight
        # add the score
        self.quark_analysis.score_sum += score

    def add_table_row(self, name, rule_obj, confidence, score, weight):
        """
        Add one rule to the summary report table.

        :param name: value of the first column.
        :param rule_obj: rule whose crime description is shown.
        :param confidence: confidence text of the rule.
        :param score: score of the rule.
        :param weight: weight of the rule.
        :return: None
        """
        # Crime, confidence and weight are passed through the color helpers.
        row = [
            name,
            green(rule_obj.crime),
            yellow(confidence),
            score,
            red(weight),
        ]
        self.quark_analysis.summary_report_table.add_row(row)

    def show_summary_report(self, rule_obj, threshold=None):
        """
        Show the summary report.

        :param rule_obj: the instance of the RuleObject.
        :return: None
        """
        # Count the confidence
        confidence = f"{rule_obj.check_item.count(True) * 20}%"
        conf = rule_obj.check_item.count(True)
        weight = rule_obj.get_score(conf)
        score = rule_obj.score
        name = rule_obj.rule_filename

        if threshold:

            if rule_obj.check_item.count(True) * 20 >= int(threshold):
                self.add_table_row(name, rule_obj, confidence, score, weight)

        else:
            self.add_table_row(name, rule_obj, confidence, score, weight)

        # add the weight
        self.quark_analysis.weight_sum += weight
        # add the score
        self.quark_analysis.score_sum += score

    def show_label_report(self, rule_path, all_labels, table_version):
        """
        Fill the label report table with one row per label.

        :param rule_path: directory that may contain the file label_desc.csv
            with the columns "label" and "description".
        :param all_labels: dictionary mapping each label to the confidence
            values associated to that label.
        :param table_version: "max" for the four-column table; any other
            value adds the average, deviation and >= 80% columns.
        :return: None
        """
        table = self.quark_analysis.label_report_table
        # clear table to manage max/detail version
        table.clear()

        # Optional descriptions, read from <rule_path>/label_desc.csv. The
        # file is a csv on purpose, so it cannot be mistaken for a json rule.
        label_desc = {}
        desc_file = os.path.join(rule_path, "label_desc.csv")
        if os.path.isfile(desc_file):
            df = pd.read_csv(desc_file, usecols=["label", "description"])
            label_desc = dict(zip(df["label"], df["description"]))

        detailed = table_version != "max"

        for label_name in all_labels:
            confidences = np.array(all_labels[label_name])

            columns = [
                "Label",
                "Description",
                "Number of rules",
                "MAX Confidence %",
            ]
            if detailed:
                columns += [
                    "AVG Confidence",
                    "Std Deviation",
                    "# of Rules with Confidence >= 80%",
                ]
            table.field_names = columns

            cells = [
                green(label_name),
                yellow(label_desc.get(label_name, "-")),
                (len(confidences)),
                red(np.max(confidences)),
            ]
            if detailed:
                cells += [
                    magenta(round(np.mean(confidences), 2)),
                    lightblue(round(np.std(confidences), 2)),
                    lightyellow(np.count_nonzero(confidences >= 80)),
                ]
            table.add_row(cells)

    def show_detail_report(self, rule_obj):
        """
        Print the confidence of the rule followed by the findings of
        every stage the rule passed.

        :param rule_obj: the instance of the RuleObject.
        :return: None
        """
        analysis = self.quark_analysis
        passed = rule_obj.check_item

        # Every passed stage is worth 20% confidence.
        print("")
        print(f"Confidence: {passed.count(True) * 20}%")
        print("")

        if passed[0]:
            colorful_report("1.Permission Request")
            for permission in rule_obj.permission:
                print(f"\t\t {permission}")

        if passed[1]:
            colorful_report("2.Native API Usage")
            for api in analysis.level_2_result:
                print(f"\t\t {api.full_name}")

        if passed[2]:
            colorful_report("3.Native API Combination")
            titles = ("First API", "Second API")
            for numbered_api, method_list in zip(
                titles, analysis.level_3_result
            ):
                print(f"\t\t {numbered_api} show up in:")
                if not method_list:
                    print("\t\t None")
                    continue
                for comb_method in method_list:
                    print(f"\t\t {comb_method.full_name}")

        if passed[3]:
            colorful_report("4.Native API Sequence")
            print("\t\t Sequence show up in:")
            for seq_method in analysis.level_4_result:
                print(f"\t\t {seq_method.full_name}")

        if passed[4]:
            colorful_report("5.Native API Use Same Parameter")
            for seq_operation in analysis.level_5_result:
                print(f"\t\t {seq_operation.full_name}")

    def show_call_graph(self, output_format=None):
        """
        Create a call graph for every recorded call graph analysis.

        :param output_format: passed on unchanged to call_graph.
        :return: None
        """
        print_info("Creating Call Graph...")
        recorded = self.quark_analysis.call_graph_analysis_list
        for analysis_entry in recorded:
            call_graph(analysis_entry, output_format)
        print_success("Call Graph Completed")

    def show_rule_classification(self):
        """
        Output the rule classification as table, json and graph, based on
        the recorded call graph analyses.

        :return: None
        """
        print_info("Rules Classification")

        recorded = self.quark_analysis.call_graph_analysis_list
        data_bundle = get_rule_classification_data(recorded, MAX_SEARCH_LAYER)

        # Same data, three output forms, in this order.
        output_parent_function_table(data_bundle)
        output_parent_function_json(data_bundle)
        output_parent_function_graph(data_bundle)
Ejemplo n.º 6
0
 def __init__(self, apk):
     """
     Load the apk information and prepare an empty report.

     :param apk: passed on to Apkinfo.
     """
     self.apkinfo = Apkinfo(apk)
     self._report = QuarkAnalysis()
     # The three stacks all start out as separate empty lists.
     (
         self._apkinfo_stack,
         self._sequence_stack,
         self._register_stack,
     ) = ([], [], [])
Ejemplo n.º 7
0
class Quark:
    """Quark module is used to check quark's five-stage theory"""
    def __init__(self, apk):
        """
        Set up the analyser state for one APK.

        :param apk: value handed to ``Apkinfo``.
        """
        self.apkinfo = Apkinfo(apk)
        self._report = QuarkAnalysis()
        # Work stacks: behaviors waiting for the apkinfo phase and for the
        # register phase. ``_sequence_stack`` is only initialised in the
        # code visible here.
        (self._apkinfo_stack,
         self._sequence_stack,
         self._register_stack) = [], [], []

    @property
    def report(self):
        """Read-only access to the report object held in ``self._report``."""
        return self._report

    def get_invoke_tree(self, method: MethodId, search_depth=3):
        """
        Build a tree rooted at ``method`` from its upper methods.

        The tree is grown ``search_depth`` times. On each pass every current
        leaf is expanded with the ``(offset, upper)`` pairs returned by
        ``apkinfo.find_upper_methods``. Each node's ``data`` is the list of
        bytecodes looked up with ``apkinfo.find_bytecode_by_addr`` for that
        upper method; the root's ``data`` is an empty list.

        :param method: root method; used as the root node identifier.
        :param search_depth: number of expansion passes, 3 by default.
        :return: the populated ``Tree``.
        """
        invoke_tree = Tree(deep=search_depth, identifier=method.address)

        # Root node: the method itself, with no invoking bytecode.
        invoke_tree.create_node(identifier=method, data=[])

        for _layer in range(search_depth):
            for leaf in invoke_tree.leaves():
                for offset, caller in self.apkinfo.find_upper_methods(
                        leaf.identifier):
                    invoke_bytecode = self.apkinfo.find_bytecode_by_addr(
                        caller.dexindex, offset)
                    if invoke_tree.contains(caller):
                        # Already in the tree: just record one more bytecode.
                        invoke_tree.get_node(caller).data.append(
                            invoke_bytecode)
                    else:
                        invoke_tree.create_node(identifier=caller,
                                                data=[invoke_bytecode],
                                                parent=leaf)

        return invoke_tree

    def check_register_in_method(self,
                                 method: MethodId,
                                 registers,
                                 start_bytecode=None,
                                 end_bytecode=None,
                                 reset_bytecodes=None):
        old_registers = copy(registers)
        # Fetch target ranger of instructions
        instructions = [
            ins for ins in self.apkinfo.get_function_bytecode(
                method, start_bytecode.address if start_bytecode else -1,
                end_bytecode.address if end_bytecode else -1)
        ]
        instructions.reverse()

        # Apply all opcode reversely and remove those were override
        TRANSITION_TYPE_1 = (
            # If destination register exists. It will appear at the least.
            # Otherwise , destination is the parameter or the return register.
            'invoke',
            'filled',
            'return')
        TRANSITION_TYPE_2 = (
            # First register is destination, second one is source.
            'move',
            'neg',
            'not',
            'int',
            'long',
            'float',
            'double',
            'array')
        NEW_TYPE = (
            # Given registers will be override.
            'const',
            'new')
        NOP_TYPE = (
            # Instructions needed to skip.
            'monitor',
            'instance',
            'goto',
            'if',
            'add',
            'sub',
            'rsub',
            'mul',
            'div',
            'rem',
            'and',
            'or',
            'xor',
            'shl',
            'shr',
            'ushr',
            'check',
            'cmp',
            'iget',
            'iput',
            'aget',
            'aput')

        reset_offsets = (bytecode.address for bytecode in reset_bytecodes)

        for ins in instructions:
            # print(f'{ins.address} {str(ins)}')

            # Combine two sets of registers if a reset offset comes
            if ins.address in reset_offsets:
                for reg_index in range(MAX_REG_COUNT):
                    registers[reg_index] = registers[
                        reg_index] ^ old_registers[reg_index]
                continue

            prefix = ins.mnemonic.split('-')[0]

            # Transition
            if prefix in TRANSITION_TYPE_1:
                if ins.parameter and registers[RETURN_REG_INDEX]:
                    # invoke-kind, filled-new-array
                    registers[RETURN_REG_INDEX] = False
                    for reg_index in ins.registers:
                        registers[reg_index] = True

            elif prefix in TRANSITION_TYPE_2:
                if len(ins.registers) > 1:
                    if registers[ins.registers[0]]:
                        registers[ins.registers[0]] = False
                        registers[ins.registers[1]] = True
                elif registers[ins.registers[0]]:
                    # move-result
                    registers[ins.registers[0]] = False
                    registers[RETURN_REG_INDEX] = True

            elif prefix in NEW_TYPE:
                for reg_index in ins.registers:
                    registers[reg_index] = False
            elif prefix not in NOP_TYPE:
                # TODO - warning
                pass

        return registers

    def check_register_downward(self, invoke_nodes: list, registers):
        # Check registers reversely from common parent to api
        if len(invoke_nodes) <= 2:
            return registers
        invoke_nodes.reverse()
        invoke_nodes.pop()  # Pop out the api

        while len(invoke_nodes) > 2 and any(registers):
            current_node = invoke_nodes.pop()
            first_bytecode = min(current_node.data)
            self.check_register_in_method(current_node.identifier,
                                          registers,
                                          start_bytecode=first_bytecode,
                                          reset_bytecodes=current_node.data)

        return registers

    def check_register_upward(self, invoke_nodes: list, registers):
        # Check registers reversely from api to common parent
        if len(invoke_nodes) <= 2:
            return registers
        invoke_nodes.pop()  # Popup api node

        while len(invoke_nodes) > 2 and any(registers):
            current_node = invoke_nodes.pop()
            least_bytecode = max(current_node.data)
            self.check_register_in_method(current_node.identifier,
                                          registers,
                                          end_bytecode=least_bytecode,
                                          reset_bytecodes=current_node.data)

        return registers

    def check_register(self, sequence: Sequence, registers=None):
        first_tree = sequence.tree_list[0]
        second_tree = sequence.tree_list[1]
        parent = sequence.parent

        first_node = [
            first_tree.get_node(method)
            for method in first_tree.rsearch(parent)
        ]
        second_node = [
            second_tree.get_node(method)
            for method in second_tree.rsearch(parent)
        ]

        if registers is None:
            # Setup the registers and adjust end_offset
            upper_node = second_node[-2]
            least_bytecode = max(upper_node.data)

            if not least_bytecode:
                logging.warning(
                    f'Unable fetch bytecode at {least_bytecode} with {upper_node.identifier}, skip this scanning.'
                )
                return [False for _ in range(MAX_REG_COUNT)]

            registers = [False for _ in range(MAX_REG_COUNT)]
            for reg_index in least_bytecode.registers:
                registers[reg_index] = True

        first_invoke_for_first_api = min(first_tree.get_node(parent).data)

        reset_offsets = second_tree.get_node(parent).data
        least_invoke_for_second_api = max(reset_offsets)

        if (first_invoke_for_first_api >= least_invoke_for_second_api):
            logging.error(
                f'Address for first api is less than address for second api @ {parent}'
            )
            return [False]

        registers = self.check_register_upward(second_node, registers)
        registers = self.check_register_in_method(parent, registers,
                                                  first_invoke_for_first_api,
                                                  least_invoke_for_second_api,
                                                  reset_offsets)
        registers = self.check_register_downward(first_node, registers)

        return registers

    def run_apkinfo_phase(self, behavior: Behavior):
        """
        Run the permission and native-API existence stages for a behavior.

        :param behavior: object whose ``related_rule`` provides
            ``permission`` and ``api``; its ``api_objects`` attribute is set
            to the API methods found so far.
        :return: ``CONF_STAGE_NONE`` when a rule permission is missing from
            the APK, ``CONF_STAGE_1`` when not every rule API was found,
            otherwise ``CONF_STAGE_2``.
        """
        rule = behavior.related_rule

        # Stage 1 - Check Permission
        granted = [
            permission for permission in rule.permission
            if permission in self.apkinfo.permissions
        ]
        if len(granted) != len(rule.permission):
            return CONF_STAGE_NONE

        # Collect the first match of each API; stop at the first API that
        # has no match at all.
        not_found = object()
        found_apis = []
        for api in rule.api:
            candidates = self.apkinfo.find_methods(api['class'],
                                                   api['method'],
                                                   api['descriptor'])
            first_match = next(candidates, not_found)
            if first_match is not_found:
                break
            found_apis.append(first_match)

        behavior.api_objects = found_apis

        # Stage 2 - All native apis exist
        if len(found_apis) < len(rule.api):
            return CONF_STAGE_1
        return CONF_STAGE_2

    def run_sequence_phase(self, behavior: Behavior):
        # Check if apis exist in the same call graph
        trees = [self.get_invoke_tree(api)
                 for api in behavior.api_objects]  # tree list

        # Test each combination of trees
        for first_index in range(len(trees)):
            for second_index in range(first_index + 1, len(trees)):
                first_tree = trees[first_index]
                second_tree = trees[second_index]

                first_all_methods = {
                    node.identifier
                    for node in first_tree.all_nodes()
                }
                second_all_methods = {
                    node.identifier
                    for node in second_tree.all_nodes()
                }
                common_parents = first_all_methods.intersection(
                    second_all_methods)

                # Stage 3 - Check combination
                # Stage 4 - Check sequence
                # Check invoke address
                passing_3_list = []
                passing_4_list = []
                for parent in common_parents:
                    # Test sequence of invoke addresses from two methods
                    first_bytecode_for_first_method = min(
                        first_tree.get_node(parent).data)
                    least_bytecode_for_second_method = max(
                        second_tree.get_node(parent).data)

                    cloned_behavior = copy(behavior)
                    cloned_behavior.sequence = Sequence(
                        parent, (trees[first_index], trees[second_index]))
                    if first_bytecode_for_first_method < least_bytecode_for_second_method:
                        passing_4_list.append(cloned_behavior)
                    else:
                        passing_3_list.append(cloned_behavior)

        return passing_3_list, passing_4_list

    def run_register_phase(self, behavior: Behavior):
        """
        Run the same-register stage for a behavior that passed stage 4.

        :param behavior: behavior carrying a ``sequence``; when any register
            flag is set, its ``registers`` attribute receives the indexes of
            the set flags.
        :return: ``CONF_STAGE_5`` when at least one flag returned by
            ``check_register`` is set, otherwise ``CONF_STAGE_4``.
        """
        # Stage 5 - Handling the same register
        flags = self.check_register(behavior.sequence)
        critical_indexes = [
            index for index, is_critical in enumerate(flags) if is_critical
        ]

        if not critical_indexes:
            return CONF_STAGE_4

        behavior.registers = critical_indexes
        return CONF_STAGE_5

    def analysis_rule(self, rule: QuarkRule):
        """
        Queue ``rule`` with ``add_rule`` and then run the pending analysis.

        ``run_analysis`` is called regardless of what ``add_rule`` returns.

        :param rule: the rule to analyse.
        """
        self.add_rule(rule)
        self.run_analysis()

    def add_rule(self, rule: QuarkRule):
        behavior = self._report.add_rule(rule)
        if behavior is None:
            return False

        self._apkinfo_stack.append(behavior)
        return True

    def run_analysis(self):
        while self._apkinfo_stack or self._register_stack:
            if self._apkinfo_stack:
                behavior = self._apkinfo_stack.pop()
                result = self.run_apkinfo_phase(behavior)

                if result != CONF_STAGE_2:
                    self._report.set_passed(behavior, result)
                    continue

                passing_3_list, passing_4_list = self.run_sequence_phase(
                    behavior)

                if passing_3_list or passing_4_list:
                    for passing in passing_3_list:
                        self._report.set_passed(passing, CONF_STAGE_3)

                    self._register_stack.extend(passing_4_list)
                    # for passing in passing_4_list:
                    #     self._report.set_passed(passing, CONF_STAGE_4)

                else:
                    self._report.set_passed(behavior, CONF_STAGE_2)

            if self._register_stack:
                behavior = self._register_stack.pop()
                result = self.run_register_phase(behavior)
                self._report.set_passed(behavior, result)

    def get_json_report(self):
        return {
            'md5': self.apkinfo.md5,
            'apk_filename': self.apkinfo.filename,
            'size_bytes': self.apkinfo.filesize,
            'threat_level': self._report.get_thread_level(),
            'total_score': self._report.weighted_sum,
            'crimes': self._report.get_json_report()
        }