def test_invalid_modifier_combo(self):
        """XOR + WIDE + NO_CASE together violate YARA modifier restrictions and must raise."""
        # assertRaises both checks the exception type and reports the custom
        # message when nothing is raised, replacing the try/fail/except dance.
        # Also fixed the message to name the actual exception class
        # (YaraStringModifierRestrictionError, not YaraModifierRestrictionError).
        with self.assertRaises(
                YaraStringModifierRestrictionError,
                msg="An INVALID combination of YARA String modifiers DIDN'T RAISE "
                    "YaraStringModifierRestrictionError! Restrictions are likely broken."):
            YaraString("my_string",
                       "potato",
                       string_type=TEXT_TYPE,
                       modifiers=[{
                           "keyword": XOR
                       }, {
                           "keyword": WIDE
                       }, {
                           "keyword": NO_CASE
                       }])
    def _legacy_from_source_file_yara_python(cls, source_path=None):
        """
        Initialize YaraRule from sourcecode using the limited yara-python API.

        :param source_path: Path to a YARA rule source file.
        :return: A cls(...) instance on success, None on any failure (logged).
        """
        try:
            # Compile the YARA source code (only way to get yara-python to parse the thing)
            yar_compiled = yara.compile(filepath=source_path)

            # Get the parsed source code via yara.Rules.match
            # (matching the rule file against itself so the rule is guaranteed to fire).
            yar_src = yar_compiled.match(filepath=source_path)[0]

            name = yar_src.rule
            namespace = yar_src.namespace
            tags = yar_src.tags
            meta = [YaraMeta(identifier, value) for identifier, value in yar_src.meta.items()]
            strings = [YaraString(identifier, value.decode('utf-8')) for offset, identifier, value in yar_src.strings]

            # Get condition from the sourcecode file by hand due to it not being part of yara.Rules.
            condition = None
            this_is_the_condition = False
            with open(source_path, 'r') as f:
                for line in f:
                    if this_is_the_condition:
                        # FIX: strip() handles tab-indented condition lines as well;
                        # the old char-by-char scan only skipped spaces (leaving leading
                        # tabs in place) and kept trailing whitespace.
                        stripped = line.strip()
                        if stripped:
                            condition = YaraCondition(stripped)
                        break

                    # NOTE(review): substring match — any line *containing* "condition"
                    # (e.g. inside a meta value) would trigger this; acceptable for the
                    # legacy path, but worth confirming against real rule files.
                    if 'condition' in line.lower():
                        # Next line will contain the actual condition, this one just has the declaration.
                        this_is_the_condition = True

            log.debug(condition)

            return cls(name, tags, meta, strings, condition, namespace=namespace)

        except Exception as exc:
            log.exception("YaraRule.from_source_file_yara_python exc", exc_info=exc)
            return None
 def test_valid_modifier_combo(self):
     """ASCII + WIDE + BASE64(custom alphabet) is a legal modifier combination and must not raise."""
     try:
         # Constructing the YaraString is the assertion itself; the instance is
         # intentionally discarded (the old unused `ys` binding is removed).
         YaraString(
             "my_string",
             "potato",
             string_type=TEXT_TYPE,
             modifiers=[{
                 "keyword": ASCII
             }, {
                 "keyword": WIDE
             }, {
                 "keyword": BASE64,
                 "data": "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu"
             }])
     except YaraStringModifierRestrictionError:
         # FIX: dropped the unreachable `pass` that followed self.fail() (fail()
         # raises, so it could never execute) and corrected the exception name
         # in the message (YaraStringModifierRestrictionError).
         self.fail(
             "A VALID combination of YARA String modifiers raised YaraStringModifierRestrictionError!"
         )
    def from_dict(cls, dct: dict):
        """
        Initialize YaraRule from a dict.

        :param dct: Dict on the form of:
                    {
                        name: str,
                        tags: List[str],
                        meta: {identifier, value, value_type},
                        strings: [{identifier, value, value_type, string_type, modifiers, modifier_str, str}]
                        condition: str
                    }.
        :return:
        """
        # Build the nested value objects first, then hand everything to the
        # regular constructor in one readable call.
        meta_objects = [
            YaraMeta(entry["identifier"], entry["value"], entry["value_type"])
            for entry in dct["meta"]
        ]
        string_objects = [
            YaraString(entry["identifier"], entry["value"], entry["value_type"],
                       entry["string_type"], entry["modifiers"])
            for entry in dct["strings"]
        ]

        return cls(name=dct["name"],
                   tags=dct["tags"],
                   meta=meta_objects,
                   strings=string_objects,
                   condition=dct["condition"])
    def test_base64_str_representation(self):
        """
        Test that the str(YaraString) returns the expected data in the expected format.
        :return:
        """
        # Expected serialized form; \x09 is the literal TAB embedded in the
        # custom base64 alphabet.
        expected = ('$base64_string = "Test __str()__ call on YaraString w/ Base64 modifier." '
                    'base64(!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu)')

        try:
            base64_string = YaraString(
                "base64_string",
                "Test __str()__ call on YaraString w/ Base64 modifier.",
                string_type=TEXT_TYPE,
                modifiers=[{
                    "keyword": BASE64,
                    "data": "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu"
                }])

            self.assertEqual(str(base64_string), expected)
        except Exception as exc:
            self.fail("{}".format(exc))
                '{proto}://{host}:{port}'.format(
                    proto=("https" if CONFIG["hive_server_use_ssl"] else "http"),
                    host=CONFIG["hive_server"],
                    port=CONFIG["hive_port"]
                ), CONFIG["hive_api_key"])

            # Retrieve Observables in a separate API call (as they're not included in responder)
            observables_response = hive_api.get_case_observables(case_id)

            # Add observables to thehive:case as its own sub-dict
            case['observables'] = observables_response.json()

            strings = []
            for o in case["observables"]:
                # FIXME: Implement backend str type determination.
                strings.append(YaraString("observable_{md5sum}".format(
                    md5sum=md5(o["data"].encode("utf-8")).hexdigest()), o["data"]))

            # Append additional strings if specified in config.
            strings.extend(
                [
                    YaraString(
                        "observable_{md5sum}".format(
                            md5sum=md5(field.encode("utf-8")).hexdigest()),
                        case[field]) for field in CONFIG["hive_case_string_fields"]
                ]
            )

            all_tags = case["tags"]
            observables_tags = [t for li in [o["tags"] for o in case["observables"]] for t in li]
            all_tags.extend(observables_tags)
            all_unique_tags = list(set(all_tags))
    def from_compiled_file(cls, yara_rules: Union[yara.Rules, str],
                           source_filename=None, compiled_filepath=None,
                           condition: Union[str, YaraCondition] = None, rules_dir=RULES_DIR, timeout=60):
        """
        Initialize YaraRule from a compiled (binary) file.

        :param yara_rules:          yara.Rules object, or 'str' rule name used to locate a
                                    compiled yara rules .bin inside rules_dir.
        :param source_filename:     Filename of the matching rule source; required when a
                                    yara.Rules object is given, derived from the rule name otherwise.
        :param compiled_filepath:   Path to the compiled rules file; derived when yara_rules is a
                                    'str', guessed from the parsed rule name when left unset.
        :param condition:           Condition to attach (str or YaraCondition) — compiled rules
                                    don't appear to carry one themselves (see note below).
        :param rules_dir:           Directory holding both source and compiled rule files.
        :param timeout:             If the match function does not finish before the specified number
                                    of seconds elapsed, a TimeoutError exception is raised.
        :return:                    A cls(...) instance populated from the match callback data.
        :raises ValueError:         If yara_rules is neither yara.Rules nor str, or a yara.Rules
                                    object is given without source_filename.
        """
        # It looks like compiled YARA rules don't have a condition,
        # so we have to apply it ourselves or leave it blank.
        if isinstance(condition, str):
            condition = YaraCondition(condition)

        if isinstance(yara_rules, yara.Rules):
            if source_filename is None:
                raise ValueError("yara.Rules object was given, but source_filename was not set!")

            # Load rules from yara.Rules object.
            compiled_blob: yara.Rules = yara_rules
        elif isinstance(yara_rules, str):
            # Derive both file paths from the rule name.
            compiled_filepath = os.path.join(rules_dir, yara_rules + COMPILED_FILE_EXTENSION)
            # Set source filename.
            source_filename = yara_rules + SOURCE_FILE_EXTENSION

            # Load rules from file.
            compiled_blob: yara.Rules = yara.load(
                filepath=compiled_filepath)
        else:
            raise ValueError("yara_rules must be 'yara.Rules' object or 'str' filepath to a compiled yara rules .bin")

        # The match method returns a list of instances of the class Match.
        # Instances of this class have the same attributes as the dictionary passed to the callback function,
        # with the exception of 'matches' which is ONLY passed to the callback function!
        # Matching the compiled rules against their own source file both validates the compile
        # and (via the callback) recovers rule metadata that yara.Rules doesn't expose directly.
        yara_match_callback = YaraMatchCallback()
        matches: list = compiled_blob.match(filepath=os.path.join(rules_dir, source_filename),
                                            callback=yara_match_callback.callback,
                                            timeout=timeout)

        # Rebuild the rule's components from what the callback captured.
        meta = [YaraMeta(identifier, value) for identifier, value in yara_match_callback.meta.items()]
        namespace = yara_match_callback.namespace
        name = yara_match_callback.rule
        strings = [
            YaraString(identifier, value.decode('utf-8')) for offset, identifier, value in yara_match_callback.strings]
        tags = yara_match_callback.tags

        if not yara_match_callback.matches:
            # NOTE(review): deliberately logged rather than raised — callers still get an object.
            log.error("Compiled YARA does *NOT* match source code!")
            # raise
        else:
            log.info("Compiled YARA matches source code.")
            match = matches[0]
            log.info("match: {}".format(match))

        # Fall back to <rule name>.<ext> when no explicit compiled path was supplied.
        if isinstance(yara_rules, yara.Rules) and compiled_filepath is None:
            log.warning("yara.Rules object was given, but compiled_filepath was not set, "
                        "assuming same name as rule name!")
            compiled_filepath = os.path.join(rules_dir, name + COMPILED_FILE_EXTENSION)

        return cls(name, tags, meta, strings, condition,
                   namespace=namespace, compiled_blob=compiled_blob,
                   compiled_path=compiled_filepath, compiled_match_source=yara_match_callback.matches)
    def from_source_code(cls, source_code):
        """
        Initialize YaraRule from sourcecode using own custom written parser.

        :param source_code: The complete YARA rule source as a single string.
        :return: A cls(...) instance on success, None on any parse failure (logged).
        """
        try:
            log.debug(source_code)

            # Outer rule construct: rule <identifier> [: tag1 tag2 ...] { <body> }.
            # DOTALL lets 'rule_body' span every line between the braces.
            constructor_line_pattern = re.compile(
                r"(?P<rule_keyword>rule)\s+(?P<rule_identifier>\w+)\s*"
                r"(?P<tag_body>(?P<tag_delimiter>:)\s*(?P<tags>[\s+\w]+))?\{(?P<rule_body>.*)\}",
                re.MULTILINE | re.DOTALL)

            rule_match = constructor_line_pattern.search(source_code)

            log.debug(rule_match)
            if not rule_match:
                raise ValueError("Rule did not match!\n{source}\n{match}".format(source=source_code, match=rule_match))

            log.debug("rule_match groupdict:\n{}".format(rule_match.groupdict()))

            name = rule_match.groupdict()["rule_identifier"]

            # Only add valid tags to tags list (apply some sanitation on the matched string).
            if rule_match.groupdict()["tags"]:
                tags = []
                for tag in rule_match.groupdict()["tags"].strip('\n').replace('\t', ' ').split(' '):
                    # Drop the empty fragments split() produces for runs of separators.
                    if tag != ' ' and tag != '':
                        tags.append(tag)

                # If no tags were added, set it to None for a more clean approach.
                if len(tags) == 0:
                    tags = None
            else:
                tags = None

            body = rule_match.groupdict()["rule_body"]

            log.debug("body:\n{}".format(body))

            # Seek thru the whole shebang until you match keyword:

            # Generate a string safe copy of the body, which won't contain irrelevant extra ':' chars etc.
            # (string values could themselves contain "meta:"/"strings:"/"condition:" and
            # break the find() calls below).
            string_safe_body = cls.abstract_source_body(body)

            log.info("string-safe body:\n{}".format(string_safe_body))

            # Get index of meta and strings (if either is present).
            # NOTE(review): an absent section yields -1; a rule with no condition would make
            # the slicing below misbehave — assumes well-formed rules. TODO confirm.
            meta_index = string_safe_body.find("meta:")
            strings_index = string_safe_body.find("strings:")
            condition_index = string_safe_body.find("condition:")

            log.info("Meta @ {m}, Strings @ {s}, Condition @ {c}".format(
                m=meta_index, s=strings_index, c=condition_index))

            # Make a second pass with a pattern that doesn't use dotall, in order to better parse each sub-body,
            # FIXME: Check if meta can go after string in a rule (read: more headache if-spaghetti needed if so...)
            meta = None
            strings = None
            if meta_index > -1:
                if strings_index > -1:
                    # If we have strings, then that is our body part cutoff.
                    meta_body = body[meta_index+len("meta:"):strings_index]
                    log.info("meta body:\n{}".format(meta_body))
                else:
                    # If we don't have strings then condition will be our body part cutoff.
                    meta_body = body[meta_index+len("meta:"):condition_index]
                    log.info("meta body:\n{}".format(meta_body))

                # Parse meta body items into a list of regex match group dicts.
                # Each meta line is '<identifier> = <value>' where value is a quoted
                # string, a bare true/false, or an integer.
                p = re.compile(
                    r"\s*(?P<full>(?P<identifier>\w+)\s*=\s*(?P<value>\".*\"|true|false|[0-9]*)).*",
                    re.MULTILINE)

                # Use finditer() to get a sequence of match objects, in order to get the groupdict for each match.
                match_dicts = [m.groupdict() for m in p.finditer(meta_body)]
                log.info("meta body match dict:\n{}".format(json.dumps(match_dicts, indent=4)))

                # Parse matched dicts into a list of YaraMeta objects.
                meta = []
                for d in match_dicts:
                    identifier = d["identifier"]
                    value = d["value"]
                    value_type = determine_value_type(value)

                    if value_type is str:
                        # If value type is a string, strip the redundant quotes,
                        # which will just make a mess of things.
                        value = value[1:-1]

                    meta.append(YaraMeta(identifier, value, value_type))

                log.info("Parsed YaraMeta objects:\n{}".format(json.dumps([repr(o) for o in meta], indent=4)))

            if strings_index > -1:
                strings_body = body[strings_index+len("strings:"):condition_index]
                log.info("strings body:\n{}".format(strings_body))

                # Parse strings programmatically (wildcard content makes regex approach exceedingly hard)
                parsed_string_dicts = cls.parse_strings_body(strings_body)
                log.info("Parsed YARA string dicts:\n{}".format(json.dumps(parsed_string_dicts, indent=4)))

                # Parse parsed YARA string dicts into a list of YaraString objects.
                strings = [
                    YaraString(d["identifier"], d["value"], determine_value_type(d["value"]), d["string_type"],
                               d["modifiers"]) for d in parsed_string_dicts
                ]

            # Parse raw condition gathered from abstraction.
            # Strip leading and trailing whitespace and offset the start past the constructor,
            # in order to avoid false positives when searching for non-separator chars.
            raw_condition = body[condition_index:].lstrip()[len("condition:"):].rstrip()

            separators = [' ', '\n', '\t']
            uniform_separator = ' '
            condition_str = ""
            inside_condition_content = False

            # Sanitize the raw condition which can have unpredictable spacing/indent:
            # collapse every run of whitespace into a single space so conditions come
            # out uniformly formatted.
            for i in range(len(raw_condition)):
                # Seek until we get an actual non-separator character.
                if not inside_condition_content and raw_condition[i] not in separators:
                    condition_str += raw_condition[i]
                    inside_condition_content = True
                elif inside_condition_content:
                    if raw_condition[i] in separators and raw_condition[i-1] not in separators:
                        # If this is a separator and there is no separator behind us, then add a uniform one.
                        condition_str += uniform_separator
                    elif raw_condition[i] not in separators:
                        condition_str += raw_condition[i]

            condition = YaraCondition(condition_str)

            # Summarize everything we parsed, for debugging/auditing purposes.
            parsed_source = {
                "name": name,
                "tags": tags,
                "meta": [repr(o) for o in meta] if isinstance(meta, list) else None,
                "strings": [repr(o) for o in strings] if isinstance(strings, list) else None,
                "condition": repr(condition)
            }
            log.info("parsed_source:\n{}".format(json.dumps(parsed_source, indent=4)))

            return cls(name, tags, meta, strings, condition)

        except Exception as exc:
            log.exception("YaraRule.from_source_file exc", exc_info=exc)
            return None