def test_invalid_modifier_combo(self):
    """Verify that an invalid YARA string modifier combination raises.

    XOR, WIDE and NO_CASE combined on a TEXT_TYPE string must be rejected
    by the modifier restriction checks with YaraStringModifierRestrictionError.
    """
    # assertRaises replaces the manual try/self.fail/except idiom: it fails
    # with a clear message if no exception is raised, and the original's
    # hand-written failure message named the wrong exception class
    # ("YaraModifierRestrictionError") anyway.
    with self.assertRaises(YaraStringModifierRestrictionError):
        YaraString(
            "my_string", "potato",
            string_type=TEXT_TYPE,
            modifiers=[
                {"keyword": XOR},
                {"keyword": WIDE},
                {"keyword": NO_CASE},
            ])
def _legacy_from_source_file_yara_python(cls, source_path=None):
    """Initialize YaraRule from sourcecode using the limited yara-python API.

    yara-python only exposes a rule's parsed attributes after compiling and
    matching it, and the match result does not include the condition, so the
    condition line is re-read from the source file by hand.

    :param source_path: Path to a YARA source file.
    :return: A YaraRule instance, or None if compilation/parsing failed.
    """
    try:
        # Compile the YARA source code (only way to get yara-python to parse the thing)
        yar_compiled = yara.compile(filepath=source_path)
        # Get the parsed source code via yara.Rules.match
        yar_src = yar_compiled.match(filepath=source_path)[0]

        name = yar_src.rule
        namespace = yar_src.namespace
        tags = yar_src.tags
        meta = [YaraMeta(identifier, value) for identifier, value in yar_src.meta.items()]
        strings = [YaraString(identifier, value.decode('utf-8'))
                   for offset, identifier, value in yar_src.strings]

        # Get condition from the sourcecode file by hand due to it not being part of yara.Rules.
        condition = None
        this_is_the_condition = False
        with open(source_path, 'r') as f:
            # Iterate the file object directly instead of materialising the
            # whole file via readlines().
            for line in f:
                if this_is_the_condition:
                    # Strip the leading space-indent (tabs are deliberately
                    # kept, matching the original per-character scan).
                    stripped = line.lstrip(' ')
                    if stripped:
                        condition = YaraCondition(stripped.strip('\n'))
                    break
                # NOTE(review): a plain substring test also fires on any
                # other line containing "condition" (e.g. a meta value) —
                # assumes well-formed rule input; confirm against callers.
                if 'condition' in line.lower():
                    # Next line will contain the actual condition, this one just has the declaration.
                    this_is_the_condition = True

        log.debug(condition)

        return cls(name, tags, meta, strings, condition, namespace=namespace)
    except Exception as exc:
        log.exception("YaraRule.from_source_file_yara_python exc", exc_info=exc)
        return None
def test_valid_modifier_combo(self):
    """Verify that a valid YARA string modifier combination does not raise.

    ASCII, WIDE and BASE64 (with custom alphabet data) are a legal
    combination on a TEXT_TYPE string.
    """
    try:
        # Constructing the YaraString is the assertion itself; the instance
        # is not needed afterwards (the original bound it to an unused
        # local 'ys' and ended with a redundant 'pass').
        YaraString(
            "my_string", "potato",
            string_type=TEXT_TYPE,
            modifiers=[
                {"keyword": ASCII},
                {"keyword": WIDE},
                {"keyword": BASE64,
                 "data": "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu"},
            ])
    except YaraStringModifierRestrictionError:
        self.fail(
            "A VALID combination of YARA String modifiers raised YaraModifierRestrictionError!"
        )
def from_dict(cls, dct: dict):
    """
    Initialize YaraRule from a dict.

    :param dct: Dict on the form of:
                {
                    name: str,
                    tags: List[str],
                    meta: {identifier, value, value_type},
                    strings:
                        [{identifier, value, value_type, string_type, modifiers, modifier_str, str}]
                    condition: str
                }.
    :return:
    """
    # Build the meta and strings object lists up front, then feed them to
    # the constructor together with the scalar fields.
    parsed_meta = [
        YaraMeta(ym["identifier"], ym["value"], ym["value_type"])
        for ym in dct["meta"]
    ]
    parsed_strings = [
        YaraString(ys["identifier"], ys["value"], ys["value_type"],
                   ys["string_type"], ys["modifiers"])
        for ys in dct["strings"]
    ]

    return cls(name=dct["name"],
               tags=dct["tags"],
               meta=parsed_meta,
               strings=parsed_strings,
               condition=dct["condition"])
def test_base64_str_representation(self):
    """
    Test that the str(YaraString) returns the expected data in the expected format.

    :return:
    """
    # NOTE(review): the character between 'J' and 'L' in the expected
    # alphabet below is reproduced verbatim from the original source; the
    # modifier data uses "\x09" (tab) there — confirm the expected literal
    # matches what str() actually emits.
    correct = '$base64_string = "Test __str()__ call on YaraString w/ Base64 modifier." ' \
              'base64(!@#$%^&*(){}[].,|ABCDEFGHIJ LMNOPQRSTUVWXYZabcdefghijklmnopqrstu)'

    ys = YaraString(
        "base64_string", "Test __str()__ call on YaraString w/ Base64 modifier.",
        string_type=TEXT_TYPE,
        modifiers=[{
            "keyword": BASE64,
            "data": "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu"
        }])

    # Assert directly: the original wrapped everything in a broad
    # 'except Exception: self.fail(...)', which discarded the traceback.
    # Letting an unexpected exception propagate makes unittest report it
    # as an error with full context instead.
    self.assertEqual(str(ys), correct)
'{proto}://{host}:{port}'.format( proto=("https" if CONFIG["hive_server_use_ssl"] else "http"), host=CONFIG["hive_server"], port=CONFIG["hive_port"] ), CONFIG["hive_api_key"]) # Retrieve Observables in a separate API call (as they're not included in responder) observables_response = hive_api.get_case_observables(case_id) # Add observables to thehive:case as its own sub-dict case['observables'] = observables_response.json() strings = [] for o in case["observables"]: # FIXME: Implement backend str type determination. strings.append(YaraString("observable_{md5sum}".format( md5sum=md5(o["data"].encode("utf-8")).hexdigest()), o["data"])) # Append additional strings if specified in config. strings.extend( [ YaraString( "observable_{md5sum}".format( md5sum=md5(field.encode("utf-8")).hexdigest()), case[field]) for field in CONFIG["hive_case_string_fields"] ] ) all_tags = case["tags"] observables_tags = [t for li in [o["tags"] for o in case["observables"]] for t in li] all_tags.extend(observables_tags) all_unique_tags = list(set(all_tags))
def from_compiled_file(cls, yara_rules: Union[yara.Rules, str], source_filename=None, compiled_filepath=None,
                       condition: Union[str, YaraCondition] = None, rules_dir=RULES_DIR, timeout=60):
    """
    Initialize YaraRule from a compiled (binary) file.

    :param timeout: If the match function does not finish before the specified number of seconds elapsed,
                    a TimeoutError exception is raised.
    :param compiled_filepath: Path to the compiled rules binary (derived from rules_dir when
                              yara_rules is given as a str).
    :param source_filename: Filename of the rule's source file inside rules_dir; required when
                            yara_rules is a yara.Rules object, derived otherwise.
    :param condition: Condition to attach to the resulting rule (str is wrapped in YaraCondition);
                      compiled rules do not carry one themselves.
    :param yara_rules: yara.Rules object or path to a compiled yara rules .bin
    :param rules_dir: Directory containing rule source/compiled files.
    :return: A YaraRule built from the attributes captured during matching.
    """
    # It looks like compiled YARA rules don't have a condition,
    # so we have to apply it ourselves or leave it blank.
    if isinstance(condition, str):
        condition = YaraCondition(condition)

    if isinstance(yara_rules, yara.Rules):
        if source_filename is None:
            # Without the source filename we cannot run match() against the source below.
            raise ValueError("yara.Rules object was given, but source_filename was not set!")

        # Load rules from yara.Rules object.
        compiled_blob: yara.Rules = yara_rules
    elif isinstance(yara_rules, str):
        # Treat the string as a rule name relative to rules_dir; derive both file paths from it.
        compiled_filepath = os.path.join(rules_dir, yara_rules + COMPILED_FILE_EXTENSION)
        # Set source filename.
        source_filename = yara_rules + SOURCE_FILE_EXTENSION

        # Load rules from file.
        compiled_blob: yara.Rules = yara.load(
            filepath=compiled_filepath)
    else:
        raise ValueError("yara_rules must be 'yara.Rules' object or 'str' filepath to a compiled yara rules .bin")

    # The match method returns a list of instances of the class Match.
    # Instances of this class have the same attributes as the dictionary passed to the callback function,
    # with the exception of 'matches' which is ONLY passed to the callback function!
    # Match the compiled rules against their own source file; the callback object captures the
    # rule attributes (meta, strings, tags, namespace, rule name) read back below.
    yara_match_callback = YaraMatchCallback()
    # NOTE(review): match() returns a list of yara.Match objects; the annotation below is
    # narrower than the runtime value — confirm before tightening it.
    matches: yara.Match = compiled_blob.match(filepath=os.path.join(rules_dir, source_filename),
                                              callback=yara_match_callback.callback,
                                              timeout=timeout)

    meta = [YaraMeta(identifier, value) for identifier, value in yara_match_callback.meta.items()]
    namespace = yara_match_callback.namespace
    name = yara_match_callback.rule
    strings = [
        YaraString(identifier, value.decode('utf-8'))
        for offset, identifier, value in yara_match_callback.strings]
    tags = yara_match_callback.tags

    if not yara_match_callback.matches:
        # Deliberately logged rather than raised (see the commented-out raise).
        log.error("Compiled YARA does *NOT* match source code!")
        # raise
    else:
        log.info("Compiled YARA matches source code.")
        match = matches[0]
        log.info("match: {}".format(match))

    if isinstance(yara_rules, yara.Rules) and compiled_filepath is None:
        # Best-effort fallback: derive the compiled path from the rule name parsed out of the match.
        log.warning("yara.Rules object was given, but compiled_filepath was not set, "
                    "assuming same name as rule name!")
        compiled_filepath = os.path.join(rules_dir, name + COMPILED_FILE_EXTENSION)

    return cls(name, tags, meta, strings, condition,
               namespace=namespace,
               compiled_blob=compiled_blob,
               compiled_path=compiled_filepath,
               compiled_match_source=yara_match_callback.matches)
def from_source_code(cls, source_code):
    """Initialize YaraRule from sourcecode using own custom written parser.

    Parses the rule header (name/tags) with a regex, then locates the
    meta/strings/condition sections by index and parses each section
    separately.

    :param source_code: Full YARA rule source code as a single string.
    :return: A YaraRule instance, or None if parsing failed.
    """
    try:
        log.debug(source_code)
        # Header pattern: 'rule <identifier> [: tags] { <body> }'.
        # DOTALL lets <rule_body> span newlines.
        constructor_line_pattern = re.compile(
            r"(?P<rule_keyword>rule)\s+(?P<rule_identifier>\w+)\s*"
            r"(?P<tag_body>(?P<tag_delimiter>:)\s*(?P<tags>[\s+\w]+))?\{(?P<rule_body>.*)\}",
            re.MULTILINE | re.DOTALL)

        rule_match = constructor_line_pattern.search(source_code)
        log.debug(rule_match)

        if not rule_match:
            raise ValueError("Rule did not match!\n{source}\n{match}".format(source=source_code, match=rule_match))

        log.debug("rule_match groupdict:\n{}".format(rule_match.groupdict()))

        name = rule_match.groupdict()["rule_identifier"]

        # Only add valid tags to tags list (apply some sanitation on the matched string).
        if rule_match.groupdict()["tags"]:
            tags = []
            for tag in rule_match.groupdict()["tags"].strip('\n').replace('\t', ' ').split(' '):
                if tag != ' ' and tag != '':
                    tags.append(tag)
            # If no tags were added, set it to None for a more clean approach.
            if len(tags) == 0:
                tags = None
        else:
            tags = None

        body = rule_match.groupdict()["rule_body"]
        log.debug("body:\n{}".format(body))

        # Seek thru the whole shebang until you match keyword:
        # Generate a string safe copy of the body, which won't contain irrelevant extra ':' chars etc.
        string_safe_body = cls.abstract_source_body(body)
        log.info("string-safe body:\n{}".format(string_safe_body))

        # Get index of meta and strings (if either is present)
        # NOTE(review): these indexes are found in string_safe_body but used
        # to slice 'body' below — assumes abstract_source_body preserves
        # character offsets exactly; confirm against its implementation.
        meta_index = string_safe_body.find("meta:")
        strings_index = string_safe_body.find("strings:")
        condition_index = string_safe_body.find("condition:")
        log.info("Meta @ {m}, Strings @ {s}, Condition @ {c}".format(
            m=meta_index, s=strings_index, c=condition_index))

        # Make a second pass with a pattern that doesn't use dotall, in order to better parse each sub-body,
        # FIXME: Check if meta can go after string in a rule (read: more headache if-spaghetti needed if so...)
        meta = None
        strings = None

        if meta_index > -1:
            if strings_index > -1:
                # If we have strings, then that is our body part cutoff.
                meta_body = body[meta_index+len("meta:"):strings_index]
                log.info("meta body:\n{}".format(meta_body))
            else:
                # If we don't have strings then condition will be our body part cutoff.
                meta_body = body[meta_index+len("meta:"):condition_index]
                log.info("meta body:\n{}".format(meta_body))

            # Parse meta body items into a list of regex match group dicts.:
            # Each entry looks like '<identifier> = <quoted string|true|false|integer>'.
            p = re.compile(
                r"\s*(?P<full>(?P<identifier>\w+)\s*=\s*(?P<value>\".*\"|true|false|[0-9]*)).*",
                re.MULTILINE)

            # Use finditer() to get a sequence of match objects, in order to get the groupdict for each match.
            match_dicts = [m.groupdict() for m in p.finditer(meta_body)]
            log.info("meta body match dict:\n{}".format(json.dumps(match_dicts, indent=4)))

            # Parse matched dicts into a list of YaraMeta objects.
            meta = []
            for d in match_dicts:
                identifier = d["identifier"]
                value = d["value"]
                value_type = determine_value_type(value)

                if value_type is str:
                    # If value type is a string, strip the redundant quotes,
                    # which will just make a mess of things.
                    value = value[1:-1]

                meta.append(YaraMeta(identifier, value, value_type))
            log.info("Parsed YaraMeta objects:\n{}".format(json.dumps([repr(o) for o in meta], indent=4)))

        if strings_index > -1:
            strings_body = body[strings_index+len("strings:"):condition_index]
            log.info("strings body:\n{}".format(strings_body))

            # Parse strings programmatically (wildcard content makes regex approach exceedingly hard)
            parsed_string_dicts = cls.parse_strings_body(strings_body)
            log.info("Parsed YARA string dicts:\n{}".format(json.dumps(parsed_string_dicts, indent=4)))

            # Parse parsed YARA string dicts into a list of YaraString objects.
            strings = [
                YaraString(d["identifier"], d["value"], determine_value_type(d["value"]),
                           d["string_type"], d["modifiers"]) for d in parsed_string_dicts
            ]

        # Parse raw condition gathered from abstraction.
        # Strip leading and trailing whitespace and offset the start past the constructor,
        # in order to avoid false positives when searching for non-separator chars.
        raw_condition = body[condition_index:].lstrip()[len("condition:"):].rstrip()
        separators = [' ', '\n', '\t']
        uniform_separator = ' '
        condition_str = ""
        inside_condition_content = False
        # Sanitize the raw condition which can have unpredictable spacing/indent:
        # collapse every run of separator characters into a single space.
        for i in range(len(raw_condition)):
            # Seek until we get an actual non-separator character.
            if not inside_condition_content and raw_condition[i] not in separators:
                condition_str += raw_condition[i]
                inside_condition_content = True
            elif inside_condition_content:
                if raw_condition[i] in separators and raw_condition[i-1] not in separators:
                    # If this is a separator and there is no separator behind us, then add a uniform one.
                    condition_str += uniform_separator
                elif raw_condition[i] not in separators:
                    condition_str += raw_condition[i]

        condition = YaraCondition(condition_str)

        # Debug summary of everything parsed out of the source.
        parsed_source = {
            "name": name,
            "tags": tags,
            "meta": [repr(o) for o in meta] if isinstance(meta, list) else None,
            "strings": [repr(o) for o in strings] if isinstance(strings, list) else None,
            "condition": repr(condition)
        }
        log.info("parsed_source:\n{}".format(json.dumps(parsed_source, indent=4)))

        return cls(name, tags, meta, strings, condition)
    except Exception as exc:
        log.exception("YaraRule.from_source_file exc", exc_info=exc)
        return None