def __init__(self): self.statusController = StatusController() self.filename = "" # set raw data of rule content, to be parsed by analyze* methods self.raw_header = "" self.raw_header_cleaned = "" self.raw_meta = "" self.raw_meta_cleaned = "" self.raw_strings = "" self.raw_strings_cleaned = "" self.raw_condition = "" self.raw_condition_cleaned = "" # rule header self.is_global = False self.is_private = False self.rule_name = "" self.rule_description = [] # meta self.meta = [] # strings self.strings = [] #condition self.condition = "" # match data as provided by yara-python self.match_data = {}
def __init__(self): """ init Yara Rule Loader Object """ self.statusController = StatusController()
class YaraRuleLoader(object): """ Yara Rule Loader class """ def __init__(self): """ init Yara Rule Loader Object """ self.statusController = StatusController() def loadRulesFromFile(self, filename): """ load content of file (1) """ content = "" # read file with open(filename, 'r') as f_input: content = f_input.read() #clean content content_cleaned = self._cleanContent(content) # split content in Yara Rules return self._splitYaraRules(content, content_cleaned, filename) def _cleanContent(self, content): """ clean content, replace comments by spaces, replace strings by underlines """ # current status while going through content ("", "string", "comment_multiline", "comment_singleline") self.statusController.reset() # result result = "" # go through file and copy everything but comments and strings, instead write blanks or underlines skip_i = 0 for i in xrange(len(content)): # skip character(s) if (skip_i > 0): skip_i -= 1 continue ## find strings - text # find beginnig of string if self.statusController.controlStatus( "", self.statusController.findKeyword(content, i, "\""), "string_text"): result += "\"" continue # skip next character when finding the escape character \ inside string if self.statusController.controlStatus( "string_text", self.statusController.findKeyword(content, i, "\\"), None): result += "__" skip_i = 1 continue # find end of string if self.statusController.controlStatus( "string_text", self.statusController.findKeyword(content, i, "\""), ""): result += "\"" continue ## find strings - regex # find beginnig of string if self.statusController.controlStatus( "", self.statusController.findKeyword(content, i, "/") and not self.statusController.findKeyword(content, i, "//") and not self.statusController.findKeyword(content, i, "/*"), "string_regex"): result += "/" continue # skip next character when finding the escape character \ inside string if self.statusController.controlStatus( "string_regex", self.statusController.findKeyword(content, i, "\\"), None): result += "__" skip_i = 1 continue # find end of string if self.statusController.controlStatus( "string_regex", self.statusController.findKeyword(content, i, "/"), ""): result += "/" continue ## find multi line comments # find beginnig of comment if self.statusController.controlStatus( "", self.statusController.findKeyword(content, i, "/*"), "comment_multiline"): result += " " skip_i = 1 continue # find end of string if self.statusController.controlStatus( "comment_multiline", self.statusController.findKeyword(content, i, "*/"), ""): result += " " skip_i = 1 continue ## find single line comments # find beginnig of comment if self.statusController.controlStatus( "", self.statusController.findKeyword(content, i, "//"), "comment_singleline"): result += " " skip_i = 1 continue # find end of comment by finding end of line \r if self.statusController.controlStatus( "comment_singleline", self.statusController.findKeyword(content, i, "\r"), ""): result += "\r" continue # find end of comment by finding end of line \n if self.statusController.controlStatus( "comment_singleline", self.statusController.findKeyword(content, i, "\n"), ""): result += "\n" continue ## copy content # copy content if this is neither a comment nor a string, else add spaces or underlines if (self.statusController.status == ""): result += content[i] elif (self.statusController.status == "string_text"): result += "_" elif (self.statusController.status == "string_regex"): result += "_" else: result += " " # return content without comments and strings return result def _splitYaraRules(self, content, content_cleaned, filename): """ get all Yara rules split in sections (header, meta, strings, condition) """ # result, list of Yara rules yara_rules = [] # sections of current Yara rule current_rule = YaraRule() # status ("", "header", "meta", "strings", "condition"), file starts in Yara rule header, so status is "header" self.statusController.reset("header") # list of characters, one of them must stand in front of every section keyword needed_chars = [" ", "\r", "\n", "\t", "\"", "/", "{", "}"] # go through file and split it in Yara rules and them in sections for i in xrange(len(content_cleaned)): ## header # find end of header section if self.statusController.controlStatus( "header", self.statusController.findKeyword(content_cleaned, i, "{"), ""): continue # copy header if (self.statusController.status == "header"): current_rule.raw_header += content[i] current_rule.raw_header_cleaned += content_cleaned[i] ## meta # find beginning of meta section self.statusController.controlStatus( "", self.statusController.findKeyword(content_cleaned, i, "meta", needed_chars), "meta") # find end of meta section self.statusController.controlStatus( "meta", self.statusController.findKeyword(content_cleaned, i, "strings", needed_chars), "") self.statusController.controlStatus( "meta", self.statusController.findKeyword(content_cleaned, i, "condition", needed_chars), "") # copy meta if (self.statusController.status == "meta"): current_rule.raw_meta += content[i] current_rule.raw_meta_cleaned += content_cleaned[i] ## strings # find beginning of strings section self.statusController.controlStatus( "", self.statusController.findKeyword(content_cleaned, i, "strings", needed_chars), "strings") # find end of strings section self.statusController.controlStatus( "strings", self.statusController.findKeyword(content_cleaned, i, "condition", needed_chars), "") # copy meta if (self.statusController.status == "strings"): current_rule.raw_strings += content[i] current_rule.raw_strings_cleaned += content_cleaned[i] ## condition # find beginning of condition section self.statusController.controlStatus( "", self.statusController.findKeyword(content_cleaned, i, "condition", needed_chars), "condition") # find end of condition section self.statusController.controlStatus( "condition", self.statusController.findKeyword(content_cleaned, i, "}"), "endOfRule") # copy meta if (self.statusController.status == "condition"): current_rule.raw_condition += content[i] current_rule.raw_condition_cleaned += content_cleaned[i] ## find end of rule # save rule and reinit parsing if self.statusController.controlStatus("endOfRule", True, "header"): # add fully parsed rule to list and create next rule yara_rules.append(current_rule) # analyze Yara rule current_rule.filename = filename current_rule.analyze() current_rule = YaraRule() # return list of Yara rules return yara_rules
class YaraRule(object): """ Yara Rule class """ def __init__(self): self.statusController = StatusController() self.filename = "" # set raw data of rule content, to be parsed by analyze* methods self.raw_header = "" self.raw_header_cleaned = "" self.raw_meta = "" self.raw_meta_cleaned = "" self.raw_strings = "" self.raw_strings_cleaned = "" self.raw_condition = "" self.raw_condition_cleaned = "" # rule header self.is_global = False self.is_private = False self.rule_name = "" self.rule_description = [] # meta self.meta = [] # strings self.strings = [] #condition self.condition = "" # match data as provided by yara-python self.match_data = {} def checkRule(self): unique_names = {} for string in self.strings: if string[1] in unique_names: print "[!] Rule %s has duplicate variable name: \"%s\"" % (self.rule_name, self.filename, string[1]) raise Exception("Duplicate variable name") else: unique_names[string[1]] = "loaded" return True def analyze(self): self._analyzeHeader() self._analyzeMeta() self._analyzeStrings() self._analyzeCondition() def _linebreakAndTabsToSpace(self, content): """ replace all linebreaks and tabs by spaces """ new_content = "" for i in xrange(len(content)): if (content[i] == "\r"): new_content += " " elif (content[i] == "\n"): new_content += " " elif (content[i] == "\t"): new_content += " " else: new_content += content[i] return new_content def _analyzeHeader(self): """ analyze Yara rule header, find keywords PRIVATE and GLOBAL, get rule NAME and DESCRIPTION """ self.statusController.reset() # delete tabs and linebreaks and then split rule header into single words raw_header_cleaned = self._linebreakAndTabsToSpace(self.raw_header_cleaned) raw_header_cleaned = raw_header_cleaned.replace(":", " : ") # analyze words for header_word in raw_header_cleaned.split(" "): if header_word == "private": self.is_private = True elif header_word == "global": self.is_global = True elif header_word == "rule": self.statusController.status = "find_rule_name" elif header_word == ":": self.statusController.status = "find_rule_description" elif self.statusController.status == "find_rule_name" and header_word != "": self.rule_name = header_word elif self.statusController.status == "find_rule_description" and header_word != "": self.rule_description.append(header_word) def _analyzeMeta(self): """ analyze meta section of Yara rule and save tuples (meta name, meta value) in list of meta entries """ # current meta entry meta_name = "" meta_content = "" # status ("find_name", "name", "find_field_value", "value") self.statusController.reset("find_name") # read meta string and replace line breaks and tabs raw_meta = self.raw_meta raw_meta_cleaned = self._linebreakAndTabsToSpace(self.raw_meta_cleaned) # check if meta section exists if (len(raw_meta_cleaned) == 0) or (raw_meta_cleaned.find(":") == -1): return # insert an additional whitespace at the end as end delimiter to handle compact rules raw_meta += " " raw_meta_cleaned += " " # split at first colon temp, meta_body_cleaned = raw_meta_cleaned.split(":", 1) meta_body = raw_meta[len(temp) + 1:] # go through file and split it in Yara rules and them in sections for i in xrange(len(meta_body_cleaned)): # find beginning of meta entry name if self.statusController.controlStatus("find_name", not self.statusController.findKeyword(meta_body_cleaned, i, " "), "name"): pass # find end of meta entry name elif self.statusController.controlStatus("name", self.statusController.findKeyword(meta_body_cleaned, i, "="), "find_field_value"): continue # find beginning of meta entry value if self.statusController.controlStatus("find_field_value", not self.statusController.findKeyword(meta_body_cleaned, i, " "), "value"): # skip first letter by continue if value is a string if (meta_body_cleaned[i] == "\""): continue # find end of meta entry value if self.statusController.controlStatus("value", i == len(meta_body_cleaned) - 1 or self.statusController.findKeyword(meta_body_cleaned, i, " ") or self.statusController.findKeyword(meta_body_cleaned, i, "\""), "find_name"): if not self.statusController.findKeyword(meta_body_cleaned, i, " ") and i == len(meta_body_cleaned) - 1: meta_content += meta_body[i] if self.statusController.findKeyword(meta_body_cleaned, i, " ") or i == len(meta_body_cleaned) - 1: if meta_content == "true": meta_content = True elif meta_content == "false": meta_content = False elif meta_content.isdigit(): meta_content = int(meta_content) meta_name = meta_name.strip() self.meta.append([meta_name, meta_content]) # reset variables meta_name = "" meta_content = "" continue # copy content in meta name or meta value if (self.statusController.status == "name"): meta_name += meta_body[i] if (self.statusController.status == "value"): meta_content += meta_body[i] def _identifyStringType(self, indicator): """ identify type of string (text, regex, byte array) by it's first character """ if indicator == "\"": return "text" if indicator == "/": return "regular_expression" if indicator == "{": return "byte_array" raise ValueError("Invalid string type indicator: %s" % indicator) def _checkStringValueTerminator(self, string_type, char): terminator_types = [("text", "\""), ("regular_expression", "/"), ("byte_array", "}")] return (string_type, char) in terminator_types def _analyzeStrings(self): """ analyze strings section of Yara rule and save tuples (string name, string value, string type) in list of string entries """ # current string variable var_name = "" var_content = "" var_keywords = [] # status ("find_name", "name", "find_field_value", "value") self.statusController.reset("find_name") # read strings string and replace line breaks and tabs raw_strings = self.raw_strings raw_strings_cleaned = self._linebreakAndTabsToSpace(self.raw_strings_cleaned) # check if meta section exists if ((len(raw_strings_cleaned) == 0) or (raw_strings_cleaned.find(":") == -1)): return # insert an additional whitespace at the end as end delimiter to handle compact rules raw_strings += " " raw_strings_cleaned += " " # split at first colon temp, raw_strings_cleaned = raw_strings_cleaned.split(":", 1) raw_strings = raw_strings[len(temp) + 1:] # go through file and split it in Yara rules and them in sections skip_i = 0 for i in xrange(len(raw_strings_cleaned)): # skip character(s) if (skip_i > 0): skip_i -= 1 continue # find beginning of string variable name if self.statusController.controlStatus("find_name", not self.statusController.findKeyword(raw_strings_cleaned, i, " "), "name"): pass # find end of string variable name elif self.statusController.controlStatus("name", self.statusController.findKeyword(raw_strings_cleaned, i, "="), "find_field_value"): continue # find beginning of string variable value if self.statusController.controlStatus("find_field_value", not self.statusController.findKeyword(raw_strings_cleaned, i, " "), "value"): string_variable_type = self._identifyStringType(raw_strings_cleaned[i]) continue # find end of string variable value if (self.statusController.status == "value"): # check for all 3 types of strings (text, regex, byte array) if the string is complete # and save string variable if string is complete if self._checkStringValueTerminator(string_variable_type, raw_strings_cleaned[i]): self.statusController.status = "stringModifier" continue # look for string modification keywords after value ("wide", "ascii", "nocase", "fullword") string_modifiers = ["wide", "ascii", "nocase", "fullword"] for modifier in string_modifiers: if self.statusController.controlStatus("stringModifier", self.statusController.findKeyword(raw_strings_cleaned, i, modifier), "stringModifier"): var_keywords.append(modifier) skip_i = len(modifier) break if (skip_i > 0): continue # check if there is any character after string, that is not part of a keyword and is no blank self.statusController.controlStatus("stringModifier", not self.statusController.findKeyword(raw_strings_cleaned, i, " "), "saveString") # save string if this the end of strings section is reached if i == len(raw_strings_cleaned) - 1: self.statusController.status = "saveString" # save string if (self.statusController.status == "saveString"): # delete white spaces var_name = var_name.strip() if (string_variable_type == "regular_expression") or (string_variable_type == "byte_array"): var_content = var_content.strip() # save tuple in strings list self.strings.append((string_variable_type, var_name, var_content, var_keywords)) # reset status self.statusController.status = "name" # reset variables var_name = "" var_content = "" var_keywords = [] # copy content in string name or string value if (self.statusController.status == "name"): var_name += raw_strings[i] if (self.statusController.status == "value"): var_content += raw_strings[i] def _analyzeCondition(self): """ analyze Yara rule condition """ # delete tabs and linebreaks temp_condition = self._linebreakAndTabsToSpace(self.raw_condition_cleaned) # check if meta section exists if (len(temp_condition) == 0) or (temp_condition.find(":") == -1): return # split at first colon temp, temp_condition = temp_condition.split(":", 1) # delete white spaces at beginning and end of string temp_condition = temp_condition.strip() # replace multiple spaces in string temp_condition_len = 0 while (temp_condition_len != len(temp_condition)): temp_condition_len = len(temp_condition) temp_condition = temp_condition.replace(" ", " ") # save condition in Yara rule self.condition = temp_condition def __str__(self): if not self.rule_name: return "Failed to load rule through YaraRuleLoader. Please be so kind and report this back for a bug fix! :)" start_delimiters = {"text": "\"", "regular_expression": "/ ", "byte_array": "{ "} end_delimiters = {"text": "\"", "regular_expression": " /", "byte_array": " }"} result = "" result += "global " if self.is_global else "" result += "private " if self.is_private else "" result += "rule " + self.rule_name if self.rule_description: result += " : " + " ".join(self.rule_description) result += "\n{\n" if self.meta: result += " meta:\n" for meta_line in self.meta: if isinstance(meta_line[1], str): result += " " * 8 + "%s = \"%s\"\n" % (meta_line[0], meta_line[1]) else: result += " " * 8 + "%s = %s\n" % (meta_line[0], meta_line[1]) result += "\n" if self.strings: result += " strings:\n" for string_line in self.strings: result += " " * 8 + "%s = %s%s%s %s\n" % (string_line[1], start_delimiters[string_line[0]], string_line[2], end_delimiters[string_line[0]], " ".join(string_line[3])) result += "\n" result += " condition:\n" result += " " * 8 + "%s\n" % self.condition result += "}" return result
class YaraRuleLoader(object): """ Yara Rule Loader class """ def __init__(self): """ init Yara Rule Loader Object """ self.statusController = StatusController() def loadRulesFromFile(self, filename): """ load content of file (1) """ content = "" # read file with open(filename, 'r') as f_input: content = f_input.read() #clean content content_cleaned = self._cleanContent(content) # split content in Yara Rules return self._splitYaraRules(content, content_cleaned, filename) def _cleanContent(self, content): """ clean content, replace comments by spaces, replace strings by underlines """ # current status while going through content ("", "string", "comment_multiline", "comment_singleline") self.statusController.reset() # result result = "" # go through file and copy everything but comments and strings, instead write blanks or underlines skip_i = 0 for i in xrange(len(content)): # skip character(s) if (skip_i > 0): skip_i -= 1 continue ## find strings - text # find beginnig of string if self.statusController.controlStatus("", self.statusController.findKeyword(content, i, "\""), "string_text"): result += "\"" continue # skip next character when finding the escape character \ inside string if self.statusController.controlStatus("string_text", self.statusController.findKeyword(content, i, "\\"), None): result += "__" skip_i = 1 continue # find end of string if self.statusController.controlStatus("string_text", self.statusController.findKeyword(content, i, "\""), ""): result += "\"" continue ## find strings - regex # find beginnig of string if self.statusController.controlStatus("", self.statusController.findKeyword(content, i, "/") and not self.statusController.findKeyword(content, i, "//") and not self.statusController.findKeyword(content, i, "/*"), "string_regex"): result += "/" continue # skip next character when finding the escape character \ inside string if self.statusController.controlStatus("string_regex", self.statusController.findKeyword(content, i, "\\"), None): result += "__" skip_i = 1 continue # find end of string if self.statusController.controlStatus("string_regex", self.statusController.findKeyword(content, i, "/"), ""): result += "/" continue ## find multi line comments # find beginnig of comment if self.statusController.controlStatus("", self.statusController.findKeyword(content, i, "/*"), "comment_multiline"): result += " " skip_i = 1 continue # find end of string if self.statusController.controlStatus("comment_multiline", self.statusController.findKeyword(content, i, "*/"), ""): result += " " skip_i = 1 continue ## find single line comments # find beginnig of comment if self.statusController.controlStatus("", self.statusController.findKeyword(content, i, "//"), "comment_singleline"): result += " " skip_i = 1 continue # find end of comment by finding end of line \r if self.statusController.controlStatus("comment_singleline", self.statusController.findKeyword(content, i, "\r"), ""): result += "\r" continue # find end of comment by finding end of line \n if self.statusController.controlStatus("comment_singleline", self.statusController.findKeyword(content, i, "\n"), ""): result += "\n" continue ## copy content # copy content if this is neither a comment nor a string, else add spaces or underlines if (self.statusController.status == ""): result += content[i] elif (self.statusController.status == "string_text"): result += "_" elif (self.statusController.status == "string_regex"): result += "_" else: result += " " # return content without comments and strings return result def _splitYaraRules(self, content, content_cleaned, filename): """ get all Yara rules split in sections (header, meta, strings, condition) """ # result, list of Yara rules yara_rules = [] # sections of current Yara rule current_rule = YaraRule() # status ("", "header", "meta", "strings", "condition"), file starts in Yara rule header, so status is "header" self.statusController.reset("header") # list of characters, one of them must stand in front of every section keyword needed_chars = [" ", "\r", "\n", "\t", "\"", "/", "{", "}"] # go through file and split it in Yara rules and them in sections for i in xrange(len(content_cleaned)): ## header # find end of header section if self.statusController.controlStatus("header", self.statusController.findKeyword(content_cleaned, i, "{"), ""): continue # copy header if (self.statusController.status == "header"): current_rule.raw_header += content[i] current_rule.raw_header_cleaned += content_cleaned[i] ## meta # find beginning of meta section self.statusController.controlStatus("", self.statusController.findKeyword(content_cleaned, i, "meta", needed_chars), "meta") # find end of meta section self.statusController.controlStatus("meta", self.statusController.findKeyword(content_cleaned, i, "strings", needed_chars), "") self.statusController.controlStatus("meta", self.statusController.findKeyword(content_cleaned, i, "condition", needed_chars), "") # copy meta if (self.statusController.status == "meta"): current_rule.raw_meta += content[i] current_rule.raw_meta_cleaned += content_cleaned[i] ## strings # find beginning of strings section self.statusController.controlStatus("", self.statusController.findKeyword(content_cleaned, i, "strings", needed_chars), "strings") # find end of strings section self.statusController.controlStatus("strings", self.statusController.findKeyword(content_cleaned, i, "condition", needed_chars), "") # copy meta if (self.statusController.status == "strings"): current_rule.raw_strings += content[i] current_rule.raw_strings_cleaned += content_cleaned[i] ## condition # find beginning of condition section self.statusController.controlStatus("", self.statusController.findKeyword(content_cleaned, i, "condition", needed_chars), "condition") # find end of condition section self.statusController.controlStatus("condition", self.statusController.findKeyword(content_cleaned, i, "}"), "endOfRule") # copy meta if (self.statusController.status == "condition"): current_rule.raw_condition += content[i] current_rule.raw_condition_cleaned += content_cleaned[i] ## find end of rule # save rule and reinit parsing if self.statusController.controlStatus("endOfRule", True, "header"): # add fully parsed rule to list and create next rule yara_rules.append(current_rule) # analyze Yara rule current_rule.filename = filename current_rule.analyze() current_rule = YaraRule() # return list of Yara rules return yara_rules