def main():
    """Validate an indicator value for the given pattern type and report the result."""
    if len(sys.argv) <= 2:
        return_data(
            {"status": "error", "message": "Missing argument to the Python script"}
        )
    if sys.argv[1] == "check":
        return_data({"status": "success"})
    pattern_type = sys.argv[1]
    indicator_value = sys.argv[2]
    if pattern_type == "stix":
        result = False
        try:
            errors = run_validator(indicator_value)
            if len(errors) == 0:
                result = True
        except Exception:
            result = False
        return_data({"status": "success", "data": result})
    if pattern_type == "yara":
        parser = plyara.Plyara()
        result = False
        try:
            parser.parse_string(indicator_value)
            result = True
        except Exception:
            result = False
        return_data({"status": "success", "data": result})
    if pattern_type == "sigma":
        result = False
        try:
            parser = SigmaCollectionParser(indicator_value)
            result = True
        except Exception:
            result = False
        return_data({"status": "success", "data": result})
    if pattern_type == "snort":
        result = False
        try:
            parsed = Parser(indicator_value).all
            result = True
        except Exception:
            result = False
        return_data({"status": "success", "data": result})
    if pattern_type == "suricata":
        result = False
        try:
            parsed = parse_rules(indicator_value)
            result = True
        except Exception:
            result = False
        return_data({"status": "success", "data": result})
    return_data({"status": "unknown", "data": None})
def test_exclude_rules():
    parser = plyara.Plyara()
    shutil.copy("test/testrules/test.yar", "test.yar")
    exclude_rules("test/testrules.json")
    with open("test.yar", "r") as test_yar_file:
        parsed = parser.parse_string(test_yar_file.read())
    assert len(parsed) == 1
    assert parsed[0]["rule_name"] == "IncludedRule"
    os.remove("test.yar")
def test_yara():
    """
    This test should pass with no exceptions if we are generating valid Yara
    rules. This is useful to ensure that future Yara versions work correctly
    with the tool.
    """
    strings = ["foo", "bar"]
    data = generate_yara_rule("test_rule", "This is a test rule", strings)
    parser = plyara.Plyara()
    rule = parser.parse_string(data)
    assert rule
def execute_search(job_hash):
    logging.info('Parsing...')
    job = redis.hgetall('job:' + job_hash)
    yara_rule = job['raw_yara']
    redis.hmset('job:' + job_hash, {
        'status': 'parsing',
        'timestamp': time.time(),
    })
    try:
        rules = plyara.Plyara().parse_string(yara_rule)
        parser = YaraParser(rules[0])
        parsed = parser.parse()
    except Exception as e:
        logging.exception(e)
        raise RuntimeError('Failed to parse Yara')
    redis.hmset('job:' + job_hash, {
        'status': 'querying',
        'timestamp': time.time(),
    })
    logging.info('Querying backend...')
    result = db.query(parsed)
    if 'error' in result:
        raise RuntimeError(result['error'])
    # Re-read the job to pick up any limits set since the search started.
    job = redis.hgetall('job:' + job_hash)
    files = [f for f in result['files'] if f.strip()]
    logging.info('Database responded with {} files'.format(len(files)))
    if 'max_files' in job and int(job['max_files']) > 0:
        files = files[:int(job['max_files'])]
    redis.hmset('job:' + job_hash, {
        'status': 'processing',
        'files_processed': 0,
        'total_files': len(files),
    })
    if files:
        pipe = redis.pipeline()
        for file in files:
            pipe.rpush('queue-yara', '{}:{}'.format(job_hash, file))
        pipe.execute()
        logging.info('Done uploading yara jobs.')
    else:
        redis.hset('job:{}'.format(job_hash), 'status', 'done')
def query():
    req = request.get_json()
    raw_yara = req['rawYara']
    try:
        rules = plyara.Plyara().parse_string(raw_yara)
    except Exception as e:
        return jsonify({'error': 'PLYara failed (not my fault): ' + str(e)}), 400
    if len(rules) > 1:
        return jsonify({'error': 'More than one rule specified!'}), 400
    rule_name = rules[0].get('rule_name')
    try:
        parser = YaraParser(rules[0])
        pre_parsed = parser.pre_parse()
        parsed = parser.parse()
    except Exception as e:
        logging.exception('YaraParser failed')
        return jsonify(
            {'error': 'YaraParser failed (msm\'s fault): {}'.format(str(e))}), 400
    if req['method'] == 'parse':
        return jsonify({'rule_name': rule_name, 'parsed': parsed})
    job_hash = ''.join(
        random.SystemRandom().choice(string.ascii_uppercase + string.digits)
        for _ in range(12))
    job_obj = {
        'status': 'new',
        'max_files': -1,
        'rule_name': rule_name,
        'parsed': parsed,
        'pre_parsed': pre_parsed,
        'raw_yara': raw_yara,
        'submitted': int(time.time()),
    }
    if req['method'] == 'query_100':
        job_obj.update({'max_files': 100})
    redis.hmset('job:' + job_hash, job_obj)
    redis.rpush('queue-search', job_hash)
    return jsonify({'query_hash': job_hash})
def parse_string(self, input_string):
    try:
        if PERMISSIVE_MODE:
            return self._permissive_parse_string(input_string)
        else:
            return plyara.Plyara().parse_string(input_string)
    except ParseError:
        raise
    # some errors are not properly caught by plyara
    # convert everything to ParseError to avoid uncatchable crashes
    except Exception as e:
        raise ParseError(
            'Uncaught plyara exception ({}): {}'.format(
                type(e).__name__, str(e)),
            None, None)
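# A minimal usage sketch for the wrapper above (an assumption, not part of the
# original source): "RuleParser" is a hypothetical name for the class that
# defines parse_string(), and ParseError is the exception class it raises.
parser = RuleParser()
try:
    rules = parser.parse_string('rule broken {')
except ParseError as e:
    # Both plyara's own parse failures and the converted "uncaught" exceptions
    # arrive here as ParseError, so callers need only one except clause.
    print('could not parse rule: {}'.format(e))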
def _create_yara_indicator(
    self, yara: dict, valid_from: Optional[int] = None
) -> Optional[Indicator]:
    """Create an indicator containing the YARA rule from VirusTotal."""
    valid_from_date = (
        datetime.datetime.min
        if valid_from is None
        else datetime.datetime.utcfromtimestamp(valid_from)
    )
    ruleset_id = yara.get("ruleset_id", "No ruleset id provided")
    self.helper.log_info(f"[VirusTotal] Retrieving ruleset {ruleset_id}")
    # Look up the ruleset id in the cache; otherwise, request it from the VirusTotal API.
    if ruleset_id in self.yara_cache:
        self.helper.log_debug(f"Retrieving YARA ruleset {ruleset_id} from cache.")
        ruleset = self.yara_cache[ruleset_id]
    else:
        self.helper.log_debug(f"Retrieving YARA ruleset {ruleset_id} from API.")
        ruleset = self.client.get_yara_ruleset(ruleset_id)
        self.yara_cache[ruleset_id] = ruleset
    # Parse the rules to find the correct one.
    parser = plyara.Plyara()
    rules = parser.parse_string(ruleset["data"]["attributes"]["rules"])
    rule_name = yara.get("rule_name", "No rule name provided")
    rule = [r for r in rules if r["rule_name"] == rule_name]
    if len(rule) == 0:
        self.helper.log_warning(f"No YARA rule for rule name {rule_name}")
        return None
    return self.helper.api.indicator.create(
        name=yara.get("rule_name", "No rulename provided"),
        description=json.dumps(
            {
                "description": yara.get("description", "No description provided"),
                "author": yara.get("author", "No author provided"),
                "source": yara.get("source", "No source provided"),
                "ruleset_id": ruleset_id,
                "ruleset_name": yara.get("ruleset_name", "No ruleset name provided"),
            }
        ),
        pattern=plyara.utils.rebuild_yara_rule(rule[0]),
        pattern_type="yara",
        valid_from=self.helper.api.stix2.format_date(valid_from_date),
        x_opencti_main_observable_type="StixFile",
    )
def query_by_hash(qhash):
    yara = redis.get('query:' + qhash)
    try:
        rules = plyara.Plyara().parse_string(yara)
    except Exception as e:
        return error_page(yara, 'PLYara failed (not my fault): ' + str(e))
    if len(rules) > 1:
        return error_page(yara, 'More than one rule specified')
    rule_name = rules[0].get('rule_name')
    try:
        parser = YaraParser(rules[0])
        pre_parsed = parser.pre_parse()
        parsed = parser.parse()
    except Exception as e:
        return error_page(yara, 'YaraParser failed (msm\'s fault): ' + str(e))
    matches = redis.smembers('matches:' + qhash)
    false_positives = redis.smembers('false_positives:' + qhash)
    job = redis.hgetall('job:' + qhash)
    debug = 'debug' in request.args
    error = job.get('error')
    body = render_template(
        'index.html',
        yara=yara,
        pre_parsed=pre_parsed,
        parsed=parsed,
        job=job,
        matches=matches,
        errors=error,
        false_positives=false_positives,
        debug=debug,
        saved_rules=get_saved_rules(),
        qhash=qhash,
        rule_name=rule_name,
        repo_url=config.REPO_URL,
    )
    return body
import yara
import plyara

rule = yara.compile(source='rule foo: bar {strings: $a = "lmn" condition: $a}')
matches = rule.match(data='abcdefgjiklmnoprstuvwxyz')

parser = plyara.Plyara()
mylist = parser.parse_string('rule foo: bar {strings: $a = "lmn" condition: $a}')

print("all is good: {}".format(matches))
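# A quick sketch (not from the original source) of inspecting the list returned
# by parse_string() above; the key names ('rule_name', 'tags', 'strings') follow
# plyara's parsed-rule dictionaries, and plyara.utils.rebuild_yara_rule is the
# same round-trip helper used elsewhere in this collection.
import plyara.utils

for parsed_rule in mylist:
    print(parsed_rule['rule_name'])         # foo
    print(parsed_rule.get('tags', []))      # ['bar']
    for string in parsed_rule.get('strings', []):
        print(string['name'], string['value'])  # $a lmn
    # Rebuild YARA source text from the parsed dictionary.
    print(plyara.utils.rebuild_yara_rule(parsed_rule))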
Log.error("[E] Error: input file '%s' doesn't exist" % f) else: input_files.append(f) # Directory list if args.d: for d in args.d[0]: if not os.path.exists(d): Log.error("[E] Error: input directory '%s' doesn't exist" % d) else: for f in (os.listdir(d)): if ".yar" in f: input_files.append(os.path.join(d, f)) # Calibration rule p = plyara.Plyara() calibration_rule = p.parse_string(CALIBRATION_RULE) # Loop over input files rules_list = [] for f in input_files: # Parse YARA rules to Dictionary if not os.path.exists(f): Log.error("Cannot find input file '%s'" % f) sys.exit(1) try: Log.info("Processing %s ..." % f) p = plyara.Plyara() file_data = "" # Read file with open(f, 'r') as fh:
def example():
    """Execute the example code."""
    parser = argparse.ArgumentParser()
    parser.add_argument('file', metavar='FILE',
                        help='File containing YARA rules to parse.')
    args = parser.parse_args()

    print('Parsing file...')
    with open(args.file, 'r') as fh:
        data = fh.read()
    parser = plyara.Plyara()
    rules_dict = parser.parse_string(data)
    print('Analyzing dictionary...')

    imps = {}
    tags = {}
    rule_count = 0

    for rule in rules_dict:
        rule_count += 1
        # Imports
        if 'imports' in rule:
            for imp in rule['imports']:
                imp = imp.replace('"', '')
                if imp in imps:
                    imps[imp] += 1
                else:
                    imps[imp] = 1
        # Tags
        if 'tags' in rule:
            for tag in rule['tags']:
                if tag in tags:
                    tags[tag] += 1
                else:
                    tags[tag] = 1

    print('\n======================\n')
    print('Number of rules in file: {}'.format(rule_count))
    ordered_imps = sorted(imps.items(), key=operator.itemgetter(1), reverse=True)
    ordered_tags = sorted(tags.items(), key=operator.itemgetter(1), reverse=True)
    print('\n======================\n')
    print('Top imports:')
    for i in range(5):
        if i < len(ordered_imps):
            print(ordered_imps[i])
    print('\n======================\n')
    print('Top tags:')
    for i in range(5):
        if i < len(ordered_tags):
            print(ordered_tags[i])
def execute_job(job_id, hash, yara_rule):
    logging.info('Parsing...')
    redis.hmset(job_id, {
        'status': 'processing',
        'timestamp': time.time(),
    })
    try:
        rules = plyara.Plyara().parse_string(yara_rule)
        parser = YaraParser(rules[0])
        parsed = parser.parse()
    except Exception as e:
        logging.exception(e)
        raise RuntimeError('Failed to parse Yara')
    redis.hmset(job_id, {
        'status': 'querying',
        'timestamp': time.time(),
    })
    logging.info('Querying backend...')
    result = db.query(parsed)
    if 'error' in result:
        raise RuntimeError(result['error'])
    job = redis.hgetall(job_id)
    files = [f for f in result['files'] if f.strip()]
    logging.info('Database responded with {} files'.format(len(files)))
    if 'max_files' in job and int(job['max_files']) > 0:
        files = files[:int(job['max_files'])]
    redis.hmset(job_id, {
        'total_files': len(files),
        'files_processed': 0,
    })
    logging.info('Compiling Yara')
    try:
        rule = yara.compile(source=yara_rule)
    except SyntaxError as e:
        logging.exception('Yara parse error')
        raise
    for file_ndx, file_path in enumerate(files):
        matches = rule.match(data=open(file_path, 'rb').read())
        if matches:
            logging.info('Processed (match): {}'.format(file_path))
            redis.sadd('matches:' + hash, file_path)
        else:
            logging.info('Processed (nope ): {}'.format(file_path))
            redis.sadd('false_positives:' + hash, file_path)
        redis.hmset(job_id, {
            'files_processed': file_ndx + 1,
        })
        # Stop early if the job was cancelled while we were scanning.
        status = redis.hget(job_id, 'status')
        if status == 'cancelled':
            logging.info('Job cancelled')
            return
    redis.hmset(job_id, {
        'status': 'done',
    })
    logging.info('Done')
import operator
import os
import sys
import codecs

sys.path.insert(0, os.getcwd())
import plyara

if __name__ == '__main__':
    file_to_analyze = sys.argv[1]

    print("...parsing file...")
    rulesDict = plyara.Plyara().parse_string(
        codecs.open(file_to_analyze, encoding='utf-8').read())

    print("...analyzing dictionary...")
    authors = {}
    imps = {}
    meta_keys = {}
    max_strings = []
    max_string_len = 0
    tags = {}
    rule_count = 0

    for rule in rulesDict:
        rule_count += 1
        # Imports
        if 'imports' in rule:
            for imp in rule['imports']:
def load_rules(self):
    # load and compile the rules
    # we load all the rules into memory as a string to be compiled
    sources = {}
    rule_count = 0

    # get the list of all the files to compile
    all_files = {}  # key = "namespace", value = [] of file_paths
    # XXX there's a bug in yara where using an empty string as the namespace causes a segfault
    all_files['DEFAULT'] = self.tracked_files.keys()
    for dir_path in self.tracked_dirs.keys():
        all_files[dir_path] = self.tracked_dirs[dir_path]

    for repo_path in self.tracked_repos.keys():
        all_files[repo_path] = []
        for file_path in os.listdir(repo_path):
            file_path = os.path.join(repo_path, file_path)
            if file_path.lower().endswith(('.yar', '.yara')):
                all_files[repo_path].append(file_path)

    if self.test_mode:
        execution_times = []   # of (total_seconds, buffer_type, file_path, rule_name)
        execution_errors = []  # of (error_message, buffer_type, file_path, rule_name)
        random_buffer = os.urandom(1024 * 1024)  # random data to scan

    for namespace in all_files.keys():
        for file_path in all_files[namespace]:
            with open(file_path, 'r') as fp:
                log.debug("loading namespace {} rule file {}".format(namespace, file_path))
                data = fp.read()

                try:
                    # compile the file as a whole first, make sure that works
                    rule_context = yara.compile(source=data)
                    rule_count += 1
                except Exception as e:
                    log.error("unable to compile {}: {}".format(file_path, str(e)))
                    continue

                if self.test_mode:
                    parser = plyara.Plyara()
                    parsed_rules = {}  # key = rule_name, value = parsed_yara_rule
                    for parsed_rule in parser.parse_string(data):
                        parsed_rules[parsed_rule['rule_name']] = parsed_rule

                    for rule_name in parsed_rules.keys():
                        # some rules depend on other rules, so we deal with that here
                        dependencies = []  # list of rule_names that this rule needs
                        rule_context = None

                        while True:
                            # compile all the rules we've collected so far as one
                            dep_source = '\n'.join(
                                plyara.utils.rebuild_yara_rule(parsed_rules[r])
                                for r in dependencies)
                            try:
                                rule_context = yara.compile(source='{}\n{}'.format(
                                    dep_source,
                                    plyara.utils.rebuild_yara_rule(parsed_rules[rule_name])))
                                break
                            except Exception as e:
                                # some rules depend on other rules
                                m = re.search(r'undefined identifier "([^"]+)"', str(e))
                                if m:
                                    dependency = m.group(1)
                                    if dependency in parsed_rules:
                                        # add this rule to the compilation and try again
                                        dependencies.insert(0, dependency)
                                        continue

                                log.warning("rule {} in file {} does not compile by itself: {}".format(
                                    rule_name, file_path, e))
                                rule_context = None
                                break

                        if not rule_context:
                            continue

                        if dependencies:
                            log.info("testing {}:{},{}".format(
                                file_path, rule_name, ','.join(dependencies)))
                        else:
                            log.info("testing {}:{}".format(file_path, rule_name))

                        start_time = time.time()
                        try:
                            rule_context.match(data=random_buffer, timeout=5)
                            end_time = time.time()
                            total_seconds = end_time - start_time
                            execution_times.append((total_seconds, 'random', file_path, rule_name))
                        except Exception as e:
                            execution_errors.append((str(e), 'random', file_path, rule_name))

                        for x in range(255):
                            byte_buffer = bytes([x]) * (1024 * 1024)
                            start_time = time.time()
                            try:
                                rule_context.match(data=byte_buffer, timeout=5)
                                end_time = time.time()
                                total_seconds = end_time - start_time
                                execution_times.append(
                                    (total_seconds, 'byte({})'.format(x), file_path, rule_name))
                            except Exception as e:
                                execution_errors.append(
                                    (str(e), 'byte({})'.format(x), file_path, rule_name))
                                # if we fail once we break out
                                break

                # then we just store the source to be loaded all at once in the compilation that gets used
                if namespace not in sources:
                    sources[namespace] = []
                sources[namespace].append(data)

    if self.test_mode:
        execution_times = sorted(execution_times, key=lambda x: x[0])
        for execution_time, buffer_type, file_path, yara_rule in execution_times:
            print("{}:{} <{}> {}".format(file_path, yara_rule, buffer_type, execution_time))
        for error_message, buffer_type, file_path, yara_rule in execution_errors:
            print("{}:{} <{}> {}".format(file_path, yara_rule, buffer_type, error_message))
        return

    for namespace in sources.keys():
        sources[namespace] = '\r\n'.join(sources[namespace])

    try:
        log.info("loading {} rules".format(rule_count))
        self.rules = yara.compile(sources=sources)
    except Exception as e:
        log.error("unable to compile all yara rules combined: {0}".format(str(e)))
        self.rules = None