def main(debug, verbose, config_path, parser_dir, parser_config, parser_source):
    # Entry point setup: load configuration, apply command line overrides,
    # configure logging and register the available parsers.

    # Setup configuration
    mwcp.config.load(config_path)

    # A truthy command line value replaces the loaded configuration entry;
    # either way the value that ends up in the configuration is what is used.
    effective = {}
    for key, cli_value in (
        ("PARSER_DIR", parser_dir),
        ("PARSER_CONFIG_PATH", parser_config),
        ("PARSER_SOURCE", parser_source),
    ):
        if cli_value:
            mwcp.config[key] = cli_value
        effective[key] = mwcp.config.get(key)

    # Setup logging
    mwcp.setup_logging()
    if debug:
        forced_level = logging.DEBUG
    elif verbose:
        forced_level = logging.INFO
    else:
        # Let log_config.yaml set the log level.
        forced_level = None
    if forced_level is not None:
        logging.root.setLevel(forced_level)

    # Register parsers
    mwcp.register_entry_points()
    directory = effective["PARSER_DIR"]
    if directory:
        mwcp.register_parser_directory(
            directory, config_file_path=effective["PARSER_CONFIG_PATH"])
    default_source = effective["PARSER_SOURCE"]
    if default_source:
        mwcp.set_default_source(default_source)
def test_register_parser_directory(monkeypatch, Sample_parser):
    """Registering a directory without a source name uses the directory as the source."""
    # Swap in an empty source table so registrations from other tests are not visible.
    monkeypatch.setattr('mwcp.registry._sources', {})

    parser_path, config_path = Sample_parser
    directory = str(parser_path.dirname)

    # "Sample" must be unknown before registration and unique afterwards.
    assert not list(mwcp.iter_parsers('Sample'))
    mwcp.register_parser_directory(directory, config_file_path=str(config_path))
    found = list(mwcp.iter_parsers('Sample'))
    assert len(found) == 1

    # Both the source name and the source path are the directory itself.
    source, parser = found[0]
    assert parser.name == 'Sample'
    assert source.name == directory
    assert source.path == directory

    # The source can be selected by keyword or with the "<source>:" prefix.
    by_keyword = list(mwcp.iter_parsers(source=directory))
    assert len(by_keyword) == 1
    by_prefix = list(mwcp.iter_parsers(directory + ':'))
    assert len(by_prefix) == 1
def test_register_parser_directory2(make_sample_parser):
    """Registering a directory with ``source_name`` keeps the path but uses the given name."""
    registry.clear()

    parser_path, config_path = make_sample_parser()
    directory = str(parser_path.dirname)

    # "Sample" must be unknown before registration and unique afterwards.
    assert not list(mwcp.iter_parsers('Sample'))
    mwcp.register_parser_directory(
        directory, config_file_path=str(config_path), source_name='ACME')
    found = list(mwcp.iter_parsers('Sample'))
    assert len(found) == 1

    # The source is named 'ACME' while its path is still the directory.
    source, parser = found[0]
    assert parser.name == 'Sample'
    assert source.name == 'ACME'
    assert source.path == directory

    # The source can be selected by keyword or with the "<source>:" prefix.
    by_keyword = list(mwcp.iter_parsers(source='ACME'))
    assert len(by_keyword) == 1
    by_prefix = list(mwcp.iter_parsers('ACME:'))
    assert len(by_prefix) == 1
def test_iter_parsers(make_sample_parser):
    """``iter_parsers`` yields the configured group and, with ``config_only=False``, its components."""
    registry.clear()

    parser_path, config_path = make_sample_parser()
    source = os.path.abspath(str(parser_path.dirname))
    mwcp.register_parser_directory(source, config_file_path=str(config_path))

    # Lookup by name: one Dispatcher holding the two component parsers.
    by_name = list(mwcp.iter_parsers('Sample'))
    assert len(by_name) == 1
    found_source, group = by_name[0]
    assert group.__class__ == mwcp.Dispatcher
    assert group.name == 'Sample'
    assert found_source.path == source
    assert len(group.parsers) == 2
    assert group.DESCRIPTION == 'A test parser'

    # Everything, ordered by description so each entry has a known position:
    # 'A test parser', 'TestParser Downloader', 'TestParser Implant'.
    everything = list(mwcp.iter_parsers(config_only=False))
    everything.sort(key=lambda entry: entry[1].DESCRIPTION)
    assert len(everything) == 3
    found_source, group = everything[0]
    assert group.__class__ == mwcp.Dispatcher
    assert group.name == 'Sample'
    assert len(group.parsers) == 2
    downloader, implant = group.parsers
    assert group.DESCRIPTION == 'A test parser'
    assert downloader.DESCRIPTION == 'TestParser Downloader'
    assert implant.DESCRIPTION == 'TestParser Implant'
    assert everything[1][1] == downloader
    assert everything[2][1] == implant
def test_parsers_descriptions(monkeypatch, test_parser):
    """A registered parser is described as (name, source, author, description)."""
    # Replace the parser table so registrations from other tests are not visible.
    monkeypatch.setattr('mwcp.parsers._PARSERS', collections.defaultdict(dict))

    directory = os.path.dirname(test_parser)
    mwcp.register_parser_directory(directory)

    found = list(mwcp.get_parser_descriptions('test_parser'))
    assert len(found) == 1
    expected = ('test_parser', os.path.dirname(test_parser), 'Mr. Tester', 'A test parser')
    assert found[0] == expected
def test_recursive_error(make_sample_parser):
    """Tests error handling for a recursive parser."""
    # Start from a cleared registry.
    registry.clear()

    # "Sample" lists "Sample2" as a sub-parser and "Sample2" lists "Sample"
    # again, which closes the loop this test expects to be rejected.
    parser_path, config_file = make_sample_parser(config_text=u'''
Sample:
    description: A test parser
    author: Mr. Tester
    parsers:
        - .Downloader
        - .Implant
        - Sample2

Sample2:
    description: A test parser 2
    author: Mr. Tester
    parsers:
        - Sample.Downloader  # This one should be fine.
        - Sample   # It should complain about this.
    ''')
    parser_dir = str(parser_path.dirname)

    # Registration is expected to succeed; the error is expected only once the
    # parsers are actually iterated.
    mwcp.register_parser_directory(parser_dir, config_file_path=str(config_file), source_name='ACME')
    with pytest.raises(RuntimeError) as exec_info:
        list(mwcp.iter_parsers('Sample'))
    assert 'Detected recursive loop: Sample2 -> Sample' in str(exec_info.value)
def test_missing_parser_class(Sample_parser, tmpdir):
    """Tests error handling for a missing parser class."""
    # Start from a cleared registry.
    registry.clear()

    parser_path, config_file = Sample_parser
    parser_dir = str(parser_path.dirname)

    # Overwrite the fixture's configuration with one that additionally
    # references ".NoExist", the component this test expects to be reported
    # as missing.
    config_file.write_text(u'''
Sample:
    description: A test parser
    author: Mr. Tester
    parsers:
        - .Downloader
        - .Implant
        - .NoExist
    ''', 'utf8')

    # Registration is expected to succeed; the error is expected only once the
    # parsers are actually iterated.
    mwcp.register_parser_directory(parser_dir, config_file_path=str(config_file), source_name='ACME')
    with pytest.raises(RuntimeError) as exec_info:
        list(mwcp.iter_parsers('Sample'))
    assert 'Unable to find Sample.NoExist' in str(exec_info.value)
def test_parsers_descriptions(make_sample_parser):
    """``get_parser_descriptions`` honours name, source, ``config_only`` and ":" filters."""
    registry.clear()

    parser_path, config_path = make_sample_parser()
    source = os.path.abspath(str(parser_path.dirname))
    mwcp.register_parser_directory(source, config_file_path=str(config_path))

    # Expected rows, each as (name, source, author, description).
    group_row = ('Sample', source, 'Mr. Tester', 'A test parser')
    downloader_row = ('Sample.Downloader', source, '', 'TestParser Downloader')
    implant_row = ('Sample.Implant', source, '', 'TestParser Implant')
    every_row = [group_row, downloader_row, implant_row]

    def describe(*args, **kwargs):
        """Return the matching descriptions as a list."""
        return list(mwcp.get_parser_descriptions(*args, **kwargs))

    # Test bogus
    assert describe('bogus') == []

    # Test config only
    assert describe() == [group_row]
    assert describe('Sample') == [group_row]
    assert describe(source=source) == [group_row]

    # Test all non-config only
    assert describe('Sample', config_only=False) == [group_row]
    assert describe(config_only=False) == every_row
    assert describe('Sample.Downloader', config_only=False) == [downloader_row]
    assert describe(source=source, config_only=False) == every_row

    # Test using ":" syntax
    assert describe(':Sample', config_only=False) == [group_row]
    assert describe(source + ':', config_only=False) == every_row
def main(debug, verbose, parser_dir, parser_config, parser_source):
    # Entry point setup: configure logging, then register the available parsers.

    # Setup logging
    mwcp.setup_logging()
    if debug:
        forced_level = logging.DEBUG
    elif verbose:
        forced_level = logging.INFO
    else:
        # Let log_config.yaml set the log level.
        forced_level = None
    if forced_level is not None:
        logging.root.setLevel(forced_level)

    # Register parsers: entry points always, the extra directory and the
    # default source only when they were supplied.
    mwcp.register_entry_points()
    if parser_dir:
        mwcp.register_parser_directory(
            parser_dir, config_file_path=parser_config)
    if parser_source:
        mwcp.set_default_source(parser_source)
def test_non_importable_module(make_sample_parser):
    """Checks that an import failure inside a parser module surfaces as ImportError."""
    registry.clear()

    module_path, config_file = make_sample_parser()
    directory = str(module_path.dirname)

    # Replace the module's contents with an import of a module that does not exist.
    module_path.write('\nimport dummy\n', mode='w+')

    mwcp.register_parser_directory(
        directory, config_file_path=str(config_file), source_name='ACME')

    with pytest.raises(ImportError) as caught:
        list(mwcp.iter_parsers('Sample'))

    message = str(caught.value)
    assert "No module named 'dummy'" in message
def load_mwcp_parsers() -> Tuple[Dict[str, str], ModuleType]:
    """Register the configured MWCP parser directory and index its parsers.

    Returns a pair ``(parsers, module)``. ``parsers`` maps the last
    dot-separated component of each parser name to the full parser name, and
    ``module`` is the imported ``mwcp`` module. When MWCP is disabled in the
    processing configuration, or an ``ImportError`` is raised while loading,
    the pair is ``({}, False)``.
    """
    if not process_cfg.mwcp.enabled:
        return {}, False

    # Import All config parsers
    try:
        import mwcp

        logging.getLogger("mwcp").setLevel(logging.CRITICAL)
        modules_dir = os.path.join(CUCKOO_ROOT, process_cfg.mwcp.modules_path)
        mwcp.register_parser_directory(modules_dir)

        _malware_parsers = {}
        for block in mwcp.get_parser_descriptions(config_only=False):
            short_name = block.name.rsplit(".", 1)[-1]
            _malware_parsers[short_name] = block.name
        assert "MWCP_TEST" in _malware_parsers
    except ImportError as e:
        log.info("Missed MWCP -> pip3 install mwcp\nDetails: %s", e)
        return {}, False
    else:
        return _malware_parsers, mwcp
def main():
    """Parse the command line, register parsers and serve ``app`` on localhost:8080."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--parserdir', help='Parser directory to use.')
    cli.add_argument('--parserconfig', help='Parser configuration file to use')
    cli.add_argument(
        '--parsersource',
        help='Default parser source to use. Otherwise parsers from all sources are available.')
    options = cli.parse_args()

    # Entry points are only registered when no explicit parser directory is given.
    if not options.parserdir:
        mwcp.register_entry_points()
    else:
        mwcp.register_parser_directory(
            options.parserdir, config_file_path=options.parserconfig)
        print('Set parser directory to: {}'.format(options.parserdir))

    if options.parsersource:
        mwcp.set_default_source(options.parsersource)

    run(app, server='auto', host='localhost', port=8080)
def test_register_parser_directory(monkeypatch, test_parser):
    """Registering the parser's directory makes it discoverable by name and by source."""
    # Replace the parser table so registrations from other tests are not visible.
    monkeypatch.setattr('mwcp.parsers._PARSERS', collections.defaultdict(dict))

    directory = os.path.dirname(test_parser)

    # The parser must be unknown before registration and unique afterwards.
    assert not list(mwcp.iter_parsers('test_parser'))
    mwcp.register_parser_directory(directory)
    matches = list(mwcp.iter_parsers('test_parser'))
    assert len(matches) == 1

    # Entries are (name, source name, class) triples; the source is the directory.
    name, source_name, _parser_class = matches[0]
    assert name == 'test_parser'
    assert source_name == directory

    # The source can be selected by keyword or with the "<source>:" prefix.
    by_keyword = list(mwcp.iter_parsers(source=directory))
    assert len(by_keyword) == 1
    by_prefix = list(mwcp.iter_parsers(directory + ':'))
    assert len(by_prefix) == 1
def main():
    """Register parsers as requested on the command line, then start the web server."""
    import argparse

    # (flag, help text) for every supported option, in declaration order.
    option_specs = (
        ('--parserdir', 'Parser directory to use.'),
        ('--parserconfig', 'Parser configuration file to use'),
        ('--parsersource',
         'Default parser source to use. Otherwise parsers from all sources are available.'),
    )
    argparser = argparse.ArgumentParser()
    for flag, help_text in option_specs:
        argparser.add_argument(flag, help=help_text)
    options = argparser.parse_args()

    parser_dir = options.parserdir
    if parser_dir:
        # An explicit directory replaces entry point registration.
        mwcp.register_parser_directory(parser_dir, config_file_path=options.parserconfig)
        print('Set parser directory to: {}'.format(options.parserdir))
    else:
        mwcp.register_entry_points()

    default_source = options.parsersource
    if default_source:
        mwcp.set_default_source(default_source)

    run(app, server='auto', host='localhost', port=8080)
def __init__( self, parserdir=None, outputdir=None, tempdir=None, outputfile_prefix=None, interpreter_path=None, disabledebug=False, disableoutputfiles=False, disabletempcleanup=False, disableautosubfieldparsing=False, disablevaluededup=False, disablemodulesearch=False, base64outputfiles=False, ): """ Initializes the Reporter object :param str parserdir: sets parser directory (defaults to parsers found in mwcp/parsers) :param str tempdir: sets path to temporary directory :param str outputdir: sets directory for output_file(). Should not be written to (or read from) by parsers directly (use tempdir) :param str outputfile_prefix: sets prefix for output files written to outputdir. Special value "md5" causes prefix by md5 of the input file. :param str interpreter_path: overrides value returned by interpreter_path() :param bool disabledebug: disable inclusion of debug messages in output :param bool disableoutputfiles: disable writing if files to filesystem :param bool disabletempcleanup: disable cleanup (deletion) of temp files :param bool disableautosubfieldparsing: disable parsing of metadata item of subfields :param bool disablevaluededup: disable deduplication of metadata items :param bool disablemodulesearch: disable search of modules for parsers, only look in parsers directory """ # defaults self.tempdir = tempdir or tempfile.gettempdir() self.outputfiles = {} self._handle = None self.fields = { "debug": { "description": "debug", "type": "listofstrings" } } self.metadata = {} self.errors = [] self.input_file = None # Continue to allow use of deprecated resourcedir. # TODO: Remove this in a new release version. self._resourcedir = None self.resourcedir = os.path.dirname(resources.__file__) self.__managed_tempdir = None self.__outputdir = outputdir or '' self.__outputfile_prefix = outputfile_prefix or '' # Register parsers from given directory. 
# Only register if a custom parserdir was provided or MWCP's entry_points did not get registered because # the project was not installed with setuptools. # NOTE: This is all to keep backwards compatibility. mwcp.register_parser_directory() should be # called outside of this class in the future. self.parserdir = parserdir or self.DEFAULT_PARSERDIR if self.parserdir != self.DEFAULT_PARSERDIR or not any( mwcp.iter_parsers(source='mwcp')): mwcp.register_parser_directory(self.parserdir) self._interpreter_path = interpreter_path self._disable_debug = disabledebug self._disable_output_files = disableoutputfiles self._disable_temp_cleanup = disabletempcleanup self._disable_auto_subfield_parsing = disableautosubfieldparsing self._disable_value_dedup = disablevaluededup self._disable_module_search = disablemodulesearch self._base64_output_files = base64outputfiles # TODO: Move fields.json to shared data or config folder. fieldspath = os.path.join(os.path.dirname(mwcp.resources.__file__), "fields.json") with open(fieldspath, 'rb') as f: self.fields = json.load(f)
import subprocess from io import BytesIO from collections import Mapping, Iterable from lib.cuckoo.common.config import Config from lib.cuckoo.common.constants import CUCKOO_ROOT from lib.cuckoo.common.objects import CAPE_YARA_RULEPATH, File log = logging.getLogger(__name__) malware_parsers = dict() cape_malware_parsers = dict() #Import All config parsers try: import mwcp mwcp.register_parser_directory( os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "mwcp")) malware_parsers = { block.name.split(".")[-1]: block.name for block in mwcp.get_parser_descriptions(config_only=False) } HAS_MWCP = True #disable logging #[mwcp.parser] WARNING: Missing identify() function for: a35a622d01f83b53d0407a3960768b29.Emotet.Emotet except ImportError as e: HAS_MWCP = False log.info( "Missed MWCP -> pip3 install git+https://github.com/Defense-Cyber-Crime-Center/DC3-MWCP\nDetails: {}" .format(e)) try:
def main(args=None):
    """Run the deprecated ``mwcp-tool`` command line.

    :param args: argument list handed to the argument parser (``None`` lets
        the parser use its default).

    Exits with status 0 after printing fields, the parser list or the help
    text, and with status 1 when parsing raises an exception.
    """
    warnings.warn("WARNING: mwcp-tool is deprecated. Please use \"mwcp parse\" instead.")

    argparser = get_arg_parser()
    args, input_files = argparser.parse_known_args(args)
    as_json = args.jsonoutput

    # Setup logging
    mwcp.setup_logging()
    if args.hidedebug:
        logging.root.setLevel(logging.WARNING)
    elif args.debug:
        logging.root.setLevel(logging.DEBUG)

    # This is a preliminary check before creating the reporter to establish how output
    # file prefixes should be set.
    if args.disableoutputfileprefix:
        args.outputfile_prefix = ''
    elif (args.filelistindirection
          or len(input_files) > 1
          or any(os.path.isdir(path) for path in input_files)):
        args.outputfile_prefix = 'md5'

    if args.fields:
        _print_fields(json_output=as_json)
        sys.exit(0)

    # Register parsers
    mwcp.register_entry_points()
    if args.parserdir:
        mwcp.register_parser_directory(args.parserdir, config_file_path=args.parserconfig)
    if args.parsersource:
        mwcp.set_default_source(args.parsersource)

    if args.list:
        _print_parsers(json_output=as_json, config_only=args.list < 2)
        sys.exit(0)

    # Nothing to parse, or nothing to parse it with: show usage.
    if not (input_files and args.parser):
        argparser.print_help()
        sys.exit(0)

    file_paths = _get_file_paths(input_files, is_filelist=args.filelistindirection)

    # Run MWCP
    try:
        reporter = mwcp.Reporter(
            outputdir=args.outputdir,
            outputfile_prefix=args.outputfile_prefix,
            tempdir=args.tempdir,
            disable_output_files=args.disableoutputfiles,
            disable_temp_cleanup=args.disabletempcleanup,
            base64_output_files=args.base64outputfiles)

        results = []
        for file_path in file_paths:
            results.append(_parse_file(
                reporter, file_path, args.parser, include_filename=args.includefilename))
            # The textual report is printed per file, right after it was parsed.
            if not as_json:
                reporter.print_report()

        if args.csvwrite:
            csv_path = args.csvwrite
            _write_csv(file_paths, results, csv_path, args.base64outputfiles)
            if not as_json:
                print('Wrote csv file: {}'.format(csv_path))

        if as_json:
            print(json.dumps(results, indent=4))

    except Exception as e:
        error_message = "Error running DC3-MWCP: {}".format(e)
        traceback.print_exc()
        if as_json:
            print(json.dumps({'errors': [error_message]}))
        else:
            print(error_message)
        sys.exit(1)
def main(args=None):
    """Entry point of the deprecated ``mwcp-tool`` script.

    Parses the command line, registers parsers, then either prints the
    requested listing (fields, parsers or help) and exits with status 0, or
    parses every input file and prints/writes the results. Any exception
    raised while parsing is reported and turned into exit status 1.

    :param args: argument list for the argument parser; ``None`` lets the
        parser use its default.
    """
    warnings.warn(
        "WARNING: mwcp-tool is deprecated. Please use \"mwcp parse\" instead.")
    argparser = get_arg_parser()
    args, input_files = argparser.parse_known_args(args)

    # Setup logging
    mwcp.setup_logging()
    if args.hidedebug:
        logging.root.setLevel(logging.WARNING)
    elif args.debug:
        logging.root.setLevel(logging.DEBUG)

    # This is a preliminary check before creating the reporter to establish how output
    # file prefixes should be set.
    if args.disableoutputfileprefix:
        args.outputfile_prefix = ''
    else:
        several_inputs = (
            args.filelistindirection
            or len(input_files) > 1
            or any(os.path.isdir(entry) for entry in input_files)
        )
        if several_inputs:
            args.outputfile_prefix = 'md5'

    if args.fields:
        _print_fields(json_output=args.jsonoutput)
        sys.exit(0)

    # Register parsers
    mwcp.register_entry_points()
    if args.parserdir:
        mwcp.register_parser_directory(
            args.parserdir, config_file_path=args.parserconfig)
    if args.parsersource:
        mwcp.set_default_source(args.parsersource)

    if args.list:
        _print_parsers(json_output=args.jsonoutput, config_only=args.list < 2)
        sys.exit(0)

    if not input_files or not args.parser:
        argparser.print_help()
        sys.exit(0)

    file_paths = _get_file_paths(input_files, is_filelist=args.filelistindirection)

    # Run MWCP
    try:
        reporter_options = dict(
            outputdir=args.outputdir,
            outputfile_prefix=args.outputfile_prefix,
            tempdir=args.tempdir,
            disable_output_files=args.disableoutputfiles,
            disable_temp_cleanup=args.disabletempcleanup,
            base64_output_files=args.base64outputfiles,
        )
        reporter = mwcp.Reporter(**reporter_options)

        results = []
        for file_path in file_paths:
            parsed = _parse_file(
                reporter, file_path, args.parser, include_filename=args.includefilename)
            results.append(parsed)
            # Human readable output is emitted file by file.
            if not args.jsonoutput:
                reporter.print_report()

        csv_path = args.csvwrite
        if csv_path:
            _write_csv(file_paths, results, csv_path, args.base64outputfiles)
            if not args.jsonoutput:
                print('Wrote csv file: {}'.format(csv_path))

        if args.jsonoutput:
            print(json.dumps(results, indent=4))

    except Exception as e:
        error_message = "Error running DC3-MWCP: {}".format(e)
        traceback.print_exc()
        if args.jsonoutput:
            print(json.dumps({'errors': [error_message]}))
        else:
            print(error_message)
        sys.exit(1)
indexed = sorted(indexed) for entry in indexed: if (category, entry) == indexed[-1]: log.debug("\t `-- %s %s", category, entry) else: log.debug("\t |-- %s %s", category, entry) HAS_MWCP = False if process_cfg.mwcp.enabled: # Import All config parsers try: import mwcp logging.getLogger("mwcp").setLevel(logging.CRITICAL) mwcp.register_parser_directory(os.path.join(CUCKOO_ROOT, process_cfg.mwcp.modules_path)) malware_parsers = {block.name.split(".")[-1]: block.name for block in mwcp.get_parser_descriptions(config_only=False)} HAS_MWCP = True except ImportError as e: logging.info( "Missed MWCP -> pip3 install git+https://github.com/Defense-Cyber-Crime-Center/DC3-MWCP\nDetails: {}".format(e) ) HAS_MALWARECONFIGS = False if process_cfg.ratdecoders.enabled: try: from malwareconfig import fileparser from malwareconfig.modules import __decoders__, __preprocessors__ HAS_MALWARECONFIGS = True if process_cfg.ratdecoders.modules_path:
def main():
    """
    Run tool.

    Depending on the command line flags this removes stored test results,
    updates or adds test cases for a parser, or runs the test cases and
    prints timing statistics. When test cases are run, the process exits with
    status 0 if every test passed and 1 otherwise.

    :raises ValueError: if only one of --parserdir / --parserconfig is given.
    """
    warnings.warn("WARNING: mwcp-test is deprecated. Please use \"mwcp test\" instead.")
    print('')

    # Get command line arguments
    argparser = get_arg_parser()
    args, input_files = argparser.parse_known_args()

    # Setup logging
    mwcp.setup_logging()
    # The threshold starts at ERROR and drops by 10 for every unit of args.verbose.
    logging.root.setLevel(logging.ERROR - (args.verbose * 10))

    # A single None entry is handed to the Tester when no specific parser was selected.
    if args.all_tests or not args.parser_name:
        parsers = [None]
    else:
        parsers = [args.parser_name]

    if args.parserdir and args.parserconfig:
        mwcp.register_parser_directory(args.parserdir, args.parserconfig)
    elif args.parserdir or args.parserconfig:
        raise ValueError('Both --parserdir and --parserconfig must be specified.')
    else:
        mwcp.register_entry_points()

    if args.parsersource:
        mwcp.set_default_source(args.parsersource)

    # Configure reporter based on args
    reporter = mwcp.Reporter(disable_output_files=True)

    # Configure test object
    tester = Tester(
        reporter=reporter,
        results_dir=args.test_case_dir,
        parser_names=parsers,
        nprocs=args.nprocs,
        field_names=filter(None, args.field_names.split(",")),
        ignore_field_names=filter(None, args.exclude_field_names.split(","))
    )

    # Gather all our input files
    if args.input_file:
        input_files = read_input_list(input_files[0])

    # Delete files from test cases
    if args.delete:
        removed_files = tester.remove_test_results(
            args.parser_name, input_files)
        for filename in removed_files:
            print(u"Removing results for {} in {}".format(
                filename, tester.get_results_filepath(args.parser_name)))

    # Update previously existing test cases
    elif args.update and args.parser_name:
        print("Updating test cases. May take a while...")
        results_file_path = tester.get_results_filepath(args.parser_name)
        if os.path.isfile(results_file_path):
            input_files = tester.list_test_files(args.parser_name)
        else:
            sys.exit(u"No test case file found for parser '{}'. "
                     u"No update could be made.".format(args.parser_name))
        update_tests(tester, input_files, args.parser_name)

    # Add/update test cases for specified input files and specified parser
    elif args.parser_name and not args.delete and input_files:
        update_tests(tester, input_files, args.parser_name)

    # Run test cases
    else:
        print("Running test cases. May take a while...")

        start_time = timeit.default_timer()
        test_results = []
        all_passed = True
        total = tester.total
        failed = []

        # Generate format string.
        digits = len(str(total))
        if not tester.test_cases:
            parser_len = 10
            filename_len = 10
        else:
            parser_len = max(len(test_case.parser_name) for test_case in tester.test_cases)
            filename_len = max(len(test_case.filename) for test_case in tester.test_cases)
        msg_format = "{{parser:{0}}} {{filename:{1}}} {{run_time:.4f}}s".format(parser_len, filename_len)
        format_str = "{{count:> {0}d}}/{{total:0{0}d}} - ".format(digits) + msg_format

        # Run tests and output progress results.
        for count, test_result in enumerate(tester, start=1):
            all_passed &= test_result.passed
            if not test_result.passed:
                failed.append((count, test_result.parser_name, test_result.filename))

            if test_result.run_time:  # Ignore missing tests from stat summary.
                test_results.append(test_result)

            if not args.silent:
                message = format_str.format(
                    count=count,
                    total=total,
                    parser=test_result.parser_name,
                    filename=test_result.filename,
                    run_time=test_result.run_time
                )
                # Skip print() to immediately flush stdout buffer (issue in Docker containers)
                sys.stdout.write(message + '\n')
                sys.stdout.flush()
                test_result.print(
                    failed_tests=True, passed_tests=not args.only_failed_tests, json_format=args.json
                )

        end_time = timeit.default_timer()

        # Present test statistics
        if not args.silent and test_results:
            print('\nTest stats:')
            print('\nTop 10 Slowest Test Cases:')

            format_str = "{index:2}. " + msg_format

            # Cases sorted slowest first
            sorted_cases = sorted(test_results, key=lambda x: x.run_time, reverse=True)
            for i, test_result in enumerate(sorted_cases[:10], start=1):
                print(format_str.format(
                    index=i,
                    parser=test_result.parser_name,
                    filename=test_result.filename,
                    run_time=test_result.run_time
                ))

            print('\nTop 10 Fastest Test Cases:')
            for i, test_result in enumerate(list(reversed(sorted_cases))[:10], start=1):
                print(format_str.format(
                    index=i,
                    parser=test_result.parser_name,
                    filename=test_result.filename,
                    run_time=test_result.run_time
                ))

            run_times = [test_result.run_time for test_result in test_results]
            print('\nMean Running Time: {:.4f}s'.format(
                sum(run_times) / len(test_results)
            ))
            print('Median Running Time: {:.4f}s'.format(
                _median(run_times)
            ))
            print('Cumulative Running Time: {}'.format(datetime.timedelta(seconds=sum(run_times))))
            print()

        print("Total Running Time: {}".format(datetime.timedelta(seconds=end_time - start_time)))

        if failed:
            print()
            print("Failed tests:")
            for test_info in failed:
                print("#{} - {}\t{}".format(*test_info))
            print()

        print("All Passed = {0}\n".format(all_passed))
        # sys.exit() instead of the bare exit(): the latter is only injected by the
        # site module for interactive use and is missing under "python -S" or in
        # frozen builds. The exit status is unchanged.
        sys.exit(0 if all_passed else 1)
def test_external_source(make_sample_parser):
    """Tests importing a parser from an external source."""
    # Start from a cleared registry.
    registry.clear()

    # First source ("acme"): the fixture's default sample parser.
    parser_path, config_file = make_sample_parser("acme")
    parser_dir = str(parser_path.dirname)

    # Second source ("acme2"): its own Decoy component plus references into
    # "acme" using the "<source>:<parser>" syntax. It also defines a group
    # named "Sample", the same name as the group it references in "acme".
    parser2_path, config2_file = make_sample_parser(
        "acme2",
        parser_name="Sample2",
        parser_code=u'''
from mwcp import Parser

class Decoy(Parser):
    DESCRIPTION = "TestParser2 Decoy"
        ''',
        config_text=r'''
Sample2:
    description: Another test parser
    author: Mrs. Tester
    parsers:
        - .Decoy
        - acme:Sample.Downloader  # imports individual component
        - acme:Sample  # imports parser group

Sample:
    description: Another test parser
    author: Mrs. Tester
    parsers:
        - Sample2.Decoy
        - acme:Sample
        '''
    )
    parser2_dir = str(parser2_path.dirname)

    # Register 2 parsers.
    mwcp.register_parser_directory(parser_dir, config_file_path=str(config_file), source_name="acme")
    mwcp.register_parser_directory(parser2_dir, config_file_path=str(config2_file), source_name="acme2")

    # Test that Sample2 has Sample and Sample.Downloader in it's sub-parsers.
    parsers = list(mwcp.iter_parsers("Sample2"))
    assert len(parsers) == 1
    Sample2_parser = parsers[0][1]
    assert len(Sample2_parser.parsers) == 3
    # Sub-parsers are expected in configuration order, each tagged with the
    # source it was defined in.
    assert [(p.name, p.source) for p in Sample2_parser.parsers] == [
        ("Sample2.Decoy", "acme2"),
        ("Sample.Downloader", "acme"),
        ("Sample", "acme"),
    ]

    # Test we don't hit a recursion error when we reference a parser with the same name.
    parsers = list(mwcp.iter_parsers("Sample", source="acme2"))
    assert len(parsers) == 1
    Sample_parser = parsers[0][1]
    assert len(Sample_parser.parsers) == 2
    assert [(p.name, p.source) for p in Sample_parser.parsers] == [
        ("Sample2.Decoy", "acme2"),
        ("Sample", "acme"),
    ]
def main(args=None):
    """Run the MWCP command line tool.

    Parses the command line, then either prints the parser list, the field
    list or the help text (exit status 0), or runs the selected parser on
    every input file and prints/writes the results. Any exception raised
    while parsing is reported and turned into exit status 1.

    :param args: argument list handed to the argument parser (``None`` lets
        the parser use its default).
    """
    argparser = get_arg_parser()
    args, input_files = argparser.parse_known_args(args)

    # This is a preliminary check before creating the reporter to establish how output
    # file prefixes should be set.
    if args.disableoutputfileprefix:
        args.outputfile_prefix = ''
    elif (args.filelistindirection
          or len(input_files) > 1
          or any(os.path.isdir(x) for x in input_files)):
        args.outputfile_prefix = 'md5'

    if args.list:
        if args.parserdir:
            mwcp.register_parser_directory(args.parserdir)
        _print_parsers(json_output=args.jsonoutput)
        sys.exit(0)

    if args.fields:
        _print_fields(json_output=args.jsonoutput)
        sys.exit(0)

    if not input_files or not args.parser:
        argparser.print_help()
        sys.exit(0)

    file_paths = _get_file_paths(input_files, is_filelist=args.filelistindirection)

    kwargs = {}
    if args.kwargs_raw:
        kwargs = dict(json.loads(args.kwargs_raw))
        # Iterate over a snapshot because values are replaced while looping.
        for key, value in list(kwargs.items()):
            # A value of the form "b64file(<path>)" is replaced by the base64
            # encoded contents of the file at <path>.
            if value and value.startswith('b64file(') and value.endswith(')'):
                tmp_filename = value[len('b64file('):-1]
                with open(tmp_filename, 'rb') as f:
                    kwargs[key] = base64.b64encode(f.read())

    # Run MWCP
    try:
        reporter = mwcp.Reporter(parserdir=args.parserdir,
                                 outputdir=args.outputdir,
                                 outputfile_prefix=args.outputfile_prefix,
                                 tempdir=args.tempdir,
                                 disabledebug=args.hidedebug,
                                 disableoutputfiles=args.disableoutputfiles,
                                 disabletempcleanup=args.disabletempcleanup,
                                 base64outputfiles=args.base64outputfiles)
        results = []
        for file_path in file_paths:
            result = _parse_file(reporter,
                                 file_path,
                                 args.parser,
                                 options=kwargs,
                                 include_filename=args.includefilename)
            results.append(result)
            # The textual report is printed per file, right after it was parsed.
            if not args.jsonoutput:
                reporter.print_report()

        if args.csvwrite:
            csv_path = args.csvwrite
            # Pass file_paths (one entry per result) rather than the raw
            # command line arguments: the two differ whenever a directory or
            # a file list was given on the command line.
            _write_csv(file_paths, results, csv_path, args.base64outputfiles)
            if not args.jsonoutput:
                print('Wrote csv file: {}'.format(csv_path))

        if args.jsonoutput:
            print(json.dumps(results, indent=4))

    except Exception as e:
        error_message = "Error running DC3-MWCP: {}".format(e)
        traceback.print_exc()
        if args.jsonoutput:
            print(json.dumps({'errors': [error_message]}))
        else:
            print(error_message)
        sys.exit(1)
def register():
    """Register MWCP parsers and create the module-level report.

    Registers the entry point parsers and the parsers found in
    ``MWCP_PARSERS_DIR_PATH``, then stores a new ``mwcp.Report`` whose output
    directory is the current working directory in the global ``report``.

    :return: the newly created report.
    """
    global report

    mwcp.register_entry_points()
    mwcp.register_parser_directory(MWCP_PARSERS_DIR_PATH)

    output_directory = os.getcwd()
    report = mwcp.Report(output_directory=output_directory)
    return report