def test_crawlphish_000_basic_download(self):
    from saq.modules.url import CrawlphishAnalysisV2

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_crawlphish')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    url = root.add_observable(F_URL, 'http://localhost:{}/test_data/crawlphish.000'.format(LOCAL_PORT))
    url.add_directive(DIRECTIVE_CRAWL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url = root.get_observable(url.id)
    analysis = url.get_analysis(CrawlphishAnalysisV2)

    self.assertEquals(analysis.status_code, 200)
    self.assertEquals(analysis.file_name, 'crawlphish.000')
    self.assertTrue(analysis.downloaded)
    self.assertIsNone(analysis.error_reason)

    # there should be a single F_FILE observable
    file_observables = analysis.get_observables_by_type(F_FILE)
    self.assertEquals(len(file_observables), 1)
    file_observable = file_observables[0]

    self.assertTrue(file_observable.has_directive(DIRECTIVE_EXTRACT_URLS))
    self.assertTrue(file_observable.has_relationship(R_DOWNLOADED_FROM))

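# Nearly every test in this suite repeats the same engine lifecycle: build an
# AnalysisEngine, enable modules, start it, save and queue the root analysis,
# queue a TerminatingMarker, wait, then reload the root from storage. A
# hypothetical helper like the sketch below (not part of the original suite;
# the name and signature are assumptions for illustration) would capture that
# pattern:
def _run_modules(self, root, *module_names):
    engine = AnalysisEngine()
    for name in module_names:
        engine.enable_module(name)
    self.start_engine(engine)
    root.save()
    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()
    root.load()
    return root
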
def test_carbon_black_asset_ident_000(self):
    from saq.modules.asset import CarbonBlackAssetIdentAnalysis

    # find an IP address in the past 24 hours to use
    q, result = splunk_query("""index=carbonblack | dedup local_ip | head limit=1 | fields local_ip""")
    self.assertTrue(result)
    self.assertTrue(isinstance(q.json(), list))
    self.assertEquals(len(q.json()), 1)

    ipv4 = q.json()[0]['local_ip']
    logging.info("using ipv4 {} for test".format(ipv4))

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_carbon_black_asset_ident')
    self.start_engine(engine)

    root = create_root_analysis(event_time=datetime.datetime.now())
    root.initialize_storage()
    o_uuid = root.add_observable(F_IPV4, ipv4).id
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    ipv4 = root.get_observable(o_uuid)
    self.assertIsNotNone(ipv4)
    analysis = ipv4.get_analysis(CarbonBlackAssetIdentAnalysis)
    self.assertIsNotNone(analysis)
    self.assertIsNotNone(analysis.details)
    self.assertEquals(len(analysis.discovered_hostnames), 1)

def test_crawlphish_001_download_404(self):
    """We should not extract URLs from data downloaded from URLs that returned a 404."""
    from saq.modules.url import CrawlphishAnalysisV2

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_crawlphish')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    url = root.add_observable(F_URL, 'http://localhost:{}/test_data/crawlphish.001'.format(LOCAL_PORT))
    url.add_directive(DIRECTIVE_CRAWL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url = root.get_observable(url.id)
    analysis = url.get_analysis(CrawlphishAnalysisV2)

    self.assertEquals(analysis.proxy_results['GLOBAL'].status_code, 404)
    if 'tor' in analysis.proxy_results:
        self.assertIsNone(analysis.proxy_results['tor'].status_code)
    self.assertIsNone(analysis.file_name)

    # no file should have been downloaded
    self.assertFalse(analysis.downloaded)
    self.assertIsNotNone(analysis.error_reason)

    file_observables = analysis.get_observables_by_type(F_FILE)
    self.assertEquals(len(file_observables), 0)

def test_protected_url_002_google_drive(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_protected_url_analyzer')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    # taken from an actual sample
    url = root.add_observable(F_URL, 'https://drive.google.com/file/d/1ls_eBCsmf3VG_e4dgQiSh_5VUM10b9s2/view')
    url.add_directive(DIRECTIVE_CRAWL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url = root.get_observable(url.id)

    from saq.modules.url import ProtectedURLAnalysis, PROTECTION_TYPE_GOOGLE_DRIVE
    analysis = url.get_analysis(ProtectedURLAnalysis)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.protection_type, PROTECTION_TYPE_GOOGLE_DRIVE)
    self.assertEquals(analysis.extracted_url,
                      'https://drive.google.com/uc?authuser=0&id=1ls_eBCsmf3VG_e4dgQiSh_5VUM10b9s2&export=download')

    extracted_url = analysis.get_observables_by_type(F_URL)
    self.assertEquals(len(extracted_url), 1)
    extracted_url = extracted_url[0]
    self.assertTrue(extracted_url.has_directive(DIRECTIVE_CRAWL))

def test_protected_url_003_sharepoint(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_protected_url_analyzer')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    # taken from an actual sample
    url = root.add_observable(F_URL, 'https://lahia-my.sharepoint.com/:b:/g/personal/secure_onedrivemsw_bid/EVdjoBiqZTxMnjAcDW6yR4gBqJ59ALkT1C2I3L0yb_n0uQ?e=naeXYD')
    url.add_directive(DIRECTIVE_CRAWL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url = root.get_observable(url.id)

    from saq.modules.url import ProtectedURLAnalysis, PROTECTION_TYPE_SHAREPOINT
    analysis = url.get_analysis(ProtectedURLAnalysis)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.protection_type, PROTECTION_TYPE_SHAREPOINT)

    from urllib.parse import urlparse, parse_qs
    parsed_url = urlparse(analysis.extracted_url)
    self.assertEquals(parsed_url.path, '/personal/secure_onedrivemsw_bid/_layouts/15/download.aspx')
    parsed_qs = parse_qs(parsed_url.query)
    self.assertEquals(parsed_qs['e'][0], 'naeXYD')
    self.assertEquals(parsed_qs['share'][0], 'EVdjoBiqZTxMnjAcDW6yR4gBqJ59ALkT1C2I3L0yb_n0uQ')

    extracted_url = analysis.get_observables_by_type(F_URL)
    self.assertEquals(len(extracted_url), 1)
    extracted_url = extracted_url[0]
    self.assertTrue(extracted_url.has_directive(DIRECTIVE_CRAWL))

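# The assertions above pin down the SharePoint "protected URL" transform: a
# sharing link of the form /:b:/g/personal/<user>/<share_token>?e=<tag> maps
# to /personal/<user>/_layouts/15/download.aspx?share=<token>&e=<tag>. The
# real transform lives in saq.modules.url; the sketch below is a hypothetical
# illustration of that mapping only (name and signature are assumptions):
def _sharepoint_download_url(url):
    from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
    parsed = urlparse(url)
    # the sharing path looks like /:b:/g/personal/<user>/<share_token>
    parts = [p for p in parsed.path.split('/') if p]
    user, token = parts[-2], parts[-1]
    e_tag = parse_qs(parsed.query).get('e', [''])[0]
    path = '/personal/{}/_layouts/15/download.aspx'.format(user)
    query = urlencode({'share': token, 'e': e_tag})
    return urlunparse((parsed.scheme, parsed.netloc, path, '', query, ''))
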
def test_protected_url_000_outlook_safelinks(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_protected_url_analyzer')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    # taken from an actual sample
    url = root.add_observable(F_URL, 'https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.getbusinessready.com.au%2FInvoice-Number-49808%2F&data=02%7C01%7Ccyoung%40northernaviationservices.aero%7C8a388036cbf34f90ec5808d5724be7ed%7Cfc01978435d14339945c4161ac91c300%7C0%7C0%7C636540592704791165&sdata=%2FNQGqAp09WTNgnVnpoWIPcYNVAYsJ11ULuSS7cCsS3Q%3D&reserved=0')
    url.add_directive(DIRECTIVE_CRAWL)  # not actually going to crawl, just testing that it gets copied over
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url = root.get_observable(url.id)

    from saq.modules.url import ProtectedURLAnalysis, PROTECTION_TYPE_OUTLOOK_SAFELINKS
    analysis = url.get_analysis(ProtectedURLAnalysis)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.protection_type, PROTECTION_TYPE_OUTLOOK_SAFELINKS)
    self.assertEquals(analysis.extracted_url, 'http://www.getbusinessready.com.au/Invoice-Number-49808/')

    extracted_url = analysis.get_observables_by_type(F_URL)
    self.assertEquals(len(extracted_url), 1)
    extracted_url = extracted_url[0]
    self.assertTrue(extracted_url.has_directive(DIRECTIVE_CRAWL))

def test_vx_000_hash_lookup(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_vxstream_hash_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(event_time=datetime.datetime.now())
    root.initialize_storage()
    sha2 = root.add_observable(F_SHA256, SAMPLE_HASH)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    sha2 = root.get_observable(sha2.id)

    from saq.modules.vx import VxStreamHashAnalysis
    analysis = sha2.get_analysis(VxStreamHashAnalysis)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.sha256, sha2.value)
    self.assertEquals(analysis.environment_id, saq.CONFIG['vxstream']['environmentid'])
    self.assertEquals(analysis.status, VXSTREAM_STATUS_SUCCESS)
    self.assertIsNotNone(analysis.submit_date)
    self.assertIsNotNone(analysis.complete_date)
    self.assertIsNone(analysis.fail_date)
    self.assertIsNotNone(analysis.vxstream_threat_level)
    self.assertIsNotNone(analysis.vxstream_threat_score)

def test_protected_url_001_dropbox(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_protected_url_analyzer')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    # taken from an actual sample
    url_with_dl0 = root.add_observable(F_URL, 'https://www.dropbox.com/s/ezdhsvdxf6wrxk6/RFQ-012018-000071984-13-Rev.1.zip?dl=0')
    url_with_dl1 = root.add_observable(F_URL, 'https://www.dropbox.com/s/ezdhsvdxf6wrxk6/RFQ-012018-000071984-13-Rev.1.zip?dl=1')
    url_without_dl = root.add_observable(F_URL, 'https://www.dropbox.com/s/ezdhsvdxf6wrxk6/RFQ-012018-000071984-13-Rev.1.zip')
    url_with_dl0.add_directive(DIRECTIVE_CRAWL)  # not actually going to crawl, just testing that it gets copied over
    url_with_dl1.add_directive(DIRECTIVE_CRAWL)
    url_without_dl.add_directive(DIRECTIVE_CRAWL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url_with_dl0 = root.get_observable(url_with_dl0.id)
    url_with_dl1 = root.get_observable(url_with_dl1.id)
    url_without_dl = root.get_observable(url_without_dl.id)

    from saq.modules.url import ProtectedURLAnalysis, PROTECTION_TYPE_DROPBOX

    analysis = url_with_dl0.get_analysis(ProtectedURLAnalysis)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.protection_type, PROTECTION_TYPE_DROPBOX)
    self.assertEquals(analysis.extracted_url,
                      'https://www.dropbox.com/s/ezdhsvdxf6wrxk6/RFQ-012018-000071984-13-Rev.1.zip?dl=1')
    extracted_url = analysis.get_observables_by_type(F_URL)
    self.assertEquals(len(extracted_url), 1)
    extracted_url = extracted_url[0]
    self.assertTrue(extracted_url.has_directive(DIRECTIVE_CRAWL))

    analysis = url_with_dl1.get_analysis(ProtectedURLAnalysis)
    self.assertFalse(analysis)

    analysis = url_without_dl.get_analysis(ProtectedURLAnalysis)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.protection_type, PROTECTION_TYPE_DROPBOX)
    self.assertEquals(analysis.extracted_url,
                      'https://www.dropbox.com/s/ezdhsvdxf6wrxk6/RFQ-012018-000071984-13-Rev.1.zip?dl=1')
    extracted_url = analysis.get_observables_by_type(F_URL)
    self.assertEquals(len(extracted_url), 1)
    extracted_url = extracted_url[0]
    self.assertTrue(extracted_url.has_directive(DIRECTIVE_CRAWL))

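# Taken together, the three Dropbox cases above pin down the transform this
# test exercises: a share URL carrying dl=0, or no dl parameter at all, is
# rewritten to the direct-download form with dl=1, while a URL that already
# carries dl=1 is not treated as protected and produces no
# ProtectedURLAnalysis.
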
def test_email_000_splunk_logging(self):
    # clear splunk logging directory
    splunk_log_dir = os.path.join(saq.CONFIG['splunk_logging']['splunk_log_dir'], 'smtp')
    if os.path.isdir(splunk_log_dir):
        shutil.rmtree(splunk_log_dir)
        os.mkdir(splunk_log_dir)

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_logger')
    engine.enable_module('analysis_module_url_extraction')
    self.start_engine(engine)

    root = create_root_analysis(alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    # we should expect three files in this directory now
    splunk_files = os.listdir(splunk_log_dir)
    self.assertEquals(len(splunk_files), 3)

    smtp_file = None
    url_file = None
    fields_file = None

    for _file in splunk_files:
        if _file.startswith('smtp-'):
            smtp_file = os.path.join(splunk_log_dir, _file)
        elif _file.startswith('url-'):
            url_file = os.path.join(splunk_log_dir, _file)
        elif _file == 'fields':
            fields_file = os.path.join(splunk_log_dir, _file)

    self.assertIsNotNone(smtp_file)
    self.assertIsNotNone(url_file)
    self.assertIsNotNone(fields_file)

    with open(smtp_file, 'r') as fp:
        smtp_logs = fp.read()
    with open(url_file, 'r') as fp:
        url_logs = fp.read()

    smtp_logs = [_ for _ in smtp_logs.split('\n') if _]
    url_logs = [_ for _ in url_logs.split('\n') if _]

    self.assertEquals(len(smtp_logs), 1)
    self.assertEquals(len(url_logs), 3)

    url_fields = url_logs[0].split('\x1e')
    self.assertEquals(len(url_fields), 3)

    smtp_fields = smtp_logs[0].split('\x1e')
    self.assertEquals(len(smtp_fields), 25)

    with open(fields_file, 'r') as fp:
        fields = fp.readline().strip()

    self.assertEquals(fields,
                      'date,attachment_count,attachment_hashes,attachment_names,attachment_sizes,attachment_types,bcc,'
                      'cc,env_mail_from,env_rcpt_to,extracted_urls,first_received,headers,last_received,mail_from,'
                      'mail_to,message_id,originating_ip,path,reply_to,size,subject,user_agent,archive_path,x_mailer')

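# The smtp- and url- files checked above are newline-delimited records whose
# fields are separated by the ASCII record separator (0x1e), with the column
# names written once to the companion 'fields' file. A minimal sketch of a
# reader for that layout (hypothetical helper; not part of the suite):
def _read_splunk_records(path):
    with open(path, 'r') as fp:
        return [line.split('\x1e') for line in fp.read().split('\n') if line]
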
def test_live_browser_001_404(self):
    """We should not download screenshots for URLs that returned a 404 error message."""
    from saq.modules.url import CrawlphishAnalysisV2
    from saq.modules.url import LiveBrowserAnalysis

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_crawlphish')
    engine.enable_module('analysis_module_live_browser_analyzer')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    # this file does not exist
    url = root.add_observable(F_URL, 'http://localhost:{}/test_data/live_browser.dne.html'.format(LOCAL_PORT))
    url.add_directive(DIRECTIVE_CRAWL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url = root.get_observable(url.id)
    analysis = url.get_analysis(CrawlphishAnalysisV2)
    file_observables = analysis.get_observables_by_type(F_FILE)
    self.assertEquals(len(file_observables), 0)

def test_live_browser_000_basic(self):
    """Basic test of LiveBrowserAnalysis."""
    from saq.modules.url import CrawlphishAnalysisV2
    from saq.modules.url import LiveBrowserAnalysis

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_crawlphish')
    engine.enable_module('analysis_module_live_browser_analyzer')
    self.start_engine(engine)

    root = create_root_analysis()
    root.initialize_storage()
    url = root.add_observable(F_URL, 'http://localhost:{}/test_data/live_browser.000.html'.format(LOCAL_PORT))
    url.add_directive(DIRECTIVE_CRAWL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url = root.get_observable(url.id)
    analysis = url.get_analysis(CrawlphishAnalysisV2)
    file_observables = analysis.get_observables_by_type(F_FILE)
    self.assertEquals(len(file_observables), 1)
    file_observable = file_observables[0]

    analysis = file_observable.get_analysis(LiveBrowserAnalysis)
    file_observables = analysis.get_observables_by_type(F_FILE)
    self.assertEquals(len(file_observables), 1)
    file_observable = file_observables[0]
    self.assertEquals(file_observable.value, 'crawlphish/localhost_0/localhost_000.png')

def test_vx_001_file_lookup(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_vxstream_file_analyzer')
    engine.enable_module('analysis_module_vxstream_hash_analyzer')
    engine.enable_module('analysis_module_file_hash_analyzer')
    engine.enable_module('analysis_module_file_type')
    self.start_engine(engine)

    root = create_root_analysis(event_time=datetime.datetime.now())
    root.initialize_storage()
    shutil.copy2('test_data/sample.jar', root.storage_dir)
    _file = root.add_observable(F_FILE, 'sample.jar')
    _file.add_directive(DIRECTIVE_SANDBOX)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    _file = root.get_observable(_file.id)

    from saq.modules.file_analysis import FileHashAnalysis
    from saq.modules.vx import VxStreamHashAnalysis

    hash_analysis = _file.get_analysis(FileHashAnalysis)
    self.assertIsNotNone(hash_analysis)
    sha2 = hash_analysis.get_observables_by_type(F_SHA256)
    self.assertIsInstance(sha2, list)
    self.assertEquals(len(sha2), 1)
    sha2 = sha2[0]

    analysis = sha2.get_analysis(VxStreamHashAnalysis)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.sha256, sha2.value)
    self.assertEquals(analysis.environment_id, saq.CONFIG['vxstream']['environmentid'])
    self.assertEquals(analysis.status, VXSTREAM_STATUS_SUCCESS)
    self.assertIsNotNone(analysis.submit_date)
    self.assertIsNotNone(analysis.complete_date)
    self.assertIsNone(analysis.fail_date)
    self.assertIsNotNone(analysis.vxstream_threat_level)
    self.assertIsNotNone(analysis.vxstream_threat_score)

def test_vx_003_file_with_hash_analysis(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_vxstream_hash_analyzer')
    engine.enable_module('analysis_module_vxstream_file_analyzer')
    engine.enable_module('analysis_module_file_hash_analyzer')
    engine.enable_module('analysis_module_file_type')
    self.start_engine(engine)

    root = create_root_analysis(event_time=datetime.datetime.now())
    root.initialize_storage()
    # using an extension here that doesn't get hash analysis
    with open('/dev/urandom', 'rb') as fp_in:
        with open('test_data/invalid.pcap', 'wb') as fp_out:
            fp_out.write(fp_in.read(4096))

    shutil.copy('test_data/invalid.pcap', root.storage_dir)
    _file = root.add_observable(F_FILE, 'invalid.pcap')
    _file.add_directive(DIRECTIVE_SANDBOX)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    _file = root.get_observable(_file.id)

    from saq.modules.vx import VxStreamFileAnalysis
    analysis = _file.get_analysis(VxStreamFileAnalysis)
    self.assertFalse(analysis)

def test_vx_002_invalid_file_upload(self):
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_vxstream_file_analyzer')
    engine.enable_module('analysis_module_vxstream_hash_analyzer')
    engine.enable_module('analysis_module_file_hash_analyzer')
    engine.enable_module('analysis_module_file_type')
    self.start_engine(engine)

    root = create_root_analysis(event_time=datetime.datetime.now())
    root.initialize_storage()
    with open('/dev/urandom', 'rb') as fp_in:
        with open('test_data/invalid.exe', 'wb') as fp_out:
            fp_out.write(fp_in.read(4096))

    shutil.copy('test_data/invalid.exe', root.storage_dir)
    _file = root.add_observable(F_FILE, 'invalid.exe')
    _file.add_directive(DIRECTIVE_SANDBOX)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    _file = root.get_observable(_file.id)

    from saq.modules.vx import VxStreamFileAnalysis
    analysis = _file.get_analysis(VxStreamFileAnalysis)
    self.assertIsNotNone(analysis)
    #self.assertEquals(analysis.sha256, sha2.value)
    self.assertEquals(analysis.environment_id, saq.CONFIG['vxstream']['environmentid'])
    self.assertEquals(analysis.status, VXSTREAM_STATUS_ERROR)
    self.assertIsNotNone(analysis.submit_date)
    self.assertIsNone(analysis.complete_date)
    self.assertIsNotNone(analysis.fail_date)
    self.assertIsNone(analysis.vxstream_threat_level)
    self.assertIsNone(analysis.vxstream_threat_score)

def test_email_002_archive(self):
    # clear email archive
    with get_db_connection('email_archive') as db:
        c = db.cursor()
        c.execute("DELETE FROM archive")
        db.commit()

    hostname = socket.gethostname().lower()
    archive_dir = os.path.join(saq.SAQ_HOME,
                               saq.CONFIG['analysis_module_email_archiver']['archive_dir'],
                               hostname)
    if os.path.isdir(archive_dir):
        try:
            shutil.rmtree(archive_dir)
            os.mkdir(archive_dir)
        except Exception as e:
            self.fail("unable to clear archive dir {}: {}".format(archive_dir, e))

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_file_hash_analyzer')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_archiver')
    engine.enable_module('analysis_module_url_extraction')
    engine.enable_module('analysis_module_pdf_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'pdf_attachment.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    file_observable.add_directive(DIRECTIVE_ARCHIVE)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    # there should be a single entry in the archive
    with get_db_connection('email_archive') as db:
        c = db.cursor()
        c.execute("SELECT archive_id FROM archive")
        row = c.fetchone()
        archive_id = row[0]

        # check the index and make sure all the expected values are there
        expected_values = [
            ('env_to', b'*****@*****.**'),
            ('body_from', b'*****@*****.**'),
            ('body_to', b'*****@*****.**'),
            ('subject', b'canary #1'),
            ('decoded_subject', b'canary #1'),
            ('message_id', b'<*****@*****.**>'),
            #('url', b'mailto:[email protected]'),
            ('content', b'6967810094670a0978da20db86fbfadc'),
            ('url', b'http://www.ams.org')
        ]

        for field_name, field_value in expected_values:
            c.execute("SELECT value FROM archive_search WHERE field = %s AND archive_id = %s AND value = %s",
                      (field_name, archive_id, field_value))
            row = c.fetchone()
            self.assertIsNotNone(row)
            value = row[0]
            self.assertEquals(value, field_value)

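# Both archive tests walk the same index-verification loop against the
# archive_search table. A hypothetical helper (illustrative name and
# signature, not part of the original suite) would reduce that loop to:
def _assert_archive_indexed(self, db, archive_id, field_name, field_value):
    c = db.cursor()
    c.execute("SELECT value FROM archive_search WHERE field = %s AND archive_id = %s AND value = %s",
              (field_name, archive_id, field_value))
    row = c.fetchone()
    self.assertIsNotNone(row)
    self.assertEquals(row[0], field_value)
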
def test_email_008_whitelisting_001_mail_to(self):
    import saq

    whitelist_path = os.path.join('var', 'tmp', 'brotex.whitelist')
    saq.CONFIG['analysis_module_email_analyzer']['whitelist_path'] = whitelist_path
    if os.path.exists(whitelist_path):
        os.remove(whitelist_path)

    with open(whitelist_path, 'w') as fp:
        fp.write('smtp_to:[email protected]')

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'o365_journaled.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    from saq.modules.email import EmailAnalysis
    file_observable = root.get_observable(file_observable.id)
    self.assertIsNotNone(file_observable)
    # the recipient is whitelisted, so no EmailAnalysis should be generated
    email_analysis = file_observable.get_analysis(EmailAnalysis)
    self.assertFalse(email_analysis)

def test_email_007_o365_journal_email_parsing(self):
    # parse an office365 journaled message
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'o365_journaled.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    from saq.modules.email import EmailAnalysis
    file_observable = root.get_observable(file_observable.id)
    self.assertIsNotNone(file_observable)
    email_analysis = file_observable.get_analysis(EmailAnalysis)
    self.assertIsNotNone(email_analysis)
    self.assertIsNone(email_analysis.parsing_error)
    self.assertIsNotNone(email_analysis.email)
    self.assertIsNone(email_analysis.env_mail_from)
    self.assertTrue(isinstance(email_analysis.env_rcpt_to, list))
    self.assertEquals(len(email_analysis.env_rcpt_to), 1)
    self.assertEquals(email_analysis.env_rcpt_to[0], '*****@*****.**')
    self.assertEquals(email_analysis.mail_from, 'Bobbie Fruitypie <*****@*****.**>')
    self.assertTrue(isinstance(email_analysis.mail_to, list))
    self.assertEquals(len(email_analysis.mail_to), 1)
    self.assertEquals(email_analysis.mail_to[0], '<*****@*****.**>')
    self.assertIsNone(email_analysis.reply_to)
    self.assertEquals(email_analysis.subject, 'INVOICE PDL-06-38776')
    self.assertEquals(email_analysis.decoded_subject, email_analysis.subject)
    self.assertEquals(email_analysis.message_id, '<*****@*****.**>')
    self.assertIsNone(email_analysis.originating_ip)
    self.assertTrue(isinstance(email_analysis.received, list))
    self.assertEquals(len(email_analysis.received), 7)
    self.assertTrue(isinstance(email_analysis.headers, list))
    self.assertTrue(isinstance(email_analysis.log_entry, dict))
    self.assertIsNone(email_analysis.x_mailer)
    self.assertIsNotNone(email_analysis.body)
    self.assertIsInstance(email_analysis.attachments, list)
    self.assertEquals(len(email_analysis.attachments), 0)

def test_email_006_basic_email_parsing(self):
    # parse a basic email message
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    from saq.modules.email import EmailAnalysis
    file_observable = root.get_observable(file_observable.id)
    self.assertIsNotNone(file_observable)
    email_analysis = file_observable.get_analysis(EmailAnalysis)
    self.assertIsNotNone(email_analysis)
    self.assertIsNone(email_analysis.parsing_error)
    self.assertIsNotNone(email_analysis.email)
    self.assertIsNone(email_analysis.env_mail_from)
    self.assertTrue(isinstance(email_analysis.env_rcpt_to, list))
    self.assertEquals(len(email_analysis.env_rcpt_to), 1)
    self.assertEquals(email_analysis.env_rcpt_to[0], '*****@*****.**')
    self.assertEquals(email_analysis.mail_from, 'John Davison <*****@*****.**>')
    self.assertTrue(isinstance(email_analysis.mail_to, list))
    self.assertEquals(len(email_analysis.mail_to), 1)
    self.assertEquals(email_analysis.mail_to[0], '*****@*****.**')
    self.assertIsNone(email_analysis.reply_to)
    self.assertEquals(email_analysis.subject, 'canary #3')
    self.assertEquals(email_analysis.decoded_subject, email_analysis.subject)
    self.assertEquals(email_analysis.message_id,
                      '<CANTOGZsMiMb+7aB868zXSen_fO=NS-qFTUMo9h2eHtOexY8Qhw@mail.gmail.com>')
    self.assertIsNone(email_analysis.originating_ip)
    self.assertTrue(isinstance(email_analysis.received, list))
    self.assertEquals(len(email_analysis.received), 6)
    self.assertTrue(isinstance(email_analysis.headers, list))
    self.assertTrue(isinstance(email_analysis.log_entry, dict))
    self.assertIsNone(email_analysis.x_mailer)
    self.assertIsNotNone(email_analysis.body)
    self.assertIsInstance(email_analysis.attachments, list)
    self.assertEquals(len(email_analysis.attachments), 0)

def test_email_005_message_id(self):
    # make sure we extract the correct message-id
    # this test email has an attachment that contains a message-id
    # we need to make sure we do not extract that one as the message-id observable
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'extra_message_id.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    from saq.modules.email import EmailAnalysis
    file_observable = root.get_observable(file_observable.id)
    self.assertIsNotNone(file_observable)
    email_analysis = file_observable.get_analysis(EmailAnalysis)
    self.assertIsNotNone(email_analysis)

    message_id = email_analysis.get_observables_by_type(F_MESSAGE_ID)
    self.assertTrue(isinstance(message_id, list) and len(message_id) > 0)
    message_id = message_id[0]
    self.assertEquals(message_id.value, "<*****@*****.**>")

def test_email_004_email_pivot_excessive_emails(self):
    # process the email first -- we'll find it when we pivot
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_file_hash_analyzer')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_archiver')
    engine.enable_module('analysis_module_url_extraction')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    file_observable.add_directive(DIRECTIVE_ARCHIVE)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    saq.load_configuration()
    # force this to exceed the limit
    saq.CONFIG['analysis_module_url_email_pivot_analyzer']['result_limit'] = '0'

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_url_email_pivot_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='cloudphish')
    root.initialize_storage()
    # make up some details
    root.details = {
        'alertable': 1,
        'context': {
            'c': '1c38af75-0c42-4ae3-941d-de3975f68602',
            'd': '1',
            'i': 'ashland',
            's': 'email_scanner'
        },
        'sha256_url': '0061537d578e4f65d13e31e190e1079e00dadd808e9fa73f77e3308fdb0e1485',
        'url': 'https://www.alienvault.com',  # <-- the important part
    }
    url_observable = root.add_observable(F_URL, 'https://www.alienvault.com')
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url_observable = root.get_observable(url_observable.id)
    from saq.modules.email import URLEmailPivotAnalysis_v2
    analysis = url_observable.get_analysis(URLEmailPivotAnalysis_v2)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.count, 1)
    # this should not have the details since it exceeded the limit
    self.assertIsNone(analysis.emails)

def test_email_003_email_pivot(self):
    # process the email first -- we'll find it when we pivot
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_file_hash_analyzer')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_archiver')
    engine.enable_module('analysis_module_url_extraction')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    file_observable.add_directive(DIRECTIVE_ARCHIVE)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    saq.load_configuration()

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_url_email_pivot_analyzer')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='cloudphish')
    root.initialize_storage()
    # make up some details
    root.details = {
        'alertable': 1,
        'context': {
            'c': '1c38af75-0c42-4ae3-941d-de3975f68602',
            'd': '1',
            'i': 'ashland',
            's': 'email_scanner'
        },
        'sha256_url': '0061537d578e4f65d13e31e190e1079e00dadd808e9fa73f77e3308fdb0e1485',
        'url': 'https://www.alienvault.com',  # <-- the important part
    }
    url_observable = root.add_observable(F_URL, 'https://www.alienvault.com')
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    url_observable = root.get_observable(url_observable.id)
    from saq.modules.email import URLEmailPivotAnalysis_v2
    analysis = url_observable.get_analysis(URLEmailPivotAnalysis_v2)
    self.assertIsNotNone(analysis)
    self.assertEquals(analysis.count, 1)
    self.assertIsNotNone(analysis.emails)
    self.assertTrue('email_archive' in analysis.emails)

    archive_id = list(analysis.emails['email_archive'].keys())[0]
    entry = analysis.emails['email_archive'][archive_id]
    self.assertEquals(int(archive_id), entry['archive_id'])
    self.assertEquals('canary #3', entry['subject'])
    self.assertEquals('*****@*****.**', entry['recipient'])
    self.assertEquals('<CANTOGZsMiMb+7aB868zXSen_fO=NS-qFTUMo9h2eHtOexY8Qhw@mail.gmail.com>', entry['message_id'])
    self.assertEquals('*****@*****.**', entry['sender'])
    self.assertEquals(len(entry['remediation_history']), 0)
    self.assertFalse(entry['remediated'])

def process(self, work_item):
    if isinstance(work_item, AnalysisRequest):
        ACE.process(self, work_item)
    else:
        AnalysisEngine.process(self, work_item)

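# Dispatch note: AnalysisRequest work items are handled by ACE.process(),
# while anything else (a storage directory path, the TerminatingMarker) falls
# through to AnalysisEngine.process(). The base implementations are invoked
# explicitly by class name, presumably because the two bases do not share a
# cooperative super() chain.
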
def test_email_001_archive(self):
    # clear email archive
    with get_db_connection('email_archive') as db:
        c = db.cursor()
        c.execute("DELETE FROM archive")
        db.commit()

    hostname = socket.gethostname().lower()
    archive_dir = os.path.join(saq.SAQ_HOME,
                               saq.CONFIG['analysis_module_email_archiver']['archive_dir'],
                               hostname)
    if os.path.isdir(archive_dir):
        try:
            shutil.rmtree(archive_dir)
            os.mkdir(archive_dir)
        except Exception as e:
            self.fail("unable to clear archive dir {}: {}".format(archive_dir, e))

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_file_hash_analyzer')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_archiver')
    engine.enable_module('analysis_module_url_extraction')
    self.start_engine(engine)

    root = create_root_analysis(alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    file_observable.add_directive(DIRECTIVE_ARCHIVE)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    # there should be a single entry in the archive
    with get_db_connection('email_archive') as db:
        c = db.cursor()
        c.execute("SELECT archive_id FROM archive")
        row = c.fetchone()
        archive_id = row[0]

        # check the index and make sure all the expected values are there
        expected_values = [
            ('body_from', b'*****@*****.**'),
            ('body_to', b'*****@*****.**'),
            ('decoded_subject', b'canary #3'),
            ('env_to', b'*****@*****.**'),
            ('message_id', b'<CANTOGZsMiMb+7aB868zXSen_fO=NS-qFTUMo9h2eHtOexY8Qhw@mail.gmail.com>'),
            ('subject', b'canary #3'),
            ('url', b'http://tldp.org/LDP/abs/html'),
            ('url', b'https://www.alienvault.com'),
            ('url', b'http://197.210.28.107')
        ]

        for field_name, field_value in expected_values:
            c.execute("SELECT value FROM archive_search WHERE field = %s AND archive_id = %s AND value = %s",
                      (field_name, archive_id, field_value))
            row = c.fetchone()
            self.assertIsNotNone(row)
            value = row[0]
            self.assertEquals(value, field_value)

def test_email_009_live_browser_no_render(self):
    # we usually render HTML attachments to emails
    # but not if it has a tag of "no_render" assigned by a yara rule
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_yara_scanner_v3_4')
    self.start_engine(engine)

    root = create_root_analysis(uuid=str(uuid.uuid4()), alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'phish_me.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    from saq.modules.email import EmailAnalysis
    file_observable = root.get_observable(file_observable.id)
    self.assertIsNotNone(file_observable)
    self.assertTrue(file_observable.has_tag('no_render'))
    from saq.modules.url import LiveBrowserAnalysis
    self.assertFalse(file_observable.get_analysis(LiveBrowserAnalysis))

def test_email_000b_elk_logging(self):
    # clear elk logging directory
    elk_log_dir = os.path.join(saq.SAQ_HOME, saq.CONFIG['elk_logging']['elk_log_dir'])
    if os.path.isdir(elk_log_dir):
        shutil.rmtree(elk_log_dir)
        os.mkdir(elk_log_dir)

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_logger')
    engine.enable_module('analysis_module_url_extraction')
    self.start_engine(engine)

    root = create_root_analysis(alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    wait_for_log_count('creating json logging directory ', 1, 5)
    entry = search_log('creating json logging directory ')
    target_dir = entry[0].getMessage()[len('creating json logging directory '):]

    # we should expect a single json file in this directory now
    elk_files = [os.path.join(target_dir, _) for _ in os.listdir(target_dir)]
    self.assertEquals(len(elk_files), 1)

    with open(elk_files[0], 'r') as fp:
        log_entry = json.load(fp)

    # note: 'refereneces' is kept as-is below to match the field name as logged
    for field in ['date', 'first_received', 'last_received', 'env_mail_from', 'env_rcpt_to',
                  'mail_from', 'mail_to', 'reply_to', 'cc', 'bcc', 'message_id', 'subject',
                  'path', 'size', 'user_agent', 'x_mailer', 'originating_ip', 'headers',
                  'attachment_count', 'attachment_sizes', 'attachment_types', 'attachment_names',
                  'attachment_hashes', 'thread_topic', 'thread_index', 'refereneces', 'x_sender']:
        self.assertTrue(field in log_entry)

def test_email_000a_update_brocess(self):
    # make sure we update the brocess database when we scan an email
    self.reset_brocess()

    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_logger')
    self.start_engine(engine)

    root = create_root_analysis(alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    root.load()
    file_observable = root.get_observable(file_observable.id)
    from saq.modules.email import EmailAnalysis
    analysis = file_observable.get_analysis(EmailAnalysis)
    self.assertIsNotNone(analysis)

    # get the source and dest of the email so we can look it up in the brocess database
    from saq.email import normalize_email_address
    mail_from = normalize_email_address(analysis.mail_from)
    env_rcpt_to = normalize_email_address(analysis.env_rcpt_to[0])

    # we should see a count of 1 here
    with get_db_connection('brocess') as db:
        c = db.cursor()
        c.execute("""SELECT numconnections FROM smtplog WHERE source = %s AND destination = %s""",
                  (mail_from, env_rcpt_to))
        count = c.fetchone()
        self.assertEquals(count[0], 1)

    # and then we do it again and make sure the count increased
    engine = AnalysisEngine()
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_email_analyzer')
    engine.enable_module('analysis_module_email_logger')
    self.start_engine(engine)

    root = create_root_analysis(alert_type='mailbox')
    root.initialize_storage()
    shutil.copy(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'),
                os.path.join(root.storage_dir, 'email.rfc822'))
    file_observable = root.add_observable(F_FILE, 'email.rfc822')
    file_observable.add_directive(DIRECTIVE_ORIGINAL_EMAIL)
    root.save()

    engine.queue_work_item(root.storage_dir)
    engine.queue_work_item(TerminatingMarker())
    engine.wait()

    with get_db_connection('brocess') as db:
        c = db.cursor()
        c.execute("""SELECT numconnections FROM smtplog WHERE source = %s AND destination = %s""",
                  (mail_from, env_rcpt_to))
        count = c.fetchone()
        self.assertEquals(count[0], 2)

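# The two count checks above share one query; a hypothetical helper
# (illustrative name and signature, not part of the original suite) would
# reduce them to a single call:
def _smtplog_count(self, source, destination):
    with get_db_connection('brocess') as db:
        c = db.cursor()
        c.execute("""SELECT numconnections FROM smtplog WHERE source = %s AND destination = %s""",
                  (source, destination))
        row = c.fetchone()
        return row[0] if row else 0
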
def collect(self):
    ACE.collect(self)
    AnalysisEngine.collect(self)
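
# collect() mirrors the explicit two-base delegation used in process() above,
# presumably gathering pending work from both the ACE side and the plain
# AnalysisEngine side on each collection pass.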