def test_send_analysis_and_get_sub_analyses(self):
    """Send a hash analysis, fetch its sub-analyses and check root/non-root split.

    The mocked sub-analyses endpoint returns two entries, one with source
    'root'; the test expects one entry from get_sub_analyses() and a
    non-None get_root_analysis().
    """
    # Arrange
    sub_analyses_payload = {
        'sub_analyses': [
            {'source': 'root', 'sub_analysis_id': 'ab', 'sha256': 'axaxaxax'},
            {'source': 'static_extraction', 'sub_analysis_id': 'ac', 'sha256': 'ba'},
        ]
    }
    with responses.RequestsMock() as mocked_api:
        mocked_api.add('POST',
                       url=self.full_url + '/analyze-by-hash',
                       status=201,
                       json={'result_url': 'a/sd/asd'})
        mocked_api.add('GET',
                       url=self.full_url + '/analyses/asd/sub-analyses',
                       status=200,
                       json=sub_analyses_payload)
        analysis = FileAnalysis(file_hash='a' * 64)

        # Act
        analysis.send()
        analysis.get_sub_analyses()

    # Assert
    # NOTE(review): the assertions run after the mock context is closed, which
    # presumes get_sub_analyses() reuses the already-fetched data - confirm.
    self.assertEqual(analysis.status, consts.AnalysisStatusCode.CREATED)
    self.assertEqual(len(analysis.get_sub_analyses()), 1)
    self.assertIsNotNone(analysis.get_root_analysis())
def find_largest_family(analysis: FileAnalysis) -> dict:
    """Return, per software type, the family with the highest reused gene count.

    The root analysis and every sub-analysis are scanned; analyses with no
    code-reuse data are skipped.

    :param analysis: The analysis whose root and sub-analyses are inspected.
    :return: A defaultdict mapping ``family_type`` to the family dict with the
        largest ``reused_gene_count`` seen for that type. Only types that have
        a family with a positive count are stored as keys; looking up any other
        type yields the default ``{'reused_gene_count': 0}``.
    """
    largest_family_by_software_type = collections.defaultdict(lambda: {'reused_gene_count': 0})
    analyses = itertools.chain([analysis.get_root_analysis()], analysis.get_sub_analyses())

    for sub_analysis in analyses:
        if not sub_analysis.code_reuse:
            continue

        for family in sub_analysis.code_reuse['families']:
            software_type = family['family_type']
            # Read with .get() rather than indexing: indexing a defaultdict
            # stores the placeholder default, which would leave entries in the
            # result that are not real families (no name, id, etc.).
            current_largest = largest_family_by_software_type.get(software_type)
            current_count = 0 if current_largest is None else current_largest['reused_gene_count']
            if family['reused_gene_count'] > current_count:
                largest_family_by_software_type[software_type] = family

    return largest_family_by_software_type
def get_analysis_family_by_family_id(analysis: FileAnalysis, family_id: str) -> int:
    """Return the highest reused gene count recorded for a given family id.

    The root analysis and all sub-analyses are inspected. Within each one,
    only the first family whose ``family_id`` matches is taken into account.

    :param analysis: The analysis whose root and sub-analyses are inspected.
    :param family_id: The family id to look for.
    :return: The largest matching ``reused_gene_count``, or 0 if none matched.
    """
    candidates = itertools.chain([analysis.get_root_analysis()], analysis.get_sub_analyses())
    highest_count = 0

    for candidate in candidates:
        if not candidate.code_reuse:
            continue

        first_match = next(
            (entry for entry in candidate.code_reuse['families'] if entry['family_id'] == family_id),
            None)
        if first_match is not None:
            highest_count = max(highest_count, first_match['reused_gene_count'])

    return highest_count
def get_analysis_summary_metadata(analysis: FileAnalysis,
                                  use_hash_link: bool = False,
                                  should_use_largest_families: bool = True) -> Dict[str, any]:
    """Collect the summary fields of an analysis into a dictionary.

    :param analysis: The analysis to summarize.
    :param use_hash_link: When true, build the analysis URL from ANALYZE_URL
        and the file's sha256 instead of using the URL found in the result.
    :param should_use_largest_families: Forwarded to get_analysis_family.
    :return: A dict with the keys 'verdict', 'sub_verdict', 'analysis_url',
        'main_family', 'gene_count', 'iocs', 'dynamic_ttps' and
        'related_samples_unique_count'. Fields that do not apply to the
        verdict are left as None.
    """
    report = analysis.result()
    verdict = report['verdict'].lower()
    sub_verdict = report['sub_verdict'].lower()

    if use_hash_link:
        analysis_url = f"{ANALYZE_URL}/files/{report['sha256']}?private=true"
    else:
        analysis_url = report['analysis_url']

    summary = {
        'verdict': verdict,
        'sub_verdict': sub_verdict,
        'analysis_url': analysis_url,
        'main_family': None,
        'gene_count': None,
        'iocs': None,
        'dynamic_ttps': None,
        'related_samples_unique_count': None
    }

    # Which software types are considered for the main family, per verdict.
    if verdict == 'malicious':
        priorities = ['malware', 'malicious_packer']
    elif verdict == 'trusted':
        priorities = ['application', 'library', 'interpreter', 'installer']
    elif verdict == 'suspicious':
        priorities = ['administration_tool', 'packer']
    else:
        priorities = None

    if priorities:
        summary['main_family'], summary['gene_count'] = get_analysis_family(analysis,
                                                                            priorities,
                                                                            should_use_largest_families)

    # IOCs, TTPs and related samples are only gathered for these verdicts.
    if verdict not in ('malicious', 'suspicious'):
        return summary

    summary['iocs'] = analysis.iocs
    summary['dynamic_ttps'] = analysis.dynamic_ttps

    operations = []
    for sub_analysis in analysis.get_sub_analyses():
        operations.append(sub_analysis.get_account_related_samples(wait=True))

    if operations:
        unique_hashes = set()
        for operation in operations:
            if operation is None:
                continue
            for related in operation.result['related_samples']:
                unique_hashes.add(related['analysis']['sha256'])
        summary['related_samples_unique_count'] = len(unique_hashes)

    return summary
def get_analysis_summary(analysis: FileAnalysis,
                         no_emojis: bool = False,
                         short: bool = False,
                         use_hash_link: bool = False) -> str:
    """Build a plain-text summary note for a file analysis.

    :param analysis: The analysis to summarize.
    :param no_emojis: When true, no emoji is placed next to the verdict or the
        report link.
    :param short: When true, return a single line of the form
        ``<note> > <analysis url>`` without size, file type, IOC, TTP or
        related-sample details.
    :param use_hash_link: When true, build the analysis URL from ANALYZE_URL
        and the file's sha256 instead of using the URL found in the result.
    :return: The summary text.
    """
    result = analysis.result()
    metadata = analysis.get_root_analysis().metadata
    verdict = result['verdict'].lower()
    sub_verdict = result['sub_verdict'].lower()
    emoji = ''
    note = _get_title(short)

    if not no_emojis:
        emoji = get_emoji(verdict)

    # Software types considered for the main family, per verdict. The
    # malicious entry previously was an empty list, which disagreed with the
    # priorities get_analysis_summary_metadata uses for the same verdict.
    software_type_priorities_by_verdict = {
        'malicious': ['malware', 'malicious_packer'],
        'trusted': ['application', 'library', 'interpreter', 'installer'],
        'suspicious': ['administration_tool', 'packer']
    }
    software_type_priorities = software_type_priorities_by_verdict.get(verdict)
    if software_type_priorities:
        main_family, gene_count = get_analysis_family(analysis, software_type_priorities)
    else:
        main_family = None
        gene_count = None

    note = f'{note}{emoji} {verdict.capitalize()}'

    if verdict in ('suspicious', 'unknown'):
        note = f'{note} - {sub_verdict.replace("_", " ").title()}'

    if main_family:
        note = f'{note} - {main_family}'
        if gene_count and not short:
            note = f'{note} ({gene_count} shared code genes)'

    if use_hash_link:
        analysis_url = f"{ANALYZE_URL}/files/{result['sha256']}?private=true"
    else:
        analysis_url = result['analysis_url']

    if short:
        return f'{note} > {analysis_url}'

    note = f'{note}\n\nSize: {human_readable_size(metadata["size_in_bytes"])}\n'

    if 'file_type' in metadata:
        note = f'{note}File type: {metadata["file_type"]}\n'

    if verdict in ('malicious', 'suspicious'):
        iocs = analysis.iocs
        if iocs:
            iocs_count = 0
            files = iocs.get('files')
            network = iocs.get('network')
            if files:
                iocs_count += len(files)
            if network:
                iocs_count += len(network)
            # NOTE(review): a single indicator is not reported (threshold is
            # "> 1"); kept as in the original - confirm this is intended.
            if iocs_count > 1:
                note = f'{note}IOCs: {iocs_count} Indicators\n'

        if analysis.dynamic_ttps:
            note = f'{note}TTPs: {len(analysis.dynamic_ttps)} techniques\n'

        related_samples = [sub_analysis.get_account_related_samples(wait=True)
                           for sub_analysis in analysis.get_sub_analyses()]
        if related_samples:
            # Entries may be None and are skipped; samples are de-duplicated
            # by sha256 across all sub-analyses.
            related_samples_unique_count = len({
                related_sample['analysis']['sha256']
                for related_sample in itertools.chain.from_iterable(
                    sample.result['related_samples']
                    for sample in related_samples
                    if sample is not None)
            })
            note = f'{note}Similar previous uploads: {related_samples_unique_count} files \n'

    note = (f'{note}\nFull report:\n'
            f'{"" if no_emojis else get_emoji("result_url")} {analysis_url}')

    return note