def test_sha512_extract(self):
    """Check that a SHA-512 hash is the first result for bare and wrapped input.

    The same hash string is fed to ``iocextract.extract_sha512_hashes`` as-is
    and after being passed through each ``_wrap_*`` helper; in every case the
    first extracted value must equal the original hash.
    """
    content = 'be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'

    # Bare hash first, then the hash run through each wrapping helper.
    candidates = (
        content,
        _wrap_spaces(content),
        _wrap_tabs(content),
        _wrap_newlines(content),
        _wrap_words(content),
        _wrap_nonwords(content),
    )

    for text in candidates:
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(list(iocextract.extract_sha512_hashes(text))[0], content)
def test_sha512(self):
    """Check that a lone SHA-512 hash yields exactly one match equal to itself."""
    # Implicit concatenation of the two literal halves of the hash.
    content = (
        "2960827d026a8488fd663cd23b8d71957275c296fd8dfd47e84a820d864d172c2b34"
        "19724d58d5151f597d1bf2a932a9a83a30aefc0dcc05a91d1f23fb747fab"
    )

    extracted = list(iocextract.extract_sha512_hashes(content))

    self.assertEqual(len(extracted), 1)
    self.assertEqual(extracted[0], content)
# NOTE(review): the statements down to `count += 1` look like the body of a
# per-file loop whose header is outside this view -- re-indent them to match
# the enclosing loop when applying this change.

# Extract text from pdf
filepath = os.path.join(path, filename)
content = convert_pdf_txt(filepath)

# Extract Indicators of Compromise from text, recording time
# Keep a local alias so the per-file dict is looked up only once.
file_iocs = extracted_files[filename] = {}
extract_start_time = time.time()
file_iocs["urls"] = list(iocextract.extract_urls(content, refang=True))
file_iocs["email_addresses"] = list(iocextract.extract_emails(content, refang=True))
file_iocs["ipv4s"] = list(iocextract.extract_ipv4s(content, refang=True))
file_iocs["ipv6s"] = list(iocextract.extract_ipv6s(content))
file_iocs["md5s"] = list(iocextract.extract_md5_hashes(content))
file_iocs["sha1s"] = list(iocextract.extract_sha1_hashes(content))
file_iocs["sha256s"] = list(iocextract.extract_sha256_hashes(content))
file_iocs["sha512s"] = list(iocextract.extract_sha512_hashes(content))
file_iocs["yara"] = list(iocextract.extract_yara_rules(content))
extract_avg_numerator += time.time() - extract_start_time
count += 1

process_end_time = time.time()

# add some meta info on process run time
extracted_files["meta"] = {
    "tool": "iocextract",
    "files_examined": count,
    "elapsed_time": process_end_time - process_start_time,
    # Guard the division: with no files examined, count is 0 and the
    # unguarded average would raise ZeroDivisionError.
    "average_time": extract_avg_numerator / count if count else 0.0,
}