def test_process_query_view_parity(self, process_props: ProcessProps):
    """Check that a process queried by node key matches the props it was built from.

    A process node is created (or fetched) from ``process_props``, queried
    back by its ``node_key``, and each getter on the queried view is compared
    against the corresponding entry of ``process_props``. The queried string
    properties are passed through ``escape_dgraph_str`` before comparison.
    """
    local_client = MasterGraphClient()

    created_proc = get_or_create_process(self, local_client, process_props)

    queried_proc = (
        ProcessQuery()
        .with_node_key(eq=created_proc.node_key)
        .query_first(local_client)
    )

    assert queried_proc
    assert process_props["node_key"] == queried_proc.node_key
    assert "Process" == queried_proc.get_node_type()
    assert process_props["process_id"] == queried_proc.get_process_id()
    assert process_props["arguments"] == escape_dgraph_str(
        queried_proc.get_arguments()
    )
    assert (
        process_props["created_timestamp"]
        == queried_proc.get_created_timestamp()
    )
    # This test creates no asset edge; compare to None by identity rather
    # than by equality.
    assert queried_proc.get_asset() is None
    assert process_props["terminate_time"] == queried_proc.get_terminate_time()
    assert process_props["image_name"] == escape_dgraph_str(
        queried_proc.get_image_name()
    )
    assert process_props["process_name"] == escape_dgraph_str(
        queried_proc.get_process_name()
    )
def test_single_process_connected_to_asset_node(
    self,
    asset_props: AssetProps,
    process_props: ProcessProps,
):
    """Query a process through its asset and verify both ends of the edge.

    An asset and a process are created (or fetched), linked with an
    ``asset_processes`` edge from the asset to the process, and the process
    is then queried via an asset-hostname filter.
    """
    client = MasterGraphClient()

    asset_view = get_or_create_asset(self, client, asset_props)
    proc_view = get_or_create_process(self, client, process_props)
    create_edge(client, asset_view.uid, "asset_processes", proc_view.uid)

    # Setup complete, do some queries
    proc_query = ProcessQuery()
    hostname_filter = AssetQuery().with_hostname(asset_view.get_hostname())
    queried_proc = proc_query.with_asset(hostname_filter).query_first(
        client,
        contains_node_key=proc_view.node_key,
    )

    assert queried_proc
    fetch_all_properties(queried_proc)
    assert_equal_props(proc_view, queried_proc)

    # The asset reached through the queried process must be the one we linked.
    queried_asset = queried_proc.get_asset()
    assert_equal_identity(asset_view, queried_asset)
def test_with_wrote_files(self) -> None:
    """Query a process through its ``wrote_files`` edge and verify both nodes.

    Upserts a process and a file, links them with a ``wrote_files`` edge, and
    queries the process filtered on its own properties and on the linked file.
    """
    process_key = "test_with_wrote_files-8f0761fb-2ffe-4d4b-ab38-68e5489f56dc"
    file_key = "test_with_wrote_files-2325c49a-95b4-423f-96d0-99539fe03833"
    file_path = "/folder/file.txt"

    # Given: a process with a pid 100 & process_name word.exe,
    client = MasterGraphClient()
    now = int(time.time())

    process_props = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": now,
    }  # type: Dict[str, Property]
    process_view = upsert(
        client, "Process", ProcessView, process_key, process_props
    )

    file_props = {
        "file_path": file_path,
        "created_timestamp": now + 1000,
    }  # type: Dict[str, Property]
    file_view = upsert(client, "File", FileView, file_key, file_props)

    create_edge(client, process_view.uid, "wrote_files", file_view.uid)

    # Build the process-side filters first, then the nested file filter.
    process_query = (
        ProcessQuery()
        .with_node_key(eq=process_key)
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=now)
    )
    file_query = (
        FileQuery().with_node_key(eq=file_key).with_file_path(eq=file_path)
    )
    queried_process = process_query.with_wrote_files(file_query).query_first(
        client
    )

    assert queried_process
    assert queried_process.node_key == process_key
    assert queried_process.process_id == 100
    assert queried_process.process_name == "word.exe"

    assert len(queried_process.wrote_files) == 1
    assert queried_process.wrote_files[0].node_key == file_key
    assert queried_process.wrote_files[0].file_path == file_path
def test_with_bin_file(self) -> None:
    """Query a process through its ``bin_file`` edge and verify both nodes.

    Upserts a process and a file, links them with a ``bin_file`` edge, and
    queries the process filtered on its own properties and on the linked file.
    """
    process_key = "635952af-87f3-4a2a-a65d-3f1859db9525"
    file_key = "9f16e0c9-33c0-4d18-9878-ef686373570b"

    # Given: a process with a pid 100 & process_name word.exe,
    local_client = MasterGraphClient()

    created_timestamp = int(time.time())

    parent_process = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": created_timestamp,
    }  # type: Dict[str, Property]

    parent_process_view = upsert(
        local_client,
        "Process",
        ProcessView,
        process_key,
        parent_process,
    )

    bin_file = {
        "file_path": "/folder/file.txt",
        "created_timestamp": created_timestamp + 1000,
    }  # type: Dict[str, Property]

    bin_file_view = upsert(
        local_client,
        "File",
        FileView,
        file_key,
        bin_file,
    )

    create_edge(
        local_client,
        parent_process_view.uid,
        "bin_file",
        bin_file_view.uid,
    )

    queried_process = (
        ProcessQuery()
        .with_node_key(eq=process_key)
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=created_timestamp)
        .with_bin_file(
            FileQuery()
            .with_node_key(eq=file_key)
            .with_file_path(eq="/folder/file.txt")
        )
        .query_first(local_client)
    )

    assert queried_process
    # Compare the node key explicitly; asserting the bare string literal
    # would always pass.
    assert queried_process.node_key == process_key
    assert queried_process.process_id == 100
    assert queried_process.process_name == "word.exe"
    assert queried_process.created_timestamp == created_timestamp

    # Separate name so the props dict above is not rebound to a view object.
    queried_bin_file = queried_process.bin_file
    assert queried_bin_file.node_key == file_key
    assert queried_bin_file.file_path == "/folder/file.txt"
def test_with_read_files(self) -> None:
    """Query a process through its ``read_files`` edge and verify both nodes.

    Upserts a process and a file, links them with a ``read_files`` edge, and
    queries the process by pid, name, timestamp and the linked file's path.
    The node keys are not part of the query; they are only asserted on the
    result.
    """
    process_key = "test_with_read_files-669a3693-d960-401c-8d29-5d669ffcd660"
    file_key = "test_with_read_files-aa9248ec-36ee-4177-ba1a-999de735e682"
    file_path = "/folder/file.txt"

    # Given: a process with a pid 100 & process_name word.exe,
    client = MasterGraphClient()
    now = int(time.time())

    process_props = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": now,
    }  # type: Dict[str, Property]
    process_view = upsert(
        client, "Process", ProcessView, process_key, process_props
    )

    file_props = {
        "file_path": file_path,
        "created_timestamp": now + 1000,
    }  # type: Dict[str, Property]
    file_view = upsert(client, "File", FileView, file_key, file_props)

    create_edge(client, process_view.uid, "read_files", file_view.uid)

    process_query = (
        ProcessQuery()
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=now)
    )
    file_query = FileQuery().with_file_path(eq=file_path)
    queried_process = process_query.with_read_files(file_query).query_first(
        client
    )

    assert queried_process
    assert queried_process.node_key == process_key
    assert queried_process.process_id == 100
    assert queried_process.process_name == "word.exe"

    assert len(queried_process.read_files) == 1
    assert queried_process.read_files[0].node_key == file_key
    assert queried_process.read_files[0].file_path == file_path
def test_process_with_created_files(self) -> None:
    """Query a process through its ``created_files`` edge.

    Upserts a process and a file, links them with a ``created_files`` edge,
    and queries the process filtered on its own properties and on the linked
    file. Only the pid, the number of created files and the file path are
    asserted on the result.
    """
    process_key = "763ddbda-8812-4a07-acfe-83402b92379d"
    file_key = "575f103e-1a11-4650-9f1b-5b72e44dfec3"
    file_path = "/folder/file.txt"

    # Given: a process with a pid 100 & process_name word.exe,
    client = MasterGraphClient()
    now = int(time.time())

    process_props = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": now,
    }  # type: Dict[str, Property]
    process_view = upsert(
        client, "Process", ProcessView, process_key, process_props
    )

    file_props = {
        "file_path": file_path,
        "created_timestamp": now + 1000,
    }  # type: Dict[str, Property]
    file_view = upsert(client, "File", FileView, file_key, file_props)

    create_edge(client, process_view.uid, "created_files", file_view.uid)

    process_query = (
        ProcessQuery()
        .with_node_key(eq=process_key)
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=now)
    )
    file_query = (
        FileQuery().with_node_key(eq=file_key).with_file_path(eq=file_path)
    )
    queried_process = process_query.with_created_files(
        file_query
    ).query_first(client)

    assert queried_process
    assert queried_process.process_id == 100

    assert len(queried_process.created_files) == 1
    queried_file = queried_process.created_files[0]
    assert queried_file.file_path == file_path
def upload_plugins(s3_client, plugin_files: Dict[str, str]):
    """Provision schemas found among the plugin files, then upload every file.

    Args:
        s3_client: Client handed through to ``upload_plugin`` for each file.
        plugin_files: Mapping of file path to file contents.

    The contents of every file whose path ends in ``schema.py`` or
    ``schemas.py`` are passed to ``provision_schemas`` together with a
    ``LocalMasterGraphClient`` when ``IS_LOCAL`` is truthy, otherwise a
    ``MasterGraphClient``. Afterwards each (path, contents) pair is passed to
    ``upload_plugin``.
    """
    schema_suffixes = ("schema.py", "schemas.py")

    raw_schemas = []
    for path, contents in plugin_files.items():
        if path.endswith(schema_suffixes):
            raw_schemas.append(contents)

    graph_client = LocalMasterGraphClient() if IS_LOCAL else MasterGraphClient()
    provision_schemas(graph_client, raw_schemas)

    for path, file in plugin_files.items():
        upload_plugin(s3_client, path, file)
def lambda_handler(s3_event: S3Event, context: Any) -> None:
    """Process every record of an S3 event, recording a metric per record.

    Args:
        s3_event: Event whose ``"Records"`` entries are processed in order.
        context: Not used by this function.

    Each record is handled by ``_process_one_event`` inside the
    ``time_to_process_event`` context. A ``"failure"`` status is recorded and
    the exception re-raised if processing raises; otherwise a ``"success"``
    status is recorded.
    """
    graph_client = MasterGraphClient()
    s3_client = get_s3_client()
    metrics = create_metrics_client()

    for record in s3_event["Records"]:
        with metrics.time_to_process_event():
            try:
                _process_one_event(record, s3_client, graph_client)
            except BaseException:
                # Record the failure for any exception, then let it propagate.
                metrics.event_processed(status="failure")
                raise
            # Only reached when processing did not raise.
            metrics.event_processed(status="success")
def test_process_query_view_parity_contains(
    self,
    node_key,
    process_id,
    created_timestamp,
    terminate_time,
    image_name,
    process_name,
    arguments,
):
    """Check that ``contains`` filters on string properties find the process.

    A process node is created from the given values, then queried by node key
    plus ``contains`` filters built from each string property with its last
    character dropped. The queried view must match every input value.
    """
    # Reject unsuitable examples before touching the graph, so discarded
    # examples do not cause writes.
    # Newlines are excluded due to a dgraph bug
    # https://github.com/dgraph-io/dgraph/issues/4694
    for prop in (arguments, image_name, process_name):
        hypothesis.assume(len(prop) > 3)
        hypothesis.assume("\n" not in prop)
        hypothesis.assume("\\" not in prop)

    node_key = "test_process_query_view_parity_contains" + str(node_key)
    local_client = MasterGraphClient()
    get_or_create_process_node_deprecated(
        local_client,
        node_key,
        process_id,
        arguments,
        created_timestamp,
        terminate_time,
        image_name,
        process_name,
    )

    query = ProcessQuery().with_node_key(eq=node_key)

    # These fail because dgraph doesn't like the query
    # (regexp(process_name, /00\\//))
    # The return values are discarded; this relies on the with_* calls
    # updating `query` in place.
    query.with_arguments(contains=arguments[:-1])
    query.with_image_name(contains=image_name[:-1])
    query.with_process_name(contains=process_name[:-1])

    queried_proc = query.query_first(local_client)

    assert queried_proc
    assert "Process" == queried_proc.get_node_type()
    assert process_id == queried_proc.get_process_id()
    assert node_key == queried_proc.node_key
    assert arguments == queried_proc.get_arguments()
    assert created_timestamp == queried_proc.get_created_timestamp()
    assert terminate_time == queried_proc.get_terminate_time()
    assert image_name == queried_proc.get_image_name()
    assert process_name == queried_proc.get_process_name()
def test_with_deleted_files(self) -> None:
    """Query a process through its ``deleted_files`` edge.

    Upserts a process and a file, links them with a ``deleted_files`` edge,
    and queries the process by pid, name, timestamp and the linked file's
    path. Only the pid of the result is asserted.
    """
    process_key = "test_with_deleted_files-47527d73-22c4-4e0f-bf7d-184bf1f206e2"
    file_key = "test_with_deleted_files8b8364ea-9b47-476b-8cf0-0f724adff10f"
    file_path = "/folder/file.txt"

    # Given: a process with a pid 100 & process_name word.exe,
    client = MasterGraphClient()
    now = int(time.time())

    process_props = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": now,
    }  # type: Dict[str, Property]
    process_view = upsert(
        client, "Process", ProcessView, process_key, process_props
    )

    file_props = {
        "file_path": file_path,
        "created_timestamp": now + 1000,
    }  # type: Dict[str, Property]
    file_view = upsert(client, "File", FileView, file_key, file_props)

    create_edge(client, process_view.uid, "deleted_files", file_view.uid)

    process_query = (
        ProcessQuery()
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=now)
    )
    file_query = FileQuery().with_file_path(eq=file_path)
    queried_process = process_query.with_deleted_files(
        file_query
    ).query_first(client)

    assert queried_process
    assert queried_process.process_id == 100
def prune_expired_subgraphs(event, lambda_context) -> None:
    """Delete expired entities from the graph in batches and log the totals.

    Args:
        event: Not used by this function.
        lambda_context: Not used by this function.

    When ``GRAPL_DGRAPH_TTL_S`` is not greater than zero, a warning is logged
    and nothing is deleted. Otherwise each batch yielded by
    ``expired_entities`` has its edges deleted first and then its nodes, and
    the accumulated counts are logged at the end.
    """
    if not (GRAPL_DGRAPH_TTL_S > 0):
        # `warning` is the supported spelling; `warn` is a deprecated alias.
        app.log.warning("GRAPL_DGRAPH_TTL_S is not set, exiting.")
        return

    client = LocalMasterGraphClient() if IS_LOCAL else MasterGraphClient()

    node_count = 0
    edge_count = 0

    for entities in expired_entities(
        client,
        now=datetime.datetime.utcnow(),
        ttl_s=GRAPL_DGRAPH_TTL_S,
        batch_size=GRAPL_TTL_DELETE_BATCH_SIZE,
    ):
        # Edges are removed before the nodes they reference.
        edge_count += delete_edges(client, edges(entities))
        node_count += delete_nodes(client, nodes(entities))

    app.log.info(f"Pruned {node_count} nodes and {edge_count} edges")
def test_process_query_view_parity_eq(
    self,
    node_key,
    process_id,
    created_timestamp,
    terminate_time,
    image_name,
    process_name,
    arguments,
):
    """Check that ``eq`` filters on every property find the created process.

    A process node is created from the given values, then queried with an
    ``eq`` filter on each of them. The queried view must match every input
    value.
    """
    node_key = "test_process_query_view_parity_eq" + str(node_key)
    local_client = MasterGraphClient()
    get_or_create_process_node_deprecated(
        local_client,
        node_key,
        process_id,
        arguments,
        created_timestamp,
        terminate_time,
        image_name,
        process_name,
    )

    queried_proc = (
        ProcessQuery()
        .with_node_key(eq=node_key)
        .with_process_id(eq=process_id)
        .with_arguments(eq=arguments)
        .with_created_timestamp(eq=created_timestamp)
        .with_terminate_time(eq=terminate_time)
        .with_image_name(eq=image_name)
        .with_process_name(eq=process_name)
        .query_first(local_client)
    )

    # Fail with a clear assertion if the query found nothing, instead of an
    # AttributeError on the first property access below.
    assert queried_proc
    assert node_key == queried_proc.node_key
    assert "Process" == queried_proc.get_node_type()
    assert process_id == queried_proc.get_process_id()
    assert arguments == queried_proc.get_arguments()
    assert created_timestamp == queried_proc.get_created_timestamp()
    assert terminate_time == queried_proc.get_terminate_time()
    assert image_name == queried_proc.get_image_name()
    assert process_name == queried_proc.get_process_name()
def test_process_query_view_miss(self, process_props: ProcessProps) -> None:
    """Check that negated ``eq`` filters do not match the created process.

    A process is created (or fetched), then queried by its node key with
    every other property filter wrapped in ``Not(...)`` of the created value.
    The query is expected to return nothing.
    """
    client = MasterGraphClient()
    proc = get_or_create_process(self, client, process_props)

    # Every property used in a negated filter below must be populated.
    assert proc.process_id is not None
    assert proc.arguments is not None
    assert proc.created_timestamp is not None
    assert proc.terminate_time is not None
    assert proc.image_name is not None
    assert proc.process_name is not None

    query = ProcessQuery().with_node_key(eq=proc.node_key)
    query = query.with_process_id(eq=Not(proc.process_id))
    query = query.with_arguments(eq=Not(proc.arguments))
    query = query.with_created_timestamp(eq=Not(proc.created_timestamp))
    query = query.with_terminate_time(eq=Not(proc.terminate_time))
    query = query.with_image_name(eq=Not(proc.image_name))
    query = query.with_process_name(eq=Not(proc.process_name))
    queried_proc = query.query_first(client)

    assert not queried_proc
def test__single_ip_addr_node__query_by_node_key(
    self,
    node_key,
    first_seen_timestamp,
    last_seen_timestamp,
    ip_address,
):
    """Create an IP address node and check the queried view equals it.

    The node is created with both timestamps converted by ``as_millis`` and
    the address converted with ``str``. It is then queried with the three
    property selectors and ``contains_node_key``.
    """
    # current function's name, but don't need to copy-paste replace
    node_key = node_key_for_test(self, node_key)
    client = MasterGraphClient()

    expected_view = get_or_create_ip_address_node(
        local_client=client,
        node_key=node_key,
        first_seen_timestamp=as_millis(first_seen_timestamp),
        last_seen_timestamp=as_millis(last_seen_timestamp),
        ip_address=str(ip_address),
    )

    ip_query = (
        IpAddressQuery()
        .with_ip_address()
        .with_first_seen_timestamp()
        .with_last_seen_timestamp()
    )
    actual_view = ip_query.query_first(client, contains_node_key=node_key)

    assert_views_equal(expected=expected_view, actual=actual_view)
def test_single_process_contains_key(self, process_props: ProcessProps) -> None:
    """Find a process via ``contains_node_key`` and compare it to the original.

    A process is created (or fetched), queried back with an otherwise
    unfiltered ``ProcessQuery`` and ``contains_node_key``, and every getter on
    the queried view is compared with the created view. The queried process
    must have no asset.
    """
    client = MasterGraphClient()
    expected = get_or_create_process(self, client, process_props)

    # Setup complete, do some queries
    actual = ProcessQuery().query_first(
        client,
        contains_node_key=expected.node_key,
    )

    assert actual
    assert expected.get_process_id() == actual.get_process_id()
    assert expected.node_key == actual.node_key
    assert "Process" == actual.get_node_type()
    assert expected.get_arguments() == actual.get_arguments()
    assert expected.get_created_timestamp() == actual.get_created_timestamp()
    assert expected.get_terminate_time() == actual.get_terminate_time()
    assert expected.get_image_name() == actual.get_image_name()
    assert expected.get_process_name() == actual.get_process_name()
    assert not actual.get_asset()
try: provision_sqs(sqs, service) sqs_succ.discard(service) except Exception as e: if i > 10: print(e) time.sleep(1) if not sqs_succ: return raise Exception("Failed to provision sqs") if __name__ == "__main__": time.sleep(5) local_dg_provision_client = MasterGraphClient() print("Provisioning graph database") for i in range(0, 150): try: drop_all(local_dg_provision_client) break except Exception as e: time.sleep(2) print("Failed to drop", e) mg_succ = False sqs_t = threading.Thread(target=sqs_provision_loop) s3_t = threading.Thread(target=bucket_provision_loop)
def wait_for_lens():
    """Return a ``WaitForLens`` built from a new client and a lens-name query.

    The query is a ``LensQuery`` filtered on ``LENS_NAME``.
    """
    graph_client = MasterGraphClient()
    return WaitForLens(graph_client, LensQuery().with_lens_name(LENS_NAME))
def test_single_file_contains_key(
    self,
    node_key,
    file_path,
    file_extension,
    file_mime_type,
    file_size,
    file_version,
    file_description,
    file_product,
    file_company,
    file_directory,
    file_inode,
    file_hard_links,
    signed,
    signed_status,
    md5_hash,
    sha1_hash,
    sha256_hash,
):
    """Find a file via ``contains_node_key`` and compare it to the inputs.

    A file node is created from the given values (``signed`` is stored as the
    string ``"true"`` or ``"false"``), queried back with an otherwise
    unfiltered ``FileQuery`` and ``contains_node_key``, and each getter is
    compared against the corresponding input.
    """
    node_key = "test_single_file_contains_key" + str(node_key)
    signed = "true" if signed else "false"
    local_client = MasterGraphClient()

    get_or_create_file_node(
        local_client,
        node_key,
        file_path=file_path,
        file_extension=file_extension,
        file_mime_type=file_mime_type,
        file_size=file_size,
        file_version=file_version,
        file_description=file_description,
        file_product=file_product,
        file_company=file_company,
        file_directory=file_directory,
        file_inode=file_inode,
        file_hard_links=file_hard_links,
        signed=signed,
        signed_status=signed_status,
        md5_hash=md5_hash,
        sha1_hash=sha1_hash,
        sha256_hash=sha256_hash,
    )

    queried_file = FileQuery().query_first(
        local_client, contains_node_key=node_key
    )

    # Fail with a clear assertion if the query found nothing.
    assert queried_file
    assert node_key == queried_file.node_key

    # The `or ""` fallback must bind to the getter result, not to the
    # comparison: `a == b or ""` parses as `(a == b) or ""`, which makes the
    # fallback dead code.
    assert file_path == (queried_file.get_file_path() or "")
    assert file_extension == (queried_file.get_file_extension() or "")
    assert file_mime_type == (queried_file.get_file_mime_type() or "")
    assert file_version == (queried_file.get_file_version() or "")
    assert file_description == (queried_file.get_file_description() or "")
    assert file_product == (queried_file.get_file_product() or "")
    assert file_company == (queried_file.get_file_company() or "")
    assert file_directory == (queried_file.get_file_directory() or "")
    assert file_hard_links == (queried_file.get_file_hard_links() or "")
    assert signed == (queried_file.get_signed() or "")
    assert signed_status == (queried_file.get_signed_status() or "")
    assert md5_hash == (queried_file.get_md5_hash() or "")
    assert sha1_hash == (queried_file.get_sha1_hash() or "")
    assert sha256_hash == (queried_file.get_sha256_hash() or "")

    assert file_size == queried_file.get_file_size()
    assert file_inode == queried_file.get_file_inode()
def test_parent_children_edge(self) -> None:
    """Query a parent process through its ``children`` edge and verify both.

    Upserts a parent and a child process, links them with a ``children`` edge
    from parent to child, and queries the parent filtered on its own
    properties and on the child's properties.
    """
    parent_key = "0e84f2ce-f711-46ce-bc9e-1b13c9ba6d6c"
    child_key = "46d2862f-cb58-4062-b35e-bb310b8d5b0d"

    # Given: a process with a pid 100 & process_name word.exe,
    client = MasterGraphClient()
    parent_ts = int(time.time())
    child_ts = parent_ts + 1000

    parent_props = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": parent_ts,
    }  # type: Dict[str, Property]
    parent_view = upsert(
        client, "Process", ProcessView, parent_key, parent_props
    )

    child_props = {
        "process_id": 110,
        "process_name": "malware.exe",
        "created_timestamp": child_ts,
    }  # type: Dict[str, Property]
    child_view = upsert(client, "Process", ProcessView, child_key, child_props)

    create_edge(client, parent_view.uid, "children", child_view.uid)

    parent_query = (
        ProcessQuery()
        .with_node_key(eq=parent_key)
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=parent_ts)
    )
    child_query = (
        ProcessQuery()
        .with_node_key(eq=child_key)
        .with_process_id(eq=110)
        .with_process_name(eq="malware.exe")
        .with_created_timestamp(eq=child_ts)
    )
    queried_process = parent_query.with_children(child_query).query_first(
        client
    )

    assert queried_process
    assert queried_process.node_key == parent_key
    assert queried_process.process_id == 100
    assert queried_process.process_name == "word.exe"
    assert queried_process.created_timestamp == parent_ts

    assert len(queried_process.children) == 1
    queried_child = queried_process.children[0]
    assert queried_child.node_key == child_key
    assert queried_child.process_id == 110
    assert queried_child.process_name == "malware.exe"
    assert queried_child.created_timestamp == child_ts
def test_single_file_view_parity_eq(
    self,
    node_key,
    file_path,
    file_extension,
    file_mime_type,
    file_size,
    file_version,
    file_description,
    file_product,
    file_company,
    file_directory,
    file_inode,
    file_hard_links,
    signed,
    signed_status,
    md5_hash,
    sha1_hash,
    sha256_hash,
):
    """Check that ``eq`` filters on every property find the created file.

    A file node is created from the given values (``signed`` is stored as the
    string ``"true"`` or ``"false"``), then queried with an ``eq`` filter on
    each of them. The queried view must match every input value.
    """
    node_key = "test_single_file_view_parity_eq" + str(node_key)
    signed = "true" if signed else "false"
    local_client = MasterGraphClient()

    get_or_create_file_node(
        local_client,
        node_key,
        file_path=file_path,
        file_extension=file_extension,
        file_mime_type=file_mime_type,
        file_size=file_size,
        file_version=file_version,
        file_description=file_description,
        file_product=file_product,
        file_company=file_company,
        file_directory=file_directory,
        file_inode=file_inode,
        file_hard_links=file_hard_links,
        signed=signed,
        signed_status=signed_status,
        md5_hash=md5_hash,
        sha1_hash=sha1_hash,
        sha256_hash=sha256_hash,
    )

    queried_file = (
        FileQuery()
        .with_node_key(eq=node_key)
        .with_file_path(eq=file_path)
        .with_file_extension(eq=file_extension)
        .with_file_mime_type(eq=file_mime_type)
        .with_file_size(eq=file_size)
        .with_file_version(eq=file_version)
        .with_file_description(eq=file_description)
        .with_file_product(eq=file_product)
        .with_file_company(eq=file_company)
        .with_file_directory(eq=file_directory)
        .with_file_inode(eq=file_inode)
        .with_file_hard_links(eq=file_hard_links)
        .with_signed(eq=signed)
        .with_signed_status(eq=signed_status)
        .with_md5_hash(eq=md5_hash)
        .with_sha1_hash(eq=sha1_hash)
        .with_sha256_hash(eq=sha256_hash)
        .query_first(local_client)
    )

    # Fail with a clear assertion if the query found nothing, instead of an
    # AttributeError on the first property access below.
    assert queried_file
    assert node_key == queried_file.node_key
    assert file_path == queried_file.get_file_path()
    assert file_extension == queried_file.get_file_extension()
    assert file_mime_type == queried_file.get_file_mime_type()
    assert file_size == queried_file.get_file_size()
    assert file_version == queried_file.get_file_version()
    assert file_description == queried_file.get_file_description()
    assert file_product == queried_file.get_file_product()
    assert file_company == queried_file.get_file_company()
    assert file_directory == queried_file.get_file_directory()
    assert file_inode == queried_file.get_file_inode()
    assert file_hard_links == queried_file.get_file_hard_links()
    assert signed == queried_file.get_signed()
    assert signed_status == queried_file.get_signed_status()
    assert md5_hash == queried_file.get_md5_hash()
    assert sha1_hash == queried_file.get_sha1_hash()
    assert sha256_hash == queried_file.get_sha256_hash()
def __init__(self, query: BaseQuery, dgraph_client: Any = None) -> None:
    """Store the query and the client to run it against.

    Args:
        query: The query to store on the instance.
        dgraph_client: Client to use. When ``None`` (the default), a new
            ``MasterGraphClient`` is created.
    """
    self.query = query
    # Only fall back to a fresh client when none was supplied; a truthiness
    # test would also discard a supplied client that evaluates as falsy.
    if dgraph_client is None:
        dgraph_client = MasterGraphClient()
    self.dgraph_client = dgraph_client