def lambda_handler(event, context):
    """Run a query against Neptune and store the CSV results in S3.

    Event fields (all required):
        graph_names: list of graph names to query.
        query: query text (str).
        max_age_min: int passed to the Neptune client as ``max_age_min``.

    Environment variables (all required): NEPTUNE_HOST, NEPTUNE_PORT,
    NEPTUNE_REGION, RESULTS_BUCKET.

    Returns:
        dict with ``results_bucket``, ``results_key`` and ``num_results``.

    Raises:
        ValueError: if an event field has the wrong type.
    """
    graph_names_list = get_required_lambda_event_var(event, "graph_names")
    if not isinstance(graph_names_list, list):
        raise ValueError(f"Value for graph_names should be a list. Is {type(graph_names_list)}")
    graph_names = set(graph_names_list)
    query = get_required_lambda_event_var(event, "query")
    if not isinstance(query, str):
        raise ValueError(f"Value for query should be a str. Is {type(query)}")
    max_age_min = get_required_lambda_event_var(event, "max_age_min")
    if not isinstance(max_age_min, int):
        raise ValueError(f"Value for max_age_min should be an int. Is {type(max_age_min)}")

    host = get_required_lambda_env_var("NEPTUNE_HOST")
    port = get_required_lambda_env_var("NEPTUNE_PORT")
    region = get_required_lambda_env_var("NEPTUNE_REGION")
    results_bucket = get_required_lambda_env_var("RESULTS_BUCKET")

    endpoint = NeptuneEndpoint(host=host, port=port, region=region)
    client = AltimeterNeptuneClient(max_age_min=max_age_min, neptune_endpoint=endpoint)
    query_result = client.run_query(graph_names=graph_names, query=query)
    csv_results = query_result.to_csv()

    query_hash = hashlib.sha256(query.encode()).hexdigest()
    now_str = str(int(time.time()))
    # Set iteration order is not stable across processes, so sort the names
    # to make the key prefix deterministic for a given set of graphs.
    graph_names_prefix = "-".join(sorted(graph_names))
    results_key = "/".join((graph_names_prefix, query_hash, f"{now_str}.csv"))

    s3_client = boto3.Session().client("s3")
    s3_client.put_object(Bucket=results_bucket, Key=results_key, Body=csv_results)
    return {
        "results_bucket": results_bucket,
        "results_key": results_key,
        "num_results": query_result.get_length(),
    }
def test_write_to_neptune_lpg_graph(self):
    """Writing a one-vertex LPG graph must raise NeptuneLoadGraphException."""
    neptune_endpoint = NeptuneEndpoint(
        host="host", port=5555, region="us-east-1", ssl=False
    )
    neptune_client = AltimeterNeptuneClient(0, neptune_endpoint)
    lpg_graph = {
        "vertices": [{"~id": "123", "~label": "test"}],
        "edges": [],
    }
    # There is no graph to write to, so the write is expected to fail.
    with self.assertRaises(NeptuneLoadGraphException):
        neptune_client.write_to_neptune_lpg(lpg_graph, "")
def test_write_to_neptune_lpg_no_graph(self):
    """Writing an empty graph dict must raise NeptuneNoGraphsFoundException."""
    neptune_endpoint = NeptuneEndpoint(
        host="host", port=5555, region="us-east-1", ssl=False
    )
    neptune_client = AltimeterNeptuneClient(0, neptune_endpoint)
    empty_graph = {}
    with self.assertRaises(NeptuneNoGraphsFoundException):
        neptune_client.write_to_neptune_lpg(empty_graph, "")
def test_parse_arn_resource_only(self):
    """parse_arn on a bare string reports that string as the resource."""
    neptune_endpoint = NeptuneEndpoint(
        host="host", port=5555, region="us-east-1", ssl=False
    )
    neptune_client = AltimeterNeptuneClient(0, neptune_endpoint)
    parsed = neptune_client.parse_arn("test")
    self.assertEqual("test", parsed["resource"])
def test_connect_to_gremlin_ssl_disabled(self):
    """With ssl=False the gremlin connection URL uses the ws:// scheme."""
    neptune_endpoint = NeptuneEndpoint(
        host="host", port=5555, region="us-east-1", ssl=False
    )
    neptune_client = AltimeterNeptuneClient(0, neptune_endpoint)
    # Only the connection (second element of the pair) is inspected here.
    _, connection = neptune_client.connect_to_gremlin()
    self.assertEqual("ws://host:5555/gremlin", connection.url)
def run_query(job: schemas.Job, config: QueryConfig) -> QueryResult:
    """Execute the job's query against Neptune and return its QueryResult.

    The client's ``max_age_min`` is the job's ``max_graph_age_sec`` converted
    to whole minutes (truncated), and the job's graph names are passed as a set.
    """
    endpoint = NeptuneEndpoint(
        host=config.neptune_host,
        port=config.neptune_port,
        region=config.neptune_region,
    )
    max_age_min = int(job.max_graph_age_sec / 60.0)
    client = AltimeterNeptuneClient(max_age_min=max_age_min, neptune_endpoint=endpoint)
    graph_names = set(job.graph_spec.graph_names)
    return client.run_query(graph_names=graph_names, query=job.query)
def lambda_handler(event, context):
    """Prune metadata and graph data for graphs older than MAX_AGE_MIN.

    Reads NEPTUNE_HOST, NEPTUNE_PORT, NEPTUNE_REGION, MAX_AGE_MIN and
    GRAPH_NAME from the environment. Old graph metadata is cleared first, then
    each graph whose ``end_time`` is older than the cutoff is cleared.

    Raises:
        Exception: if MAX_AGE_MIN is not an integer, or if clearing any graph
            failed (the message lists the graph URIs that were not cleared).
    """
    host = get_required_lambda_env_var("NEPTUNE_HOST")
    port = get_required_lambda_env_var("NEPTUNE_PORT")
    region = get_required_lambda_env_var("NEPTUNE_REGION")
    max_age_min = get_required_lambda_env_var("MAX_AGE_MIN")
    graph_name = get_required_lambda_env_var("GRAPH_NAME")
    try:
        max_age_min = int(max_age_min)
    except ValueError as ve:
        raise Exception(f"env var MAX_AGE_MIN must be an int: {ve}")

    current_epoch = int(datetime.now().timestamp())
    cutoff_epoch = current_epoch - max_age_min * 60

    endpoint = NeptuneEndpoint(host=host, port=port, region=region)
    client = AltimeterNeptuneClient(max_age_min=max_age_min, neptune_endpoint=endpoint)
    logger = Logger()
    failed_uris = []

    # Metadata goes first: if the graph clears below only partially succeed,
    # no client should still consider these graphs valid.
    logger.info(event=LogEvent.PruneNeptuneMetadataGraphStart)
    client.clear_old_graph_metadata(name=graph_name, max_age_min=max_age_min)
    logger.info(event=LogEvent.PruneNeptuneMetadataGraphEnd)

    # Then clear the graphs themselves.
    with logger.bind(neptune_endpoint=endpoint):
        logger.info(event=LogEvent.PruneNeptuneGraphsStart)
        for graph_metadata in client.get_graph_metadatas(name=graph_name):
            assert graph_metadata.name == graph_name
            graph_epoch = graph_metadata.end_time
            with logger.bind(graph_uri=graph_metadata.uri, graph_epoch=graph_epoch):
                if graph_epoch >= cutoff_epoch:
                    # Still fresh enough; leave it alone.
                    logger.info(event=LogEvent.PruneNeptuneGraphSkip)
                    continue
                logger.info(event=LogEvent.PruneNeptuneGraphStart)
                try:
                    client.clear_graph(graph_uri=graph_metadata.uri)
                    logger.info(event=LogEvent.PruneNeptuneGraphEnd)
                except Exception as ex:
                    # Record the failure and keep going with the other graphs.
                    logger.error(
                        event=LogEvent.PruneNeptuneGraphError,
                        msg=f"Error pruning graph {graph_metadata.uri}: {ex}",
                    )
                    failed_uris.append(graph_metadata.uri)
        logger.info(event=LogEvent.PruneNeptuneGraphsEnd)
        if failed_uris:
            msg = f"Errors were found pruning {failed_uris}."
            logger.error(event=LogEvent.PruneNeptuneGraphsError, msg=msg)
            raise Exception(msg)
def lambda_handler(event, context):
    """Load the RDF object named by an S3 event into Neptune and notify via SNS.

    Only the first record of ``event["Records"]`` is read. Requires the env
    vars NEPTUNE_HOST, NEPTUNE_PORT, NEPTUNE_REGION, NEPTUNE_LOAD_IAM_ROLE_ARN
    and ON_SUCCESS_SNS_TOPIC_ARN.

    After the load, a JSON message carrying the graph's uri, name, version,
    start/end times and the Neptune endpoint string is published to the SNS
    topic.
    """
    s3_record = event["Records"][0]["s3"]
    rdf_bucket = s3_record["bucket"]["name"]
    # Object keys in S3 event notifications are URL-encoded with spaces sent
    # as '+', so unquote_plus (not unquote) is needed to recover the real key.
    rdf_key = urllib.parse.unquote_plus(s3_record["object"]["key"])

    neptune_host = get_required_lambda_env_var("NEPTUNE_HOST")
    neptune_port = get_required_lambda_env_var("NEPTUNE_PORT")
    neptune_region = get_required_lambda_env_var("NEPTUNE_REGION")
    neptune_load_iam_role_arn = get_required_lambda_env_var("NEPTUNE_LOAD_IAM_ROLE_ARN")
    on_success_sns_topic_arn = get_required_lambda_env_var("ON_SUCCESS_SNS_TOPIC_ARN")

    endpoint = NeptuneEndpoint(host=neptune_host, port=neptune_port, region=neptune_region)
    neptune_client = AltimeterNeptuneClient(max_age_min=1440, neptune_endpoint=endpoint)
    graph_metadata = neptune_client.load_graph(
        bucket=rdf_bucket, key=rdf_key, load_iam_role_arn=neptune_load_iam_role_arn
    )

    logger = Logger()
    logger.info(event=LogEvent.GraphLoadedSNSNotificationStart)
    sns_client = boto3.client("sns")
    message_dict = {
        "uri": graph_metadata.uri,
        "name": graph_metadata.name,
        "version": graph_metadata.version,
        "start_time": graph_metadata.start_time,
        "end_time": graph_metadata.end_time,
        "neptune_endpoint": endpoint.get_endpoint_str(),
    }
    # MessageStructure="json" is used below, so the payload also carries a
    # "default" entry holding the JSON-serialised message itself.
    message_dict["default"] = json.dumps(message_dict)
    sns_client.publish(
        TopicArn=on_success_sns_topic_arn,
        MessageStructure="json",
        Message=json.dumps(message_dict),
    )
    logger.info(event=LogEvent.GraphLoadedSNSNotificationEnd)
def aws2neptune_lpg(scan_id: str, config: AWSConfig, muxer: AWSScanMuxer) -> None:
    """Scan AWS resources, convert the result to a Neptune LPG graph and write it.

    The scan runs first; the graph set is then converted with
    ``to_neptune_lpg(scan_id)`` and written through an AltimeterNeptuneClient.

    Raises:
        Exception: if ``config.neptune`` is None (checked after the scan and
            the graph conversion have completed).
    """
    artifact_reader = ArtifactReader.from_artifact_path(config.artifact_path)
    artifact_writer = ArtifactWriter.from_artifact_path(
        artifact_path=config.artifact_path, scan_id=scan_id
    )
    region_mapping_repo = build_aws_resource_region_mapping_repo(
        global_region_whitelist=config.scan.regions,
        preferred_account_scan_regions=config.scan.preferred_account_scan_regions,
        services_regions_json_url=config.services_regions_json_url,
    )
    logger.info(
        AWSLogEvents.ScanConfigured,
        config=str(config),
        reader=str(artifact_reader.__class__),
        writer=str(artifact_writer.__class__),
    )

    print("Beginning AWS Account Scan")
    # The scan manifest (first element) is not needed for the LPG write.
    _, graph_set = run_scan(
        muxer=muxer,
        config=config,
        aws_resource_region_mapping_repo=region_mapping_repo,
        artifact_writer=artifact_writer,
        artifact_reader=artifact_reader,
    )
    print("AWS Account Scan Complete. Beginning write to Amazon Neptune.")

    logger.info(LogEvent.NeptuneGremlinWriteStart)
    lpg_graph = graph_set.to_neptune_lpg(scan_id)
    neptune_config = config.neptune
    if neptune_config is None:
        raise Exception("Can not load to Neptune because config.neptune is empty.")
    endpoint = NeptuneEndpoint(
        host=neptune_config.host,
        port=neptune_config.port,
        region=neptune_config.region,
        ssl=bool(neptune_config.ssl),
        auth_mode=str(neptune_config.auth_mode),
    )
    AltimeterNeptuneClient(max_age_min=1440, neptune_endpoint=endpoint).write_to_neptune_lpg(
        lpg_graph, scan_id
    )
    logger.info(LogEvent.NeptuneGremlinWriteEnd)
    print("Write to Amazon Neptune Complete")
def test_parse_arn_resource_type_ami(self):
    """parse_arn splits an EC2 AMI ARN into the expected component dict."""
    neptune_endpoint = NeptuneEndpoint(
        host="host", port=5555, region="us-east-1", ssl=False
    )
    neptune_client = AltimeterNeptuneClient(0, neptune_endpoint)
    parsed = neptune_client.parse_arn("arn:aws:ec2:us-east-1:123:ami:ami-123")
    expected = dict(
        arn="arn",
        partition="aws",
        service="ec2",
        region="us-east-1",
        account="123",
        resource="ami",
        resource_type="ami",
    )
    self.assertDictEqual(expected, parsed)
def run_query(job: schemas.Job, config: QueryConfig) -> QueryResult:
    """Execute the job's query against Neptune and return its QueryResult.

    For ``job.raw_query`` jobs the query is run as-is via ``run_raw_query`` and
    the result is paired with a uri -> end_time mapping built from all graph
    metadatas. Otherwise the query is run against the job's graph names.
    """
    endpoint = NeptuneEndpoint(
        host=config.neptune_host,
        port=config.neptune_port,
        region=config.neptune_region,
    )
    max_age_min = int(job.max_graph_age_sec / 60.0)
    client = AltimeterNeptuneClient(max_age_min=max_age_min, neptune_endpoint=endpoint)

    if not job.raw_query:
        return client.run_query(
            graph_names=set(job.graph_spec.graph_names),
            query=job.query,
        )

    # Raw queries are not tied to specific graphs, so every known graph's end
    # time is collected before the query is run.
    load_times_by_uri: Dict[str, int] = {}
    for metadata in client.get_all_graph_metadatas():
        load_times_by_uri[metadata.uri] = metadata.end_time
    raw_result_set = client.run_raw_query(query=job.query)
    return QueryResult(load_times_by_uri, raw_result_set)
def aws2neptune_rdf(scan_id: str, config: Config, muxer: AWSScanMuxer) -> None:
    """Scan AWS resources, convert the result to RDF and write it to Neptune.

    Raises:
        Exception: if ``config.neptune`` is None (checked after the scan and
            the RDF conversion have completed).
    """
    artifact_reader = ArtifactReader.from_artifact_path(config.artifact_path)
    artifact_writer = ArtifactWriter.from_artifact_path(
        artifact_path=config.artifact_path, scan_id=scan_id
    )
    logger.info(
        AWSLogEvents.ScanConfigured,
        config=str(config),
        reader=str(artifact_reader.__class__),
        writer=str(artifact_writer.__class__),
    )

    print("Beginning AWS Account Scan")
    # The scan manifest (first element) is not needed for the RDF write.
    _, graph_set = run_scan(
        muxer=muxer,
        config=config,
        artifact_writer=artifact_writer,
        artifact_reader=artifact_reader,
    )
    print("AWS Account Scan Complete. Beginning write to Amazon Neptune.")

    logger.info(LogEvent.NeptuneRDFWriteStart)
    rdf_graph = graph_set.to_rdf()
    neptune_config = config.neptune
    if neptune_config is None:
        raise Exception("Can not load to Neptune because config.neptune is empty.")
    endpoint = NeptuneEndpoint(
        host=neptune_config.host,
        port=neptune_config.port,
        region=neptune_config.region,
        ssl=bool(neptune_config.ssl),
        auth_mode=str(neptune_config.auth_mode),
    )
    AltimeterNeptuneClient(max_age_min=1440, neptune_endpoint=endpoint).write_to_neptune_rdf(
        rdf_graph
    )
    logger.info(LogEvent.NeptuneRDFWriteEnd)
    print("Write to Amazon Neptune Complete")
def main(argv: Optional[List[str]] = None) -> int:
    """Run the query stored in a file against Neptune and print CSV results.

    Args:
        argv: command-line arguments; defaults to ``sys.argv[1:]``.

    Returns:
        0 on success, 1 if ``--neptune_endpoint`` is not ``host:port:region``
        with an integer port.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument("query_file", type=str)
    parser.add_argument("--graph_names", type=str, default=["alti"], nargs="+")
    parser.add_argument("--max_age_min", type=int, default=1440)
    parser.add_argument("--raw", default=False, action="store_true")
    parser.add_argument("--neptune_endpoint", help="Neptune endpoint specified as host:port:region")
    args_ns = parser.parse_args(argv)

    with open(args_ns.query_file, "r") as query_fp:
        query = query_fp.read()

    if args_ns.neptune_endpoint is not None:
        try:
            # Both a wrong number of ':'-separated parts and a non-numeric
            # port surface as ValueError here.
            host, port_str, region = args_ns.neptune_endpoint.split(":")
            port: int = int(port_str)
        except ValueError:
            print("neptune_endpoint should be a string formatted as host:port:region")
            return 1
        endpoint = NeptuneEndpoint(host=host, port=port, region=region)
    else:
        endpoint = discover_neptune_endpoint()

    client = AltimeterNeptuneClient(max_age_min=args_ns.max_age_min, neptune_endpoint=endpoint)
    if args_ns.raw:
        raw_results = client.run_raw_query(query=query)
        print(raw_results.to_csv(), end="")
    else:
        # argparse yields a list; pass a set so this call matches how the
        # other callers invoke run_query (and duplicate names collapse).
        results = client.run_query(graph_names=set(args_ns.graph_names), query=query)
        print(results.to_csv(), end="")
    return 0
def lambda_handler(event: Dict[str, Any], context: Any) -> None:
    """Entrypoint: prune old and orphaned Neptune graphs.

    Loads the config named by the CONFIG_PATH env var, clears every registered
    graph whose ``end_time`` is older than ``config.pruner_max_age_min``
    minutes, then clears graphs that exist in Neptune but have no metadata.

    Raises:
        InvalidConfigException: if the config has no neptune section.
        Exception: if clearing any graph failed (the message lists the graph
            URIs that were not cleared).
    """
    root = logging.getLogger()
    # Iterate over a copy: removeHandler mutates root.handlers, and removing
    # while iterating the live list would skip every other handler.
    for handler in list(root.handlers):
        root.removeHandler(handler)

    config_path = get_required_str_env_var("CONFIG_PATH")
    config = Config.from_path(path=config_path)
    if config.neptune is None:
        raise InvalidConfigException("Configuration missing neptune section.")

    now = int(datetime.now().timestamp())
    oldest_acceptable_graph_epoch = now - config.pruner_max_age_min * 60

    endpoint = NeptuneEndpoint(
        host=config.neptune.host, port=config.neptune.port, region=config.neptune.region
    )
    client = AltimeterNeptuneClient(
        max_age_min=config.pruner_max_age_min, neptune_endpoint=endpoint
    )
    logger = Logger()
    uncleared = []

    logger.info(event=LogEvent.PruneNeptuneGraphsStart)
    all_graph_metadatas = client.get_graph_metadatas(name=config.graph_name)
    with logger.bind(neptune_endpoint=str(endpoint)):
        for graph_metadata in all_graph_metadatas:
            assert graph_metadata.name == config.graph_name
            graph_epoch = graph_metadata.end_time
            with logger.bind(graph_uri=graph_metadata.uri, graph_epoch=graph_epoch):
                if graph_epoch < oldest_acceptable_graph_epoch:
                    logger.info(event=LogEvent.PruneNeptuneGraphStart)
                    try:
                        client.clear_registered_graph(
                            name=config.graph_name, uri=graph_metadata.uri
                        )
                        logger.info(event=LogEvent.PruneNeptuneGraphEnd)
                    except Exception as ex:
                        # Record the failure and keep pruning the other graphs.
                        logger.error(
                            event=LogEvent.PruneNeptuneGraphError,
                            msg=f"Error pruning graph {graph_metadata.uri}: {ex}",
                        )
                        uncleared.append(graph_metadata.uri)
                        continue
                else:
                    logger.info(event=LogEvent.PruneNeptuneGraphSkip)
        # now find orphaned graphs - these are in neptune but have no metadata
        registered_graph_uris = [g_m.uri for g_m in all_graph_metadatas]
        all_graph_uris = client.get_graph_uris(name=config.graph_name)
        orphaned_graphs = set(all_graph_uris) - set(registered_graph_uris)
        if orphaned_graphs:
            for orphaned_graph_uri in orphaned_graphs:
                with logger.bind(graph_uri=orphaned_graph_uri):
                    logger.info(event=LogEvent.PruneOrphanedNeptuneGraphStart)
                    try:
                        client.clear_graph_data(uri=orphaned_graph_uri)
                        logger.info(event=LogEvent.PruneOrphanedNeptuneGraphEnd)
                    except Exception as ex:
                        logger.error(
                            event=LogEvent.PruneNeptuneGraphError,
                            msg=f"Error pruning graph {orphaned_graph_uri}: {ex}",
                        )
                        uncleared.append(orphaned_graph_uri)
                        continue
        logger.info(event=LogEvent.PruneNeptuneGraphsEnd)
        if uncleared:
            msg = f"Errors were found pruning {uncleared}."
            logger.error(event=LogEvent.PruneNeptuneGraphsError, msg=msg)
            raise Exception(msg)
def test_get_gremlin_endpoint_ssl_false(self):
    """get_gremlin_endpoint(ssl=False) yields a ws:// gremlin URL."""
    neptune_endpoint = NeptuneEndpoint(host="host", port=5555, region="us-east-1")
    self.assertEqual(
        "ws://host:5555/gremlin",
        neptune_endpoint.get_gremlin_endpoint(ssl=False),
    )
def test_get_loader_endpoint_ssl_true(self):
    """get_loader_endpoint(ssl=True) yields an https:// loader URL."""
    neptune_endpoint = NeptuneEndpoint(host="host", port=5555, region="us-east-1")
    self.assertEqual(
        "https://host:5555/loader",
        neptune_endpoint.get_loader_endpoint(ssl=True),
    )
def test_get_sparql_endpoint_ssl_false(self):
    """get_sparql_endpoint(ssl=False) yields an http:// sparql URL."""
    neptune_endpoint = NeptuneEndpoint(host="host", port=5555, region="us-east-1")
    self.assertEqual(
        "http://host:5555/sparql",
        neptune_endpoint.get_sparql_endpoint(ssl=False),
    )
def test_get_endpoint_str(self):
    """get_endpoint_str renders the endpoint as "host:port"."""
    neptune_endpoint = NeptuneEndpoint(host="host", port=5555, region="us-east-1")
    self.assertEqual("host:5555", neptune_endpoint.get_endpoint_str())
def prune_graph(graph_pruner_config: GraphPrunerConfig) -> GraphPrunerResults:
    """Prune old and orphaned Neptune graphs and report what happened.

    Registered graphs whose ``end_time`` is older than
    ``config.pruner_max_age_min`` minutes are cleared; newer ones are skipped.
    Graphs present in Neptune without metadata are then cleared as well.

    Returns:
        GraphPrunerResults listing the pruned and the skipped graph URIs.

    Raises:
        InvalidConfigException: if the config has no neptune section.
        Exception: if clearing any graph failed (the message lists the graph
            URIs that were not cleared).
    """
    config = Config.from_path(path=graph_pruner_config.config_path)
    if config.neptune is None:
        raise InvalidConfigException("Configuration missing neptune section.")

    current_epoch = int(datetime.now().timestamp())
    cutoff_epoch = current_epoch - config.pruner_max_age_min * 60

    endpoint = NeptuneEndpoint(
        host=config.neptune.host, port=config.neptune.port, region=config.neptune.region
    )
    client = AltimeterNeptuneClient(
        max_age_min=config.pruner_max_age_min, neptune_endpoint=endpoint
    )
    logger = Logger()
    failed_uris = []
    pruned_uris = []
    skipped_uris = []

    logger.info(event=LogEvent.PruneNeptuneGraphsStart)
    all_graph_metadatas = client.get_graph_metadatas(name=config.graph_name)
    with logger.bind(neptune_endpoint=str(endpoint)):
        for graph_metadata in all_graph_metadatas:
            assert graph_metadata.name == config.graph_name
            graph_epoch = graph_metadata.end_time
            with logger.bind(graph_uri=graph_metadata.uri, graph_epoch=graph_epoch):
                if graph_epoch >= cutoff_epoch:
                    # Still fresh enough; leave it alone.
                    logger.info(event=LogEvent.PruneNeptuneGraphSkip)
                    skipped_uris.append(graph_metadata.uri)
                    continue
                logger.info(event=LogEvent.PruneNeptuneGraphStart)
                try:
                    client.clear_registered_graph(
                        name=config.graph_name, uri=graph_metadata.uri
                    )
                    logger.info(event=LogEvent.PruneNeptuneGraphEnd)
                    pruned_uris.append(graph_metadata.uri)
                except Exception as ex:
                    # Record the failure and keep pruning the other graphs.
                    logger.error(
                        event=LogEvent.PruneNeptuneGraphError,
                        msg=f"Error pruning graph {graph_metadata.uri}: {ex}",
                    )
                    failed_uris.append(graph_metadata.uri)

        # Orphans: graphs that exist in neptune but have no metadata.
        registered_uris = {metadata.uri for metadata in all_graph_metadatas}
        neptune_uris = client.get_graph_uris(name=config.graph_name)
        for orphan_uri in set(neptune_uris) - registered_uris:
            with logger.bind(graph_uri=orphan_uri):
                logger.info(event=LogEvent.PruneOrphanedNeptuneGraphStart)
                try:
                    client.clear_graph_data(uri=orphan_uri)
                    logger.info(event=LogEvent.PruneOrphanedNeptuneGraphEnd)
                    pruned_uris.append(orphan_uri)
                except Exception as ex:
                    logger.error(
                        event=LogEvent.PruneNeptuneGraphError,
                        msg=f"Error pruning graph {orphan_uri}: {ex}",
                    )
                    failed_uris.append(orphan_uri)

        logger.info(event=LogEvent.PruneNeptuneGraphsEnd)
        if failed_uris:
            msg = f"Errors were found pruning {failed_uris}."
            logger.error(event=LogEvent.PruneNeptuneGraphsError, msg=msg)
            raise Exception(msg)
    return GraphPrunerResults(
        pruned_graph_uris=pruned_uris,
        skipped_graph_uris=skipped_uris,
    )
def aws2n(scan_id: str, config: Config, muxer: AWSScanMuxer, load_neptune: bool) -> AWS2NResult:
    """Scan AWS resources, write the RDF artifact and optionally load Neptune.

    The scan result is always written as a gzip-compressed "master" graph set.
    When ``load_neptune`` is true the RDF artifact is loaded into Neptune and a
    JSON notification describing the loaded graph is published to the
    configured SNS topic.

    Returns:
        AWS2NResult with the json path, the rdf path and the graph metadata
        (None when nothing was loaded).

    Raises:
        Exception: if loading was requested but ``config.neptune`` is None, or
            the rdf path yields no S3 key.
    """
    artifact_reader = ArtifactReader.from_artifact_path(config.artifact_path)
    artifact_writer = ArtifactWriter.from_artifact_path(
        artifact_path=config.artifact_path, scan_id=scan_id
    )
    logger = Logger()
    logger.info(
        AWSLogEvents.ScanConfigured,
        config=str(config),
        reader=str(artifact_reader.__class__),
        writer=str(artifact_writer.__class__),
    )
    scan_manifest, graph_set = run_scan(
        muxer=muxer,
        config=config,
        artifact_writer=artifact_writer,
        artifact_reader=artifact_reader,
    )
    json_path = scan_manifest.master_artifact
    rdf_path = artifact_writer.write_graph_set(
        name="master", graph_set=graph_set, compression=GZIP
    )

    if not load_neptune:
        return AWS2NResult(json_path=json_path, rdf_path=rdf_path, graph_metadata=None)

    if config.neptune is None:
        raise Exception("Can not load to Neptune because config.neptune is empty.")
    endpoint = NeptuneEndpoint(
        host=config.neptune.host, port=config.neptune.port, region=config.neptune.region
    )
    neptune_client = AltimeterNeptuneClient(max_age_min=1440, neptune_endpoint=endpoint)
    rdf_bucket, rdf_key = parse_s3_uri(rdf_path)
    if rdf_key is None:
        raise Exception(f"Invalid rdf s3 path {rdf_path}")
    graph_metadata = neptune_client.load_graph(
        bucket=rdf_bucket,
        key=rdf_key,
        load_iam_role_arn=str(config.neptune.iam_role_arn),
    )

    logger.info(event=LogEvent.GraphLoadedSNSNotificationStart)
    sns_client = boto3.client("sns")
    payload = {
        "uri": graph_metadata.uri,
        "name": graph_metadata.name,
        "version": graph_metadata.version,
        "start_time": graph_metadata.start_time,
        "end_time": graph_metadata.end_time,
        "neptune_endpoint": endpoint.get_endpoint_str(),
    }
    # MessageStructure="json" is used below, so the envelope also carries a
    # "default" entry holding the JSON-serialised payload.
    envelope = {**payload, "default": json.dumps(payload)}
    sns_client.publish(
        TopicArn=config.neptune.graph_load_sns_topic_arn,
        MessageStructure="json",
        Message=json.dumps(envelope),
    )
    logger.info(event=LogEvent.GraphLoadedSNSNotificationEnd)
    return AWS2NResult(json_path=json_path, rdf_path=rdf_path, graph_metadata=graph_metadata)