def _sync_one_account(neo4j_session, boto3_session, account_id, sync_tag, common_job_parameters):
    iam.sync(neo4j_session, boto3_session, account_id, sync_tag, common_job_parameters)
    s3.sync(neo4j_session, boto3_session, account_id, sync_tag, common_job_parameters)

    try:
        regions = ec2.get_ec2_regions(boto3_session)
    except botocore.exceptions.ClientError as e:
        logger.debug("Error occurred getting EC2 regions.", exc_info=True)
        logger.error(
            "Failed to retrieve AWS region list, an error occurred: %s. Could not get regions for account %s.",
            e,
            account_id,
        )
        return

    dynamodb.sync(neo4j_session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    ec2.sync(neo4j_session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    rds.sync(neo4j_session, boto3_session, regions, account_id, sync_tag, common_job_parameters)

    # NOTE each of the below will generate DNS records
    route53.sync(neo4j_session, boto3_session, account_id, sync_tag)
    elasticsearch.sync(neo4j_session, boto3_session, account_id, sync_tag)

    # NOTE clean up all DNS records, regardless of which job created them
    run_cleanup_job('aws_account_dns_cleanup.json', neo4j_session, common_job_parameters)

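# NOTE: a minimal sketch of how this entry point might be driven for a single
# account. The Neo4j URI, credentials, profile name, account ID, and update tag
# below are hypothetical placeholders, not values from the source.
import boto3
import neo4j

PROFILE_NAME = 'example-profile'  # hypothetical
ACCOUNT_ID = '000000000000'       # hypothetical
UPDATE_TAG = 1234                 # hypothetical

driver = neo4j.GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'password'))
with driver.session() as neo4j_session:
    boto3_session = boto3.Session(profile_name=PROFILE_NAME)
    common_job_parameters = {'UPDATE_TAG': UPDATE_TAG, 'AWS_ID': ACCOUNT_ID}
    _sync_one_account(neo4j_session, boto3_session, ACCOUNT_ID, UPDATE_TAG, common_job_parameters)
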
def _sync_multiple_accounts(neo4j_session, accounts, sync_tag, common_job_parameters):
    logger.debug("Syncing AWS accounts: %s", ', '.join(accounts.values()))
    organizations.sync(neo4j_session, accounts, sync_tag, common_job_parameters)

    for profile_name, account_id in accounts.items():
        logger.info("Syncing AWS account with ID '%s' using configured profile '%s'.", account_id, profile_name)
        common_job_parameters["AWS_ID"] = account_id
        boto3_session = boto3.Session(profile_name=profile_name)
        _sync_one_account(neo4j_session, boto3_session, account_id, sync_tag, common_job_parameters)

    del common_job_parameters["AWS_ID"]

    # There may be orphan Principals which point outside of known AWS accounts. This job cleans
    # up those nodes after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_principals_cleanup.json', neo4j_session, common_job_parameters)

    # There may be orphan DNS entries that point outside of known AWS zones. This job cleans
    # up those entries after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_dns_cleanup.json', neo4j_session, common_job_parameters)

def _sync_multiple_accounts(
    neo4j_session: neo4j.Session,
    accounts: Dict[str, str],
    sync_tag: int,
    common_job_parameters: Dict[str, Any],
    aws_requested_syncs: List[str] = [],
) -> None:
    logger.debug("Syncing AWS accounts: %s", ', '.join(accounts.values()))
    organizations.sync(neo4j_session, accounts, sync_tag, common_job_parameters)

    for profile_name, account_id in accounts.items():
        logger.info("Syncing AWS account with ID '%s' using configured profile '%s'.", account_id, profile_name)
        common_job_parameters["AWS_ID"] = account_id
        boto3_session = boto3.Session(profile_name=profile_name)
        _autodiscover_accounts(neo4j_session, boto3_session, account_id, sync_tag, common_job_parameters)
        _sync_one_account(
            neo4j_session,
            boto3_session,
            account_id,
            sync_tag,
            common_job_parameters,
            aws_requested_syncs=aws_requested_syncs,  # Could be replaced later with per-account requested syncs
        )

    del common_job_parameters["AWS_ID"]

    # There may be orphan Principals which point outside of known AWS accounts. This job cleans
    # up those nodes after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_principals_cleanup.json', neo4j_session, common_job_parameters)

    # There may be orphan DNS entries that point outside of known AWS zones. This job cleans
    # up those entries after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_dns_cleanup.json', neo4j_session, common_job_parameters)

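# NOTE: the `accounts` argument maps AWS profile names to account IDs. Below is a
# minimal sketch of one way to build that mapping from locally configured profiles,
# assuming each profile has valid credentials. `available_profiles` and
# `sts.get_caller_identity` are real boto3 APIs; the helper itself is illustrative.
import boto3
from typing import Dict


def discover_account_ids() -> Dict[str, str]:
    """Map each locally configured AWS profile to the account ID it resolves to."""
    accounts: Dict[str, str] = {}
    for profile_name in boto3.Session().available_profiles:
        session = boto3.Session(profile_name=profile_name)
        # STS reports which account the profile's credentials belong to.
        account_id = session.client('sts').get_caller_identity()['Account']
        accounts[profile_name] = account_id
    return accounts
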
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
    run_cleanup_job(
        'aws_import_volumes_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )

def load_kms_key_details(
    neo4j_session: neo4j.Session,
    policy_alias_grants_data: List[Tuple[Any, Any, Any, Any]],
    region: str,
    aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Create dictionaries for all KMS key policies, aliases and grants so we can import them in a single query for each
    """
    policies = []
    aliases: List[str] = []
    grants: List[str] = []
    for key, policy, alias, grant in policy_alias_grants_data:
        parsed_policy = parse_policy(key, policy)
        if parsed_policy is not None:
            policies.append(parsed_policy)
        if len(alias) > 0:
            aliases.extend(alias)
        if len(grant) > 0:  # check the incoming grant list, not the (initially empty) accumulator
            grants.extend(grant)

    # cleanup existing policy properties
    run_cleanup_job(
        'aws_kms_details.json',
        neo4j_session,
        {'UPDATE_TAG': update_tag, 'AWS_ID': aws_account_id},
    )

    _load_kms_key_policies(neo4j_session, policies, update_tag)
    _load_kms_key_aliases(neo4j_session, aliases, update_tag)
    _load_kms_key_grants(neo4j_session, grants, update_tag)
    _set_default_values(neo4j_session, aws_account_id)

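# NOTE: a hypothetical sketch of the tuple shape load_kms_key_details expects;
# every value below is a placeholder for illustration, not data from the source.
policy_alias_grants_data = [
    (
        'example-key-id',                                                    # key
        '{"Version": "2012-10-17", "Statement": []}',                        # policy document (JSON string)
        [{'AliasName': 'alias/example', 'TargetKeyId': 'example-key-id'}],   # alias list
        [{'GrantId': 'example-grant-id', 'KeyId': 'example-key-id'}],        # grant list
    ),
]
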
def _sync_one_account(session, boto3_session, account_id, regions, sync_tag, common_job_parameters):
    # IAM
    iam.sync(session, boto3_session, account_id, sync_tag, common_job_parameters)

    # S3
    s3.sync(session, boto3_session, account_id, sync_tag, common_job_parameters)

    # Dynamo
    dynamodb.sync_dynamodb_tables(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)

    # EC2
    # TODO move this to EC2 module
    logger.info("Syncing EC2 for account '%s'.", account_id)
    ec2.sync_vpc(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    ec2.sync_ec2_security_groupinfo(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    ec2.sync_ec2_instances(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    ec2.sync_ec2_auto_scaling_groups(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    ec2.sync_load_balancers(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    ec2.sync_vpc_peering(session, boto3_session, regions, sync_tag, account_id, common_job_parameters)

    # RDS
    rds.sync_rds_instances(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)

    # NOTE each of the below will generate DNS records
    # Route53
    route53.sync_route53(session, boto3_session, account_id, sync_tag)

    # Elasticsearch
    elasticsearch.sync(session, boto3_session, account_id, sync_tag)

    # NOTE clean up all DNS records, regardless of which job created them
    run_cleanup_job('aws_account_dns_cleanup.json', session, common_job_parameters)

def cleanup_rds_instances_and_db_subnet_groups(neo4j_session, common_job_parameters):
    """
    Remove RDS graph nodes and DBSubnetGroups that were created from other ingestion runs
    """
    run_cleanup_job('aws_import_rds_instances_cleanup.json', neo4j_session, common_job_parameters)

def test_run_cleanup_job_on_nodes(mock_read_text: mock.MagicMock, neo4j_session):
    # Arrange: we are now at time T3, and node id1 exists but node id2 no longer exists
    neo4j_session.run(
        """
        MATCH (a:TypeA{id:"id1"})
        SET a.lastupdated={UPDATE_TAG_T3}
        """,
        UPDATE_TAG_T3=UPDATE_TAG_T3,
    )

    # Act: delete all nodes and rels where `lastupdated` != UPDATE_TAG_T3
    job_parameters = {'UPDATE_TAG': UPDATE_TAG_T3}
    run_cleanup_job(SAMPLE_JOB_FILENAME, neo4j_session, job_parameters)

    # Assert: Node id1 is the only node that still exists
    nodes = neo4j_session.run(
        """
        MATCH (n) RETURN n.id, n.lastupdated
        """,
    )
    actual_nodes = {(n['n.id'], n['n.lastupdated']) for n in nodes}
    expected_nodes = {
        ('id1', UPDATE_TAG_T3),
    }
    assert actual_nodes == expected_nodes
    mock_read_text.assert_called_once()

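# NOTE: the test patches the job-file read, so SAMPLE_JOB_FILENAME never has to
# exist on disk. Below is a hypothetical sketch of the kind of payload
# mock_read_text could return; the exact statement schema is an assumption,
# not the project's confirmed format.
SAMPLE_CLEANUP_JOB = """
{
    "statements": [
        {
            "query": "MATCH (n) WHERE n.lastupdated <> {UPDATE_TAG} WITH n LIMIT {LIMIT_SIZE} DETACH DELETE n",
            "iterative": true,
            "iterationsize": 100
        }
    ],
    "name": "sample cleanup job"
}
"""
mock_read_text.return_value = SAMPLE_CLEANUP_JOB
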
def load_s3_details(
    neo4j_session: neo4j.Session,
    s3_details_iter: Generator[Any, Any, Any],
    aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Create dictionaries for all bucket ACLs and all bucket policies so we can import them in a single query for each
    """
    acls: List[Dict] = []
    policies: List[Dict] = []
    encryption_configs: List[Dict] = []
    for bucket, acl, policy, encryption in s3_details_iter:
        parsed_acls = parse_acl(acl, bucket, aws_account_id)
        if parsed_acls is not None:
            acls.extend(parsed_acls)
        parsed_policy = parse_policy(bucket, policy)
        if parsed_policy is not None:
            policies.append(parsed_policy)
        parsed_encryption = parse_encryption(bucket, encryption)
        if parsed_encryption is not None:
            encryption_configs.append(parsed_encryption)

    # cleanup existing policy properties set on S3 Buckets
    run_cleanup_job(
        'aws_s3_details.json',
        neo4j_session,
        {'UPDATE_TAG': update_tag, 'AWS_ID': aws_account_id},
    )

    _load_s3_acls(neo4j_session, acls, aws_account_id, update_tag)
    _load_s3_policies(neo4j_session, policies, update_tag)
    _load_s3_encryption(neo4j_session, encryption_configs, update_tag)
    _set_default_values(neo4j_session, aws_account_id)

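# NOTE: a minimal sketch of a generator that could feed load_s3_details. The
# get_bucket_acl / get_bucket_policy / get_bucket_encryption calls are real boto3
# S3 client APIs; the helper name, bucket list, and blanket error handling are
# simplifying assumptions.
import botocore.exceptions


def get_s3_details(boto3_session, bucket_names):
    """Yield (bucket, acl, policy, encryption) tuples for each bucket name."""
    client = boto3_session.client('s3')
    for bucket in bucket_names:
        try:
            acl = client.get_bucket_acl(Bucket=bucket)
        except botocore.exceptions.ClientError:
            acl = None
        try:
            policy = client.get_bucket_policy(Bucket=bucket)
        except botocore.exceptions.ClientError:
            policy = None
        try:
            encryption = client.get_bucket_encryption(Bucket=bucket)
        except botocore.exceptions.ClientError:
            encryption = None
        yield bucket, acl, policy, encryption
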
def cleanup_reserved_instances(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
    run_cleanup_job(
        'aws_import_reserved_instances_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )

def sync_role_policies(neo4j_session, boto3_session, current_aws_account_id, aws_update_tag, common_job_parameters):
    logger.debug("Syncing IAM role policies for account '%s'.", current_aws_account_id)
    query = """
    MATCH (role:AWSRole)<-[:AWS_ROLE]-(AWSAccount{id: {AWS_ACCOUNT_ID}})
    WHERE exists(role.name)
    RETURN role.name AS name, role.arn AS arn;
    """
    roles = neo4j_session.run(query, AWS_ACCOUNT_ID=current_aws_account_id)
    roles_policies = {}
    for role in roles:
        role_arn = role["arn"]
        role_name = role["name"]
        roles_policies[role_arn] = {}
        for policy_name in get_role_policies(boto3_session, role_name)['PolicyNames']:
            roles_policies[role_arn][policy_name] = get_role_policy_info(boto3_session, role_name, policy_name)
    load_role_policies(neo4j_session, roles_policies, aws_update_tag)
    run_cleanup_job(
        'aws_import_roles_policy_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )

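# NOTE: get_role_policies mirrors the boto3 IAM list_role_policies response, which
# carries a 'PolicyNames' list. A plausible sketch of the two helpers, assuming
# they are thin wrappers over the real list_role_policies and get_role_policy APIs.
def get_role_policies(boto3_session, role_name):
    """List the inline policy names attached to an IAM role."""
    client = boto3_session.client('iam')
    return client.list_role_policies(RoleName=role_name)


def get_role_policy_info(boto3_session, role_name, policy_name):
    """Fetch a single inline policy document for an IAM role."""
    client = boto3_session.client('iam')
    return client.get_role_policy(RoleName=role_name, PolicyName=policy_name)
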
def start_extension_ingestion(session, config):
    """
    If this module is configured, perform ingestion of CRXcavator data. Otherwise warn and exit
    :param session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    if not CRXCAVATOR_API_BASE_URL or not CRXCAVATOR_API_KEY:
        logger.warning('CRXcavator import is not configured - skipping this module. See docs to configure.')
        return

    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }
    # While we typically want to crash the sync when a module fails, the CRXcavator
    # API is still in beta and is not always available. If we receive a requests
    # exception from raise_for_status, handle it and continue with other modules;
    # otherwise crash the sync.
    try:
        sync_extensions(session, common_job_parameters, CRXCAVATOR_API_KEY, CRXCAVATOR_API_BASE_URL)
        run_cleanup_job(
            'crxcavator_import_cleanup.json',
            session,
            common_job_parameters,
        )
    except exceptions.RequestException as e:
        logger.error("Could not complete request to the CRXcavator API: %s", e)

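# NOTE: CRXCAVATOR_API_BASE_URL and CRXCAVATOR_API_KEY are module-level settings.
# A plausible sketch of how they might be populated from the environment; the
# variable names here are assumptions, not confirmed by the source.
import os

CRXCAVATOR_API_BASE_URL = os.environ.get('CRXCAVATOR_URL')
CRXCAVATOR_API_KEY = os.environ.get('CRXCAVATOR_KEY')
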
def load_s3_details(neo4j_session, s3_details_iter, aws_account_id, update_tag):
    """
    Create dictionaries for all bucket ACLs and all bucket policies so we can import them in a single query for each
    """
    acls = []
    policies = []
    for bucket, acl, policy in s3_details_iter:
        if acl is None:
            continue
        parsed_acls = parse_acl(acl, bucket, aws_account_id)
        if parsed_acls:
            acls.extend(parsed_acls)
        else:
            continue
        parsed_policy = parse_policy(bucket, policy)
        if parsed_policy is not None:
            policies.append(parsed_policy)

    # cleanup existing policy properties set on S3 Buckets
    run_cleanup_job(
        'aws_s3_details.json',
        neo4j_session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': aws_account_id,
        },
    )

    _load_s3_acls(neo4j_session, acls, aws_account_id, update_tag)
    _load_s3_policies(neo4j_session, policies, update_tag)
    _set_default_values(neo4j_session, aws_account_id)

def cleanup_rds_clusters(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
    """
    Remove RDS cluster graph nodes
    """
    run_cleanup_job('aws_import_rds_clusters_cleanup.json', neo4j_session, common_job_parameters)

def sync_roles(neo4j_session, boto3_session, current_aws_account_id, aws_update_tag, common_job_parameters):
    logger.debug("Syncing IAM roles for account '%s'.", current_aws_account_id)
    data = get_role_list_data(boto3_session)
    load_roles(neo4j_session, data['Roles'], current_aws_account_id, aws_update_tag)
    run_cleanup_job('aws_import_roles_cleanup.json', neo4j_session, common_job_parameters)

def cleanup_gcp_subnets(neo4j_session, common_job_parameters):
    """
    Delete out-of-date GCP VPC subnet nodes and relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job('gcp_compute_vpc_subnet_cleanup.json', neo4j_session, common_job_parameters)

def cleanup_gcp_projects(neo4j_session, common_job_parameters):
    """
    Remove stale GCP projects and their relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    run_cleanup_job('gcp_crm_project_cleanup.json', neo4j_session, common_job_parameters)

def sync(neo4j_session, common_job_parameters, github_api_key, github_url, organization):
    logger.info("Syncing GitHub users")
    user_data, org_data = get(github_api_key, github_url, organization)
    load_organization_users(neo4j_session, user_data, org_data, common_job_parameters['UPDATE_TAG'])
    run_cleanup_job('github_users_cleanup.json', neo4j_session, common_job_parameters)

def cleanup_gcp_firewall_rules(neo4j_session, common_job_parameters):
    """
    Delete out-of-date GCP firewalls and their relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job('gcp_compute_firewall_cleanup.json', neo4j_session, common_job_parameters)

def cleanup_route53(session, current_aws_id, update_tag):
    run_cleanup_job(
        'aws_dns_cleanup.json',
        session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': current_aws_id,
        },
    )

def cleanup_gcp_folders(session, common_job_parameters):
    """
    Remove stale GCP folders and their relationships
    :param session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    run_cleanup_job('gcp_crm_folder_cleanup.json', session, common_job_parameters)

def cleanup_droplets(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
    """
    Delete out-of-date DigitalOcean droplets and relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job('digitalocean_droplet_cleanup.json', neo4j_session, common_job_parameters)
    return

def cleanup_gcp_forwarding_rules(neo4j_session, common_job_parameters):
    """
    Delete out-of-date GCP forwarding rules and relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job('gcp_compute_forwarding_rules_cleanup.json', neo4j_session, common_job_parameters)

def _cleanup_okta_organizations(session, common_job_parameters):
    """
    Remove stale Okta organizations
    :param session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    run_cleanup_job('okta_import_cleanup.json', session, common_job_parameters)

def cleanup(neo4j_session, update_tag, aws_account_id):
    run_cleanup_job(
        'aws_import_es_cleanup.json',
        neo4j_session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': aws_account_id,
        },
    )

def cleanup_gcp_instances(session, common_job_parameters):
    """
    Delete out-of-date GCP instance nodes and relationships
    :param session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job('gcp_compute_instance_cleanup.json', session, common_job_parameters)

def cleanup_route53(neo4j_session: neo4j.Session, current_aws_id: str, update_tag: int) -> None:
    run_cleanup_job(
        'aws_dns_cleanup.json',
        neo4j_session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': current_aws_id,
        },
    )

def sync_groups(neo4j_session, boto3_session, current_aws_account_id, aws_update_tag, common_job_parameters):
    logger.debug("Syncing IAM groups for account '%s'.", current_aws_account_id)
    data = get_group_list_data(boto3_session)
    load_groups(neo4j_session, data['Groups'], current_aws_account_id, aws_update_tag)
    sync_groups_inline_policies(boto3_session, data, neo4j_session, aws_update_tag)
    sync_group_managed_policies(boto3_session, data, neo4j_session, aws_update_tag)
    run_cleanup_job('aws_import_groups_cleanup.json', neo4j_session, common_job_parameters)

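# NOTE: a plausible sketch of get_group_list_data, assuming it wraps the real
# boto3 IAM list_groups API and follows pagination; the helper body is an
# illustration, not the project's confirmed implementation.
def get_group_list_data(boto3_session):
    """Collect all IAM groups for the account, following pagination."""
    client = boto3_session.client('iam')
    paginator = client.get_paginator('list_groups')
    groups = []
    for page in paginator.paginate():
        groups.extend(page['Groups'])
    return {'Groups': groups}
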
def cleanup(neo4j_session: neo4j.Session, update_tag: int, aws_account_id: str) -> None:
    run_cleanup_job(
        'aws_import_es_cleanup.json',
        neo4j_session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': aws_account_id,
        },
    )

def cleanup(neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int) -> None:
    run_cleanup_job(
        'aws_import_elasticache_cleanup.json',
        neo4j_session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': current_aws_account_id,
        },
    )