def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    logger.info("Syncing S3 for account '%s'.", current_aws_account_id)
    bucket_data = get_s3_bucket_list(boto3_session)

    load_s3_buckets(neo4j_session, bucket_data, current_aws_account_id, update_tag)
    cleanup_s3_buckets(neo4j_session, common_job_parameters)

    acl_and_policy_data_iter = get_s3_bucket_details(boto3_session, bucket_data)
    load_s3_details(neo4j_session, acl_and_policy_data_iter, current_aws_account_id, update_tag)
    cleanup_s3_bucket_acl_and_policy(neo4j_session, common_job_parameters)
    merge_module_sync_metadata(
        neo4j_session,
        group_type='AWSAccount',
        group_id=current_aws_account_id,
        synced_type='S3Bucket',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    logger.info("Syncing IAM for account '%s'.", current_aws_account_id)
    # This module only syncs IAM information that is in use.
    # As such, only policies that are attached to a user, role, or group are synced.
    sync_users(neo4j_session, boto3_session, current_aws_account_id, update_tag, common_job_parameters)
    sync_groups(neo4j_session, boto3_session, current_aws_account_id, update_tag, common_job_parameters)
    sync_roles(neo4j_session, boto3_session, current_aws_account_id, update_tag, common_job_parameters)
    sync_group_memberships(neo4j_session, boto3_session, current_aws_account_id, update_tag, common_job_parameters)
    sync_assumerole_relationships(neo4j_session, current_aws_account_id, update_tag, common_job_parameters)
    sync_user_access_keys(neo4j_session, boto3_session, current_aws_account_id, update_tag, common_job_parameters)
    run_cleanup_job('aws_import_principals_cleanup.json', neo4j_session, common_job_parameters)
    merge_module_sync_metadata(
        neo4j_session,
        group_type='AWSAccount',
        group_id=current_aws_account_id,
        synced_type='AWSPrincipal',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    sync_rds_clusters(
        neo4j_session, boto3_session, regions, current_aws_account_id, update_tag,
        common_job_parameters,
    )
    sync_rds_instances(
        neo4j_session, boto3_session, regions, current_aws_account_id, update_tag,
        common_job_parameters,
    )
    merge_module_sync_metadata(
        neo4j_session,
        group_type='AWSAccount',
        group_id=current_aws_account_id,
        synced_type='RDSCluster',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
def sync_namespaces(session: Session, client: K8sClient, update_tag: int) -> Dict:
    cluster, namespaces = get_namespaces(client)
    load_namespaces(session, cluster, namespaces, update_tag)
    merge_module_sync_metadata(
        session,
        group_type='KubernetesCluster',
        group_id=cluster['uid'],
        synced_type='KubernetesCluster',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
    return cluster
def start_crowdstrike_ingestion(
    neo4j_session: neo4j.Session,
    config: Config,
) -> None:
    """
    Perform ingestion of crowdstrike data.
    :param neo4j_session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }
    if (not config.crowdstrike_client_id or not config.crowdstrike_client_secret):
        logger.error("crowdstrike config not found")
        return

    authorization = get_authorization(
        config.crowdstrike_client_id,
        config.crowdstrike_client_secret,
        config.crowdstrike_api_url,
    )
    sync_hosts(
        neo4j_session,
        config.update_tag,
        authorization,
    )
    sync_vulnerabilities(
        neo4j_session,
        config.update_tag,
        authorization,
    )
    run_cleanup_job(
        "crowdstrike_import_cleanup.json",
        neo4j_session,
        common_job_parameters,
    )

    group_id = "public"
    if config.crowdstrike_api_url:
        group_id = config.crowdstrike_api_url
    merge_module_sync_metadata(
        neo4j_session,
        group_type='crowdstrike',
        group_id=group_id,
        synced_type='crowdstrike',
        update_tag=config.update_tag,
        stat_handler=stat_handler,
    )
def test_merge_module_sync_metadata(mock_stat_incr, neo4j_session):
    # Arrange
    group_type = 'AWSAccount'
    group_id = TEST_ACCOUNT_ID
    synced_type = 'S3Bucket'
    stat_handler = get_stats_client(__name__)
    expected_nodes = {
        (
            f'AWSAccount_{TEST_ACCOUNT_ID}_S3Bucket',
            'AWSAccount',
            TEST_ACCOUNT_ID,
            'S3Bucket',
            TEST_UPDATE_TAG,
        ),
    }

    # Act
    merge_module_sync_metadata(
        neo4j_session=neo4j_session,
        group_type=group_type,
        group_id=group_id,
        synced_type=synced_type,
        update_tag=TEST_UPDATE_TAG,
        stat_handler=stat_handler,
    )

    # Assert
    nodes = neo4j_session.run(f"""
    MATCH (m:ModuleSyncMetadata{{id:'AWSAccount_{TEST_ACCOUNT_ID}_S3Bucket'}})
    RETURN m.id, m.syncedtype, m.grouptype, m.groupid, m.lastupdated
    """)
    actual_nodes = {
        (
            n['m.id'],
            n['m.grouptype'],
            n['m.groupid'],
            n['m.syncedtype'],
            n['m.lastupdated'],
        )
        for n in nodes
    }
    assert actual_nodes == expected_nodes
    mock_stat_incr.assert_called_once_with(
        f'{group_type}_{group_id}_{synced_type}_lastupdated',
        TEST_UPDATE_TAG,
    )
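# The test above pins down the observable behavior of merge_module_sync_metadata: a
# ModuleSyncMetadata node keyed on '{group_type}_{group_id}_{synced_type}' with
# grouptype/groupid/syncedtype/lastupdated properties, plus a stats-client incr() call
# keyed on '{...}_lastupdated'. The sketch below is a minimal, hypothetical helper
# consistent with those assertions; it is not cartography's actual implementation, and
# anything beyond what the test checks (e.g. the ON CREATE clause) is an assumption.
def merge_module_sync_metadata_sketch(
    neo4j_session,
    group_type: str,
    group_id: str,
    synced_type: str,
    update_tag: int,
    stat_handler,
) -> None:
    metadata_id = f'{group_type}_{group_id}_{synced_type}'
    query = """
    MERGE (m:ModuleSyncMetadata{id: $id})
    ON CREATE SET m.firstseen = timestamp()
    SET m.grouptype = $group_type,
        m.groupid = $group_id,
        m.syncedtype = $synced_type,
        m.lastupdated = $update_tag
    """
    neo4j_session.run(
        query,
        id=metadata_id,
        group_type=group_type,
        group_id=group_id,
        synced_type=synced_type,
        update_tag=update_tag,
    )
    # Assumes the stats client exposes incr(key, value), matching the mocked assertion above.
    stat_handler.incr(f'{metadata_id}_lastupdated', update_tag)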
def sync(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict,
    github_api_key: str,
    github_url: str,
    organization: str,
) -> None:
    logger.info("Syncing GitHub users")
    user_data, org_data = get(github_api_key, github_url, organization)
    load_organization_users(neo4j_session, user_data, org_data, common_job_parameters['UPDATE_TAG'])
    run_cleanup_job('github_users_cleanup.json', neo4j_session, common_job_parameters)
    merge_module_sync_metadata(
        neo4j_session,
        group_type='GitHubOrganization',
        group_id=org_data['url'],
        synced_type='GitHubOrganization',
        update_tag=common_job_parameters['UPDATE_TAG'],
        stat_handler=stat_handler,
    )
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    for region in regions:
        logger.info(f"Syncing ElastiCache clusters for region '{region}' in account {current_aws_account_id}")
        clusters = get_elasticache_clusters(boto3_session, region)
        load_elasticache_clusters(neo4j_session, clusters, region, current_aws_account_id, update_tag)
    cleanup(neo4j_session, current_aws_account_id, update_tag)
    merge_module_sync_metadata(
        neo4j_session,
        group_type='AWSAccount',
        group_id=current_aws_account_id,
        synced_type='ElasticacheCluster',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
def start_pagerduty_ingestion(
    neo4j_session: neo4j.Session,
    config: Config,
) -> None:
    """
    Perform ingestion of pagerduty data.
    :param neo4j_session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }
    if not config.pagerduty_api_key:
        logger.info('PagerDuty import is not configured - skipping this module. See docs to configure.')
        return
    session = APISession(config.pagerduty_api_key)

    sync_users(neo4j_session, config.update_tag, session)
    sync_teams(neo4j_session, config.update_tag, session)
    sync_vendors(neo4j_session, config.update_tag, session)
    sync_services(neo4j_session, config.update_tag, session)
    sync_schedules(neo4j_session, config.update_tag, session)
    sync_escalation_policies(neo4j_session, config.update_tag, session)
    run_cleanup_job(
        "pagerduty_import_cleanup.json",
        neo4j_session,
        common_job_parameters,
    )
    merge_module_sync_metadata(
        neo4j_session,
        group_type='pagerduty',
        group_id='module',
        synced_type="pagerduty",
        update_tag=config.update_tag,
        stat_handler=stat_handler,
    )
def _sync_one_account(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
    regions: List[str] = [],
    aws_requested_syncs: Iterable[str] = RESOURCE_FUNCTIONS.keys(),
) -> None:
    if not regions:
        regions = _autodiscover_account_regions(boto3_session, current_aws_account_id)

    sync_args = _build_aws_sync_kwargs(
        neo4j_session, boto3_session, regions, current_aws_account_id, update_tag, common_job_parameters,
    )

    for func_name in aws_requested_syncs:
        if func_name in RESOURCE_FUNCTIONS:
            # Skip permission relationships and tags for now because they rely on data already being in the graph
            if func_name not in ['permission_relationships', 'resourcegroupstaggingapi']:
                RESOURCE_FUNCTIONS[func_name](**sync_args)
            else:
                continue
        else:
            raise ValueError(
                f'AWS sync function "{func_name}" was specified but does not exist. Did you misspell it?',
            )

    # NOTE clean up all DNS records, regardless of which job created them
    run_cleanup_job('aws_account_dns_cleanup.json', neo4j_session, common_job_parameters)

    # MAP IAM permissions
    if 'permission_relationships' in aws_requested_syncs:
        RESOURCE_FUNCTIONS['permission_relationships'](**sync_args)

    # AWS Tags - Must always be last.
    if 'resourcegroupstaggingapi' in aws_requested_syncs:
        RESOURCE_FUNCTIONS['resourcegroupstaggingapi'](**sync_args)

    run_analysis_job(
        'aws_ec2_iaminstanceprofile.json',
        neo4j_session,
        common_job_parameters,
    )

    run_analysis_job(
        'aws_lambda_ecr.json',
        neo4j_session,
        common_job_parameters,
    )

    merge_module_sync_metadata(
        neo4j_session,
        group_type='AWSAccount',
        group_id=current_aws_account_id,
        synced_type='AWSAccount',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
def start_okta_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
    """
    Starts the Okta ingestion process
    :param neo4j_session: The Neo4j session
    :param config: A `cartography.config` object
    :return: Nothing
    """
    if not config.okta_api_key:
        logger.warning(
            "No valid Okta credentials could be found. Exiting Okta sync stage.",
        )
        return

    logger.debug(f"Starting Okta sync on {config.okta_org_id}")

    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
        "OKTA_ORG_ID": config.okta_org_id,
    }

    state = OktaSyncState()

    organization.create_okta_organization(neo4j_session, config.okta_org_id, config.update_tag)
    users.sync_okta_users(neo4j_session, config.okta_org_id, config.update_tag, config.okta_api_key, state)
    groups.sync_okta_groups(neo4j_session, config.okta_org_id, config.update_tag, config.okta_api_key, state)
    applications.sync_okta_applications(neo4j_session, config.okta_org_id, config.update_tag, config.okta_api_key)
    factors.sync_users_factors(neo4j_session, config.okta_org_id, config.update_tag, config.okta_api_key, state)
    origins.sync_trusted_origins(neo4j_session, config.okta_org_id, config.update_tag, config.okta_api_key)
    awssaml.sync_okta_aws_saml(neo4j_session, config.okta_saml_role_regex, config.update_tag)

    # Syncing roles needs credentials with admin permissions.
    # Soft-fail, since many API tokens won't have such high privileges;
    # Okta reports this as error code E0000006.
    # See https://developer.okta.com/docs/reference/error-codes/
    try:
        roles.sync_roles(neo4j_session, config.okta_org_id, config.update_tag, config.okta_api_key, state)
    except OktaError as okta_error:
        logger.warning(f"Unable to pull admin roles: got {okta_error}")

        # Getting roles requires super admin, which most tokens won't have.
        if okta_error.error_code == "E0000006":
            logger.warning(
                "Unable to sync admin roles - api token needs admin rights to pull admin roles data",
            )

    _cleanup_okta_organizations(neo4j_session, common_job_parameters)

    merge_module_sync_metadata(
        neo4j_session,
        group_type='OktaOrganization',
        group_id=config.okta_org_id,
        synced_type='OktaOrganization',
        update_tag=config.update_tag,
        stat_handler=stat_handler,
    )