Example #1
0
def _sync_one_account(neo4j_session, boto3_session, account_id, sync_tag,
                      common_job_parameters):
    """
    Ingest all supported AWS resources for a single account.

    Bails out after the global services if the EC2 region list cannot be
    retrieved, since every remaining regional sync depends on it.
    """
    # Global (non-regional) services.
    for global_module in (iam, s3):
        global_module.sync(neo4j_session, boto3_session, account_id, sync_tag,
                           common_job_parameters)

    try:
        regions = ec2.get_ec2_regions(boto3_session)
    except botocore.exceptions.ClientError as e:
        logger.debug("Error occurred getting EC2 regions.", exc_info=True)
        logger.error(
            "Failed to retrieve AWS region list, an error occurred: %s. Could not get regions for account %s.",
            e,
            account_id,
        )
        return

    # Regional services all consume the same region list.
    for regional_module in (dynamodb, ec2, rds):
        regional_module.sync(neo4j_session, boto3_session, regions, account_id,
                             sync_tag, common_job_parameters)

    # NOTE each of the below will generate DNS records
    route53.sync(neo4j_session, boto3_session, account_id, sync_tag)
    elasticsearch.sync(neo4j_session, boto3_session, account_id, sync_tag)

    # NOTE clean up all DNS records, regardless of which job created them
    run_cleanup_job('aws_account_dns_cleanup.json', neo4j_session,
                    common_job_parameters)
Example #2
0
def _sync_multiple_accounts(neo4j_session, accounts, sync_tag,
                            common_job_parameters):
    """
    Sync every configured AWS account, then run the cross-account cleanup jobs.

    `accounts` maps boto3 profile names to AWS account IDs.
    """
    logger.debug("Syncing AWS accounts: %s", ', '.join(accounts.values()))
    organizations.sync(neo4j_session, accounts, sync_tag, common_job_parameters)

    for profile_name, account_id in accounts.items():
        logger.info(
            "Syncing AWS account with ID '%s' using configured profile '%s'.",
            account_id, profile_name)
        # Scope all per-account cleanup/ingest jobs to this account ID.
        common_job_parameters["AWS_ID"] = account_id
        boto3_session = boto3.Session(profile_name=profile_name)
        _sync_one_account(neo4j_session, boto3_session, account_id, sync_tag,
                          common_job_parameters)

    common_job_parameters.pop("AWS_ID")

    # There may be orphan Principals which point outside of known AWS accounts. This job cleans
    # up those nodes after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_principals_cleanup.json',
                    neo4j_session, common_job_parameters)
    # There may be orphan DNS entries that point outside of known AWS zones. This job cleans
    # up those entries after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_dns_cleanup.json', neo4j_session,
                    common_job_parameters)
Example #3
0
def _sync_multiple_accounts(
    neo4j_session: neo4j.Session,
    accounts: Dict[str, str],
    sync_tag: int,
    common_job_parameters: Dict[str, Any],
    # FIX: the previous default was a mutable `[]`, which is shared across all
    # calls (the classic mutable-default-argument pitfall). Use None as the
    # sentinel and build a fresh list per invocation. The string annotation
    # stays runtime-safe even if Optional is not imported at module level.
    aws_requested_syncs: 'Optional[List[str]]' = None,
) -> None:
    """
    Sync every configured AWS account, then run the cross-account cleanup jobs.

    :param accounts: Mapping of boto3 profile name -> AWS account ID.
    :param aws_requested_syncs: Optional subset of syncs to run per account;
        None/empty keeps the previous "run everything" behavior.
    """
    if aws_requested_syncs is None:
        aws_requested_syncs = []

    logger.debug("Syncing AWS accounts: %s", ', '.join(accounts.values()))
    organizations.sync(neo4j_session, accounts, sync_tag, common_job_parameters)

    for profile_name, account_id in accounts.items():
        logger.info("Syncing AWS account with ID '%s' using configured profile '%s'.", account_id, profile_name)
        common_job_parameters["AWS_ID"] = account_id
        boto3_session = boto3.Session(profile_name=profile_name)

        _autodiscover_accounts(neo4j_session, boto3_session, account_id, sync_tag, common_job_parameters)

        _sync_one_account(
            neo4j_session,
            boto3_session,
            account_id,
            sync_tag,
            common_job_parameters,
            aws_requested_syncs=aws_requested_syncs,  # Could be replaced later with per-account requested syncs
        )

    del common_job_parameters["AWS_ID"]

    # There may be orphan Principals which point outside of known AWS accounts. This job cleans
    # up those nodes after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_principals_cleanup.json', neo4j_session, common_job_parameters)

    # There may be orphan DNS entries that point outside of known AWS zones. This job cleans
    # up those entries after all AWS accounts have been synced.
    run_cleanup_job('aws_post_ingestion_dns_cleanup.json', neo4j_session, common_job_parameters)
Example #4
0
def cleanup_volumes(neo4j_session: neo4j.Session,
                    common_job_parameters: Dict) -> None:
    """Run the volumes cleanup job to drop stale volume nodes/relationships."""
    run_cleanup_job('aws_import_volumes_cleanup.json', neo4j_session, common_job_parameters)
Example #5
0
def load_kms_key_details(
        neo4j_session: neo4j.Session, policy_alias_grants_data: List[Tuple[Any, Any, Any, Any]], region: str,
        aws_account_id: str, update_tag: int,
) -> None:
    """
    Create dictionaries for all KMS key policies, aliases and grants so we can import them in a single query for each.

    :param policy_alias_grants_data: iterable of (key, policy, aliases, grants) tuples per KMS key.
    """
    policies = []
    # NOTE(review): annotated List[str] here as in the original; elements come
    # straight from the per-key alias/grant lists — confirm their actual type.
    aliases: List[str] = []
    grants: List[str] = []
    for key, policy, alias, grant in policy_alias_grants_data:
        parsed_policy = parse_policy(key, policy)
        if parsed_policy is not None:
            policies.append(parsed_policy)
        if len(alias) > 0:
            aliases.extend(alias)
        # BUG FIX: this previously tested `len(grants)` — the accumulator,
        # which starts empty — so grants were never collected. Test the
        # per-key `grant` list instead.
        if len(grant) > 0:
            grants.extend(grant)

    # cleanup existing policy properties
    run_cleanup_job(
        'aws_kms_details.json',
        neo4j_session,
        {'UPDATE_TAG': update_tag, 'AWS_ID': aws_account_id},
    )

    _load_kms_key_policies(neo4j_session, policies, update_tag)
    _load_kms_key_aliases(neo4j_session, aliases, update_tag)
    _load_kms_key_grants(neo4j_session, grants, update_tag)
    _set_default_values(neo4j_session, aws_account_id)
Example #6
0
def _sync_one_account(session, boto3_session, account_id, regions, sync_tag, common_job_parameters):
    """Ingest all supported resources for one AWS account, then clean up DNS records."""
    # Global services: IAM and S3.
    iam.sync(session, boto3_session, account_id, sync_tag, common_job_parameters)
    s3.sync(session, boto3_session, account_id, sync_tag, common_job_parameters)

    # Dynamo
    dynamodb.sync_dynamodb_tables(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)

    # EC2
    # TODO move this to EC2 module
    logger.info("Syncing EC2 for account '%s'.", account_id)
    for ec2_sync in (
        ec2.sync_vpc,
        ec2.sync_ec2_security_groupinfo,
        ec2.sync_ec2_instances,
        ec2.sync_ec2_auto_scaling_groups,
        ec2.sync_load_balancers,
    ):
        ec2_sync(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)
    # sync_vpc_peering takes (regions, sync_tag, account_id) in a different order.
    ec2.sync_vpc_peering(session, boto3_session, regions, sync_tag, account_id, common_job_parameters)

    # RDS
    rds.sync_rds_instances(session, boto3_session, regions, account_id, sync_tag, common_job_parameters)

    # NOTE each of the below will generate DNS records
    route53.sync_route53(session, boto3_session, account_id, sync_tag)
    elasticsearch.sync(session, boto3_session, account_id, sync_tag)

    # NOTE clean up all DNS records, regardless of which job created them
    run_cleanup_job('aws_account_dns_cleanup.json', session, common_job_parameters)
Example #7
0
def cleanup_rds_instances_and_db_subnet_groups(neo4j_session,
                                               common_job_parameters):
    """
    Remove RDS graph nodes and DBSubnetGroups that were created from other ingestion runs
    """
    run_cleanup_job(
        'aws_import_rds_instances_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )
Example #8
0
def test_run_cleanup_job_on_nodes(mock_read_text: mock.MagicMock,
                                  neo4j_session):
    """Nodes whose `lastupdated` tag is stale get deleted by the cleanup job."""
    # Arrange: we are now at time T3, and node id1 exists but node id2 no longer exists
    neo4j_session.run(
        """
        MATCH (a:TypeA{id:"id1"}) SET a.lastupdated={UPDATE_TAG_T3}
        """,
        UPDATE_TAG_T3=UPDATE_TAG_T3,
    )

    # Act: delete all nodes and rels where `lastupdated` != UPDATE_TAG_T3
    run_cleanup_job(SAMPLE_JOB_FILENAME, neo4j_session, {'UPDATE_TAG': UPDATE_TAG_T3})

    # Assert: Node id1 is the only node that still exists
    result = neo4j_session.run(
        """
        MATCH (n) RETURN n.id, n.lastupdated
        """, )
    surviving_nodes = {(record['n.id'], record['n.lastupdated']) for record in result}
    assert surviving_nodes == {('id1', UPDATE_TAG_T3)}
    mock_read_text.assert_called_once()
Example #9
0
def load_s3_details(
    neo4j_session: neo4j.Session, s3_details_iter: Generator[Any, Any, Any], aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Create dictionaries for all bucket ACLs and all bucket policies so we can import them in a single query for each
    """
    acls: List[Dict] = []
    policies: List[Dict] = []
    encryption_configs: List[Dict] = []
    for bucket, acl, policy, encryption in s3_details_iter:
        # Each parser returns None when there is nothing to load for the bucket.
        acl_entries = parse_acl(acl, bucket, aws_account_id)
        if acl_entries is not None:
            acls.extend(acl_entries)
        bucket_policy = parse_policy(bucket, policy)
        if bucket_policy is not None:
            policies.append(bucket_policy)
        encryption_config = parse_encryption(bucket, encryption)
        if encryption_config is not None:
            encryption_configs.append(encryption_config)

    # cleanup existing policy properties set on S3 Buckets
    run_cleanup_job(
        'aws_s3_details.json',
        neo4j_session,
        {'UPDATE_TAG': update_tag, 'AWS_ID': aws_account_id},
    )

    _load_s3_acls(neo4j_session, acls, aws_account_id, update_tag)
    _load_s3_policies(neo4j_session, policies, update_tag)
    _load_s3_encryption(neo4j_session, encryption_configs, update_tag)
    _set_default_values(neo4j_session, aws_account_id)
Example #10
0
def cleanup_reserved_instances(neo4j_session: neo4j.Session,
                               common_job_parameters: Dict) -> None:
    """Run the reserved-instances cleanup job to drop stale nodes/relationships."""
    run_cleanup_job('aws_import_reserved_instances_cleanup.json', neo4j_session, common_job_parameters)
Example #11
0
def sync_role_policies(neo4j_session, boto3_session, current_aws_account_id,
                       aws_update_tag, common_job_parameters):
    """Fetch the inline policies for every known role in the account, load them, and prune stale ones."""
    logger.debug("Syncing IAM role policies for account '%s'.",
                 current_aws_account_id)
    query = """
    MATCH (role:AWSRole)<-[:AWS_ROLE]-(AWSAccount{id: {AWS_ACCOUNT_ID}})
    WHERE exists(role.name)
    RETURN role.name AS name, role.arn AS arn;
    """
    roles_policies = {}
    for role in neo4j_session.run(query, AWS_ACCOUNT_ID=current_aws_account_id):
        role_name = role["name"]
        # Map each inline policy name to its full policy document.
        roles_policies[role["arn"]] = {
            policy_name: get_role_policy_info(boto3_session, role_name, policy_name)
            for policy_name in get_role_policies(boto3_session, role_name)['PolicyNames']
        }
    load_role_policies(neo4j_session, roles_policies, aws_update_tag)
    run_cleanup_job(
        'aws_import_roles_policy_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )
Example #12
0
def start_extension_ingestion(session, config):
    """
    If this module is configured, perform ingestion of CRXcavator data. Otherwise warn and exit
    :param session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    if not CRXCAVATOR_API_BASE_URL or not CRXCAVATOR_API_KEY:
        logger.warning(
            'CRXcavator import is not configured - skipping this module. See docs to configure.'
        )
        return

    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }
    # while we typically want to crash sync on failure of module,
    # the crxcavator API is still in beta and is not always available.
    # if we receive a requests exception from raise_for_status
    # we'll handle and continue with other modules, otherwise crash sync
    try:
        sync_extensions(session, common_job_parameters, CRXCAVATOR_API_KEY,
                        CRXCAVATOR_API_BASE_URL)
        run_cleanup_job(
            'crxcavator_import_cleanup.json',
            session,
            common_job_parameters,
        )
    except exceptions.RequestException as e:
        # BUG FIX: stdlib logging uses %-style placeholders; the previous
        # '{}' was never interpolated, so the exception detail was dropped.
        logger.error("Could not complete request to the CRXcavator API: %s", e)
Example #13
0
def load_s3_details(neo4j_session, s3_details_iter, aws_account_id,
                    update_tag):
    """
    Create dictionaries for all bucket ACLs and all bucket policies so we can import them in a single query for each
    """
    acls = []
    policies = []
    for bucket, acl, policy in s3_details_iter:
        if acl is None:
            continue
        parsed_acls = parse_acl(acl, bucket, aws_account_id)
        if not parsed_acls:
            # NOTE(review): a bucket whose ACL parses to nothing also skips
            # policy parsing (preserved from the original) — confirm intended.
            continue
        acls.extend(parsed_acls)
        parsed_policy = parse_policy(bucket, policy)
        if parsed_policy is not None:
            policies.append(parsed_policy)

    # cleanup existing policy properties set on S3 Buckets
    run_cleanup_job(
        'aws_s3_details.json',
        neo4j_session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': aws_account_id
        },
    )

    _load_s3_acls(neo4j_session, acls, aws_account_id, update_tag)
    _load_s3_policies(neo4j_session, policies, update_tag)
    _set_default_values(neo4j_session, aws_account_id)
Example #14
0
def cleanup_rds_clusters(neo4j_session: neo4j.Session,
                         common_job_parameters: Dict) -> None:
    """
    Remove RDS cluster graph nodes
    """
    run_cleanup_job(
        'aws_import_rds_clusters_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )
Example #15
0
def sync_roles(neo4j_session, boto3_session, current_aws_account_id,
               aws_update_tag, common_job_parameters):
    """Fetch IAM roles for the account, load them, then drop stale role nodes."""
    logger.debug("Syncing IAM roles for account '%s'.", current_aws_account_id)
    role_data = get_role_list_data(boto3_session)
    load_roles(neo4j_session, role_data['Roles'], current_aws_account_id, aws_update_tag)
    run_cleanup_job('aws_import_roles_cleanup.json', neo4j_session, common_job_parameters)
Example #16
0
def cleanup_gcp_subnets(neo4j_session, common_job_parameters):
    """
    Delete out-of-date GCP VPC subnet nodes and relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job(
        'gcp_compute_vpc_subnet_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )
Example #17
0
def cleanup_gcp_projects(neo4j_session, common_job_parameters):
    """
    Remove stale GCP projects and their relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    run_cleanup_job(
        'gcp_crm_project_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )
Example #18
0
def sync(neo4j_session, common_job_parameters, github_api_key, github_url,
         organization):
    """Ingest GitHub organization users, then prune stale user nodes."""
    logger.info("Syncing GitHub users")
    user_data, org_data = get(github_api_key, github_url, organization)
    load_organization_users(
        neo4j_session, user_data, org_data, common_job_parameters['UPDATE_TAG'],
    )
    run_cleanup_job('github_users_cleanup.json', neo4j_session, common_job_parameters)
Example #19
0
def cleanup_gcp_firewall_rules(neo4j_session, common_job_parameters):
    """
    Delete out of date GCP firewalls and their relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job(
        'gcp_compute_firewall_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )
Example #20
0
def cleanup_route53(session, current_aws_id, update_tag):
    """Run the DNS cleanup job scoped to this account and update tag."""
    scoped_parameters = {'UPDATE_TAG': update_tag, 'AWS_ID': current_aws_id}
    run_cleanup_job('aws_dns_cleanup.json', session, scoped_parameters)
Example #21
0
def cleanup_gcp_folders(session, common_job_parameters):
    """
    Remove stale GCP folders and their relationships
    :param session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    run_cleanup_job(
        'gcp_crm_folder_cleanup.json',
        session,
        common_job_parameters,
    )
Example #22
0
def cleanup_droplets(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
    """
    Delete out-of-date DigitalOcean droplets and relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    # Fixed: normalized docstring indentation (PEP 257) and removed the
    # redundant trailing bare `return` — falling off the end returns None anyway.
    run_cleanup_job('digitalocean_droplet_cleanup.json', neo4j_session, common_job_parameters)
Example #23
0
def cleanup_gcp_forwarding_rules(neo4j_session, common_job_parameters):
    """
    Delete out-of-date GCP forwarding rules and relationships
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job(
        'gcp_compute_forwarding_rules_cleanup.json',
        neo4j_session,
        common_job_parameters,
    )
Example #24
0
def _cleanup_okta_organizations(session, common_job_parameters):
    """
    Remove stale Okta organization
    :param session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    run_cleanup_job(
        'okta_import_cleanup.json',
        session,
        common_job_parameters,
    )
Example #25
0
def cleanup(neo4j_session, update_tag, aws_account_id):
    """Run the Elasticsearch cleanup job scoped to this account and update tag."""
    scoped_parameters = {'UPDATE_TAG': update_tag, 'AWS_ID': aws_account_id}
    run_cleanup_job('aws_import_es_cleanup.json', neo4j_session, scoped_parameters)
Example #26
0
def cleanup_gcp_instances(session, common_job_parameters):
    """
    Delete out-of-date GCP instance nodes and relationships
    :param session: The Neo4j session
    :param common_job_parameters: dict of other job parameters to pass to Neo4j
    :return: Nothing
    """
    run_cleanup_job(
        'gcp_compute_instance_cleanup.json',
        session,
        common_job_parameters,
    )
Example #27
0
def cleanup_route53(neo4j_session: neo4j.Session, current_aws_id: str,
                    update_tag: int) -> None:
    """Run the DNS cleanup job scoped to this account and update tag."""
    scoped_parameters = {'UPDATE_TAG': update_tag, 'AWS_ID': current_aws_id}
    run_cleanup_job('aws_dns_cleanup.json', neo4j_session, scoped_parameters)
Example #28
0
def sync_groups(neo4j_session, boto3_session, current_aws_account_id, aws_update_tag, common_job_parameters):
    """Ingest IAM groups plus their inline and managed policies, then prune stale group nodes."""
    logger.debug("Syncing IAM groups for account '%s'.", current_aws_account_id)
    group_data = get_group_list_data(boto3_session)
    load_groups(neo4j_session, group_data['Groups'], current_aws_account_id, aws_update_tag)
    sync_groups_inline_policies(boto3_session, group_data, neo4j_session, aws_update_tag)
    sync_group_managed_policies(boto3_session, group_data, neo4j_session, aws_update_tag)
    run_cleanup_job('aws_import_groups_cleanup.json', neo4j_session, common_job_parameters)
Example #29
0
def cleanup(neo4j_session: neo4j.Session, update_tag: int,
            aws_account_id: str) -> None:
    """
    Run the Elasticsearch cleanup job scoped to this account and update tag.

    NOTE(review): `aws_account_id` was previously annotated `int`, but account
    IDs are passed as strings elsewhere in this codebase (e.g. the route53 and
    S3 helpers) — changed the annotation to `str`; confirm against callers.
    """
    run_cleanup_job(
        'aws_import_es_cleanup.json',
        neo4j_session,
        {
            'UPDATE_TAG': update_tag,
            'AWS_ID': aws_account_id
        },
    )
def cleanup(neo4j_session: neo4j.Session, current_aws_account_id: str,
            update_tag: int) -> None:
    """Run the Elasticache cleanup job scoped to this account and update tag."""
    scoped_parameters = {'UPDATE_TAG': update_tag, 'AWS_ID': current_aws_account_id}
    run_cleanup_job('aws_import_elasticache_cleanup.json', neo4j_session, scoped_parameters)