Esempio n. 1
0
def test_iam_seed(mock_fetch_account):
    """Check that seeding an IAMDatasource fetches once and keeps the result in `_data`."""
    fake_arn = "pretend_arn"
    account_number = "123456789012"
    fetched = {fake_arn: {"a": "b"}}

    datasource = IAMDatasource()
    mock_fetch_account.return_value = fetched
    datasource.seed(account_number)

    # The fetch must have run exactly once, with the account number as its
    # first positional argument.
    mock_fetch_account.assert_called_once()
    assert mock_fetch_account.call_args[0][0] == account_number

    # Whatever the fetch returned must now be held by the datasource, both as
    # a whole mapping and when looked up by the individual ARN.
    assert datasource._data == fetched
    assert fake_arn in datasource._data
    assert datasource._data[fake_arn] == {"a": "b"}
Esempio n. 2
0
def _repo_stats(output_file: str, account_number: str = "") -> None:
    """
    Create a csv file with one row per (role, stats entry) pair.

    Each row carries the role's id, name, account and active flag, followed by
    the stats entry's date, source, permissions count, repoable permissions
    count (0 when the entry has none) and list of disqualifying filters (empty
    list when the entry has none).

    A failure to write the file is logged and swallowed; it is not re-raised.

    Args:
        output_file (string): the name of the csv file to write
        account_number (string): if specified only display roles from selected account, otherwise display all

    Returns:
        None
    """
    role_ids: Iterable[str]
    if account_number:
        # Both datasources are seeded for the selected account; only the IAM
        # seed result is used here, as the list of role ids to report on.
        AccessAdvisorDatasource().seed(account_number)
        role_ids = IAMDatasource().seed(account_number)
    else:
        role_ids = role_ids_for_all_accounts()

    headers = [
        "RoleId",
        "Role Name",
        "Account",
        "Active",
        "Date",
        "Source",
        "Permissions Count",
        "Repoable Permissions Count",
        "Disqualified By",
    ]
    roles = RoleList.from_ids(
        role_ids, fields=["RoleId", "RoleName", "Account", "Active", "Stats"])

    # Rows are fully built before the file is opened, so a failure while
    # reading role data cannot leave a truncated output file behind.
    rows = [
        [
            role.role_id,
            role.role_name,
            role.account,
            role.active,
            stats_entry["Date"],
            stats_entry["Source"],
            stats_entry["PermissionsCount"],
            stats_entry.get("RepoablePermissionsCount", 0),
            stats_entry.get("DisqualifiedBy", []),
        ]
        for role in roles
        for stats_entry in role.stats
    ]

    try:
        # newline="" is required by the csv module: the writer emits its own
        # line terminators, and without it platforms that translate "\n"
        # produce an extra "\r" on every row.
        with open(output_file, "w", newline="") as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow(headers)
            csv_writer.writerows(rows)
    except IOError as e:
        LOGGER.error("Unable to write file {}: {}".format(output_file, e),
                     exc_info=True)
    else:
        LOGGER.info("Successfully wrote stats to {}".format(output_file))
Esempio n. 3
0
def _update_role_cache(
    account_number: str,
    config: RepokidConfig,
    hooks: RepokidHooks,
) -> None:
    """
    Refresh and store data about the roles seeded for a given account.

    The steps performed here, in order:
      1) seed the Access Advisor and IAM datasources for the account
      2) build a Role for every role id returned by the IAM seed
      3) set each role's account and have it gather its own data (source "Scan",
         without storing) - the details live in Role.gather_role_data
      4) hand the role list to find_and_mark_inactive for the account
      5) apply every filter plugin and record the plugin's class name in the
         disqualified_by list of each role it returns
      6) log repoable permissions/services for each role at debug level
      7) store the whole role list

    Args:
        account_number (string): The current account number Repokid is being run against
        config: passed through to role data gathering and filter plugin loading
        hooks: passed through to role data gathering

    Returns:
        None
    """
    AccessAdvisorDatasource().seed(account_number)
    iam_datasource = IAMDatasource()
    seeded_role_ids = iam_datasource.seed(account_number)

    # Only the ids returned by this seed call are turned into roles, so a run
    # across several accounts does not redo work for earlier accounts.
    seeded_roles = [
        Role(**iam_datasource[seeded_id]) for seeded_id in seeded_role_ids
    ]
    roles = RoleList(seeded_roles)

    LOGGER.info("Updating role data for account {}".format(account_number))
    for current_role in tqdm(roles):
        current_role.account = account_number
        current_role.gather_role_data(
            hooks, config=config, source="Scan", store=False)

    LOGGER.info("Finding inactive roles in account {}".format(account_number))
    find_and_mark_inactive(account_number, roles)

    LOGGER.info("Filtering roles")
    loaded_plugins = get_filter_plugins(account_number, config=config)
    for filter_plugin in loaded_plugins.filter_plugins:
        matched_roles = filter_plugin.apply(roles)
        filter_name = filter_plugin.__class__.__name__
        for matched_role in matched_roles:
            LOGGER.debug("Role {} filtered by {}".format(
                matched_role.role_name, filter_name))
            # Round-trip through a set so a filter name that is already
            # recorded (or recorded more than once) ends up listed only once.
            unique_reasons = set(matched_role.disqualified_by)
            unique_reasons.add(filter_name)
            matched_role.disqualified_by = list(unique_reasons)

    for current_role in roles:
        LOGGER.debug(
            "Role {} in account {} has\nrepoable permissions: {}\nrepoable services: {}"
            .format(
                current_role.role_name,
                account_number,
                current_role.repoable_permissions,
                current_role.repoable_services,
            ))

    LOGGER.info(
        "Storing updated role data in account {}".format(account_number))
    roles.store()
Esempio n. 4
0
def _repo_all_roles(
    account_number: str,
    config: RepokidConfig,
    hooks: RepokidHooks,
    commit: bool = False,
    scheduled: bool = True,
    limit: int = -1,
) -> None:
    """
    Repo all scheduled or eligible roles in an account.  Collect any errors and log them at the end.

    The BEFORE_REPO_ROLES hooks run with the full candidate list; the
    AFTER_REPO_ROLES hooks run with only the roles that were actually
    processed (including any that reported errors) plus the collected errors.
    If there are no candidate roles the function returns before any hook runs.

    Args:
        account_number (string): the account whose roles should be repoed
        config: passed through when loading the roles
        hooks: hooks to call before and after repoing; also passed to each role's repo call
        commit (bool): actually make the changes
        scheduled (bool): if True only repo the scheduled roles, if False repo all the (eligible) roles
        limit (int): limit number of roles to be repoed per run (< 0 is unlimited)

    Returns:
        None
    """
    # Seed both datasources for this account; the IAM seed result is what the
    # role list is loaded from.
    AccessAdvisorDatasource().seed(account_number)
    role_arns = IAMDatasource().seed(account_number)

    roles = RoleList.from_arns(role_arns, config=config).get_active()
    if scheduled:
        roles = roles.get_scheduled()

    if not roles:
        LOGGER.info(f"No roles to repo in account {account_number}")
        return

    scheduled_label = "scheduled " if scheduled else ""
    role_names = ", ".join(role.role_name for role in roles)
    LOGGER.info(
        "Repoing these {}roles from account {}:\n\t{}".format(
            scheduled_label,
            account_number,
            role_names,
        )
    )

    before_payload = {"account_number": account_number, "roles": roles}
    repokid.hooks.call_hooks(hooks, "BEFORE_REPO_ROLES", before_payload)

    errors = []
    repoed = RoleList([])
    count = 0
    has_cap = limit >= 0
    for role in roles:
        # A non-negative limit caps how many roles are processed in this run.
        if has_cap and count == limit:
            break
        role_errors = role.repo(hooks, commit=commit, scheduled=scheduled)
        if role_errors:
            errors.extend(role_errors)
        # The role counts as processed whether or not it reported errors.
        repoed.append(role)
        count += 1

    if errors:
        LOGGER.error(f"Error(s) during repo in account: {account_number}: {errors}")
    LOGGER.info(f"Successfully repoed {count} roles in account {account_number}")

    after_payload = {
        "account_number": account_number,
        "roles": repoed,
        "errors": errors,
    }
    repokid.hooks.call_hooks(hooks, "AFTER_REPO_ROLES", after_payload)