def apply(self, input_list: RoleList) -> RoleList:
    """
    Select the roles from input_list that match the blocklist.

    A role matches when its lower-cased name is in
    self.blocklisted_role_names, or when its ARN is in
    self.blocklisted_arns.

    Args:
        input_list (RoleList): roles to check

    Returns:
        RoleList: the roles that matched
    """
    matched = RoleList([])
    for candidate in input_list:
        name_hit = candidate.role_name.lower() in self.blocklisted_role_names
        # The ARN is only consulted when the name did not already match.
        if name_hit or candidate.arn in self.blocklisted_arns:
            matched.append(candidate)
    return matched
def _repo_stats(output_file: str, account_number: str = "") -> None:
    """
    Create a csv file with stats about roles, total permissions, and applicable filters over time

    One row is written per (role, stats entry) pair. Failure to write the
    file is logged and not re-raised.

    Args:
        output_file (string): the name of the csv file to write
        account_number (string): if specified only display roles from selected account, otherwise display all

    Returns:
        None
    """
    role_ids: Iterable[str]
    if account_number:
        # The Access Advisor seed result is not used here; presumably the
        # call is made for its side effects -- TODO confirm.
        access_advisor_datasource = AccessAdvisorDatasource()
        access_advisor_datasource.seed(account_number)
        iam_datasource = IAMDatasource()
        role_ids = iam_datasource.seed(account_number)
    else:
        role_ids = role_ids_for_all_accounts()

    headers = [
        "RoleId",
        "Role Name",
        "Account",
        "Active",
        "Date",
        "Source",
        "Permissions Count",
        "Repoable Permissions Count",
        "Disqualified By",
    ]
    roles = RoleList.from_ids(
        role_ids, fields=["RoleId", "RoleName", "Account", "Active", "Stats"]
    )

    rows = []
    for role in roles:
        for stats_entry in role.stats:
            rows.append(
                [
                    role.role_id,
                    role.role_name,
                    role.account,
                    role.active,
                    stats_entry["Date"],
                    stats_entry["Source"],
                    stats_entry["PermissionsCount"],
                    # These two keys are optional in a stats entry.
                    stats_entry.get("RepoablePermissionsCount", 0),
                    stats_entry.get("DisqualifiedBy", []),
                ]
            )

    try:
        # newline="" lets the csv module control line endings itself;
        # without it, platforms that translate "\n" emit blank rows.
        with open(output_file, "w", newline="") as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow(headers)
            csv_writer.writerows(rows)
    except IOError as e:
        LOGGER.error(
            "Unable to write file {}: {}".format(output_file, e), exc_info=True
        )
    else:
        LOGGER.info("Successfully wrote stats to {}".format(output_file))
def _find_roles_with_permissions(permissions: List[str], output_file: str) -> None:
    """
    Search roles in all accounts for a policy with any of the provided permissions, log the ARN of each role.

    Only roles that are active are reported. When output_file is non-empty
    the matching ARNs are also written to it as a JSON list.

    Args:
        permissions (list[string]): The name of the permissions to find
        output_file (string): filename to write the output

    Returns:
        None
    """
    arns: List[str] = []
    # NOTE(review): the helper is named role_arns_for_all_accounts but its
    # result is handed to RoleList.from_ids -- confirm it yields values
    # that from_ids accepts.
    role_ids = role_arns_for_all_accounts()
    roles = RoleList.from_ids(
        role_ids, fields=["Policies", "RoleName", "Arn", "Active"]
    )

    # Lower-case the requested permissions once instead of once per role.
    permissions_set = {p.lower() for p in permissions}

    for role in roles:
        role_permissions, _ = role.get_permissions_for_policy_version()
        found_permissions = permissions_set.intersection(role_permissions)
        if found_permissions and role.active:
            arns.append(role.arn)
            LOGGER.info(
                "ARN {arn} has {permissions}".format(
                    arn=role.arn, permissions=list(found_permissions)
                )
            )

    if not output_file:
        return

    with open(output_file, "w") as fd:
        json.dump(arns, fd)
    LOGGER.info(f"Output written to file {output_file}")
def apply(self, input_list: RoleList) -> RoleList:
    """
    Select the roles whose assume-role policy document mentions "lambda".

    The document is converted with str() and lower-cased, so the match is
    a case-insensitive substring test.

    Args:
        input_list (RoleList): roles to check

    Returns:
        RoleList: the roles that matched
    """
    selected: RoleList = RoleList([])
    for candidate in input_list:
        policy_text = str(candidate.assume_role_policy_document).lower()
        if "lambda" not in policy_text:
            continue
        selected.append(candidate)
    return selected
def find_and_mark_inactive(account_number: str, active_roles: RoleList) -> None:
    """
    Mark roles in the account that were not seen in the latest scan as inactive.

    Everything known for the account, minus the currently active roles, is
    treated as inactive. Each such role that is still flagged active gets
    mark_inactive() called on it.

    Args:
        account_number (string)
        active_roles (RoleList): the currently active roles discovered in the most recent scan

    Returns:
        None
    """
    # TODO: clean up and simplify this logic. We're getting a RoleList, converting to a set,
    # and subtracting it from a set of known roles. This is strange and confusing.
    currently_active = set(active_roles)
    known_ids = set(get_all_role_ids_for_account(account_number))
    stale_ids = known_ids - currently_active

    for stale_role in RoleList.from_ids(stale_ids, fields=["Active", "Arn"]):
        if not stale_role.active:
            # Already inactive; nothing to update.
            continue
        stale_role.mark_inactive()
def apply(self, input_list: RoleList) -> RoleList:
    """
    Select the roles that have an opt-out which has not expired.

    A role matches when role.opt_out is truthy and its "expire" value is
    greater than self.current_time_epoch.

    Args:
        input_list (RoleList): roles to check

    Returns:
        RoleList: the roles that matched
    """
    still_opted_out: RoleList = RoleList([])
    for candidate in input_list:
        if not candidate.opt_out:
            continue
        if candidate.opt_out["expire"] > self.current_time_epoch:
            still_opted_out.append(candidate)
    return still_opted_out
def test_age_too_young_no_tz(mock_role: Role):
    """A role with a naive create_date of 'now' is returned by AgeFilter."""
    age_filter = AgeFilter()

    naive_now = datetime.datetime.now()
    # Guard the premise of the test: the timestamp carries no timezone.
    assert not naive_now.tzinfo
    mock_role.create_date = naive_now

    filtered = age_filter.apply(RoleList([mock_role]))
    assert len(filtered) == 1
def _cancel_scheduled_repo(
    account_number: str, role_name: str = "", is_all: bool = False
) -> None:
    """
    Cancel scheduled repo for a role in an account

    With is_all set, every scheduled role in the account is cleared and
    store failures are logged without being raised. Otherwise the single
    role named role_name is cleared and a store failure is re-raised.

    Args:
        account_number (string)
        role_name (string): role to cancel; required unless is_all is set
        is_all (bool): cancel every scheduled role in the account

    Returns:
        None
    """
    if not (is_all or role_name):
        LOGGER.error("Either a specific role to cancel or all must be provided")
        return

    if is_all:
        account_role_ids = get_all_role_ids_for_account(account_number)
        # filter to show only roles that are scheduled
        scheduled = RoleList.from_ids(account_role_ids).get_scheduled()

        for entry in scheduled:
            entry.repo_scheduled = 0
            entry.scheduled_perms = []
            try:
                entry.store(["repo_scheduled", "scheduled_perms"])
            except RoleStoreError:
                LOGGER.exception("failed to store role", exc_info=True)

        cancelled_names = ", ".join([entry.role_name for entry in scheduled])
        LOGGER.info("Canceled scheduled repo for roles: {}".format(cancelled_names))
        return

    role_id = find_role_in_cache(role_name, account_number)
    if not role_id:
        LOGGER.warning(
            f"Could not find role with name {role_name} in account {account_number}"
        )
        return

    role = Role(role_id=role_id)
    role.fetch()
    if not role.repo_scheduled:
        LOGGER.warning(
            "Repo was not scheduled for role {} in account {}".format(
                role.role_name, account_number
            )
        )
        return

    role.repo_scheduled = 0
    role.scheduled_perms = []
    try:
        role.store(["repo_scheduled", "scheduled_perms"])
    except RoleStoreError:
        LOGGER.exception("failed to store role", exc_info=True)
        raise

    LOGGER.info(
        "Successfully cancelled scheduled repo for role {} in account {}".format(
            role.role_name, role.account
        )
    )
def test_age_with_tz(mock_role: Role):
    """A role with a tz-aware create_date 100 days back is not returned by AgeFilter."""
    age_filter = AgeFilter()

    hundred_days = datetime.timedelta(days=100)
    aware_past = datetime.datetime.now(tz=tz.tzutc()) - hundred_days
    # Guard the premise of the test: the timestamp carries a timezone.
    assert aware_past.tzinfo
    mock_role.create_date = aware_past

    filtered = age_filter.apply(RoleList([mock_role]))
    assert len(filtered) == 0
def apply(self, input_list: RoleList) -> RoleList:
    """
    Return the roles whose lower-cased name matches none of the exclusive globs.

    The result is computed as a set difference, so the order of the input
    is not preserved and duplicate roles are collapsed.

    Args:
        input_list (RoleList): roles to check

    Returns:
        RoleList: roles that did not match any glob in self.exclusive_role_globs
    """
    matching = {
        role
        for role in input_list
        if any(
            fnmatch.fnmatch(role.role_name.lower(), role_glob)
            for role_glob in self.exclusive_role_globs
        )
    }
    remaining = set(input_list) - matching
    return RoleList(list(remaining))
def _schedule_repo(
    account_number: str,
    config: RepokidConfig,
    hooks: RepokidHooks,
) -> None:
    """
    Schedule a repo for a given account.  Schedule repo for a time in the future (default 7 days) for any roles in
    the account with repoable permissions.

    Roles are skipped when they have no Access Advisor data, have no
    repoable permissions, or are already scheduled. A failure to store a
    role is logged and the role is still reported as scheduled. The
    AFTER_SCHEDULE_REPO hook is called with the scheduled roles.

    Args:
        account_number (string)
        config: configuration mapping; "repo_schedule_period_days" is read from it
        hooks: hooks passed through to repokid.hooks.call_hooks

    Returns:
        None
    """
    scheduled_roles = []
    role_ids = get_all_role_ids_for_account(account_number)
    roles = RoleList.from_ids(role_ids)
    roles.fetch_all(fetch_aa_data=True)

    # Read the period once so the schedule and the log message agree.
    period_days = config.get("repo_schedule_period_days", 7)
    scheduled_time = int(time.time()) + (86400 * period_days)

    for role in roles:
        if not role.aa_data:
            LOGGER.warning("Not scheduling %s; missing Access Advisor data", role.arn)
            continue
        if not role.repoable_permissions > 0:
            LOGGER.debug("Not scheduling %s; no repoable permissions", role.arn)
            continue
        if role.repo_scheduled:
            LOGGER.debug(
                "Not scheduling %s; already scheduled for %s",
                role.arn,
                role.repo_scheduled,
            )
            continue

        role.repo_scheduled = scheduled_time
        # freeze the scheduled perms to whatever is repoable right now
        role.scheduled_perms = role.repoable_services
        try:
            role.store(["repo_scheduled", "scheduled_perms"])
        except RoleStoreError:
            # Use the module logger like the rest of this code, not the root logger.
            LOGGER.exception("failed to store role", exc_info=True)

        scheduled_roles.append(role)

    LOGGER.info(
        "Scheduled repo for {} days from now for account {} and these roles:\n\t{}".format(
            period_days,
            account_number,
            ", ".join([r.role_name for r in scheduled_roles]),
        )
    )

    repokid.hooks.call_hooks(hooks, "AFTER_SCHEDULE_REPO", {"roles": scheduled_roles})
def _display_roles(account_number: str, inactive: bool = False) -> None:
    """
    Display a table with data about all roles in an account and write a csv file with the data.

    The table is sorted by repoable permissions, then name, then total
    permissions. The same rows, header first, are written to "table.csv"
    in the current working directory.

    Args:
        account_number (string)
        inactive (bool): show roles that have historically (but not currently) existed in the account if True

    Returns:
        None
    """
    headers = [
        "Name",
        "Refreshed",
        "Disqualified By",
        "Can be repoed",
        "Permissions",
        "Repoable",
        "Repoed",
        "Services",
    ]

    role_ids = get_all_role_ids_for_account(account_number)
    roles = RoleList.from_ids(role_ids)
    if not inactive:
        roles = roles.get_active()

    rows: List[List[Any]] = []
    for role in roles:
        rows.append(
            [
                role.role_name,
                role.refreshed,
                role.disqualified_by,
                len(role.disqualified_by) == 0,
                role.total_permissions,
                role.repoable_permissions,
                role.repoed,
                role.repoable_services,
            ]
        )

    # Sort on (Repoable, Name, Permissions) before adding the header row.
    rows = sorted(rows, key=lambda x: (x[5], x[0], x[4]))
    rows.insert(0, headers)
    t.view(rows)

    # newline="" lets the csv module control line endings itself.
    with open("table.csv", "w", newline="") as csvfile:
        csv_writer = csv.writer(csvfile)
        # rows already starts with the header row, so writing the headers
        # separately would duplicate the header line in the file.
        csv_writer.writerows(rows)
def apply(self, input_list: RoleList) -> RoleList:
    """
    Select the roles that are too young to be cleaned up.

    The minimum age in days comes from self.config["minimum_age"]
    (default 90); 90 is also used when self.config is falsy. Roles with
    no create_date are reported as too young as well.

    Args:
        input_list (RoleList): roles to check

    Returns:
        RoleList: roles created more recently than the minimum age, plus
        roles without a create_date
    """
    now = datetime.datetime.now()

    if not self.config:
        log.info("Minimum age not set in config, using default 90 days")
        days_delta = 90
    else:
        days_delta = self.config.get("minimum_age", 90)

    cutoff = now - datetime.timedelta(days=days_delta)

    too_young = RoleList([])
    for role in input_list:
        if not role.create_date:
            log.warning(f"Role {role.role_name} is missing create_date")
            too_young.append(role)
            continue

        # Ensure create_date is an offset-naive datetime
        epoch_seconds = role.create_date.timestamp()
        create_date = datetime.datetime.fromtimestamp(epoch_seconds)
        if create_date <= cutoff:
            continue

        log.info(
            f"Role {role.role_name} created too recently to cleanup. ({create_date})"
        )
        too_young.append(role)
    return too_young
def test_schedule_repo(
    self,
    mock_time,
    mock_get_all_role_ids_for_account,
    mock_role_list_from_ids,
    mock_fetch,
    mock_role_store,
    mock_call_hooks,
):
    """_schedule_repo stores and reports only the role that is repoable."""
    hooks = RepokidHooks
    mock_get_all_role_ids_for_account.return_value = [
        "AROAABCDEFGHIJKLMNOPA",
        "AROAABCDEFGHIJKLMNOPB",
    ]

    # first role is not repoable, second role is repoable
    not_repoable = ROLES_FOR_DISPLAY[0].copy(
        update=Role(RoleId="AROAABCDEFGHIJKLMNOPA").dict()
    )
    repoable = ROLES_FOR_DISPLAY[1].copy(
        update=Role(
            RoleId="AROAABCDEFGHIJKLMNOPB",
            AAData=[{"foo": "bar"}],
            RepoablePermissions=10,
        ).dict()
    )
    mock_role_list_from_ids.return_value = RoleList([not_repoable, repoable])
    mock_time.return_value = 1

    config = {"repo_schedule_period_days": 1}
    repokid.commands.schedule._schedule_repo("1234567890", config, hooks)

    mock_role_store.assert_called()
    expected_hook_calls = [
        call(hooks, "AFTER_SCHEDULE_REPO", {"roles": [repoable]}),
    ]
    assert mock_call_hooks.mock_calls == expected_hook_calls
def _show_scheduled_roles(account_number: str) -> None:
    """
    Show scheduled repos for a given account.  For each scheduled show whether scheduled time is elapsed or not.

    Args:
        account_number (string)

    Returns:
        None
    """
    account_role_ids = get_all_role_ids_for_account(account_number)
    # filter to show only roles that are scheduled
    scheduled = RoleList.from_ids(account_role_ids).get_active().get_scheduled()

    header = ["Role name", "Scheduled", "Scheduled Time Elapsed?"]
    curtime = int(time.time())
    rows = [
        [
            entry.role_name,
            dt.fromtimestamp(entry.repo_scheduled).strftime("%Y-%m-%d %H:%M"),
            entry.repo_scheduled < curtime,
        ]
        for entry in scheduled
    ]
    print(tabulate(rows, headers=header))
def _update_role_cache(
    account_number: str,
    config: RepokidConfig,
    hooks: RepokidHooks,
) -> None:
    """
    Update data about all roles in a given account.

    Steps performed here:
      1) seed the Access Advisor and IAM datasources for the account
      2) build a Role for every role ID returned by the IAM seed
      3) set the account on each role and gather its data (source "Scan",
         without storing)
      4) mark roles that are known for the account but were not seen in this
         scan as inactive
      5) apply every filter plugin and record the plugin's class name in the
         disqualified_by list of each role it returns
      6) log repoable permissions/services per role and store all roles

    Args:
        account_number (string): The current account number Repokid is being run against
        config: configuration passed to role data gathering and filter loading
        hooks: hooks passed to role data gathering

    Returns:
        None
    """
    aa_source = AccessAdvisorDatasource()
    aa_source.seed(account_number)
    iam_source = IAMDatasource()
    seeded_ids = iam_source.seed(account_number)

    # We only iterate over the newly-seeded data so we don't duplicate work for runs on multiple accounts
    roles = RoleList([Role(**iam_source[seeded_id]) for seeded_id in seeded_ids])

    LOGGER.info("Updating role data for account {}".format(account_number))
    for role in tqdm(roles):
        role.account = account_number
        role.gather_role_data(hooks, config=config, source="Scan", store=False)

    LOGGER.info("Finding inactive roles in account {}".format(account_number))
    find_and_mark_inactive(account_number, roles)

    LOGGER.info("Filtering roles")
    loaded = get_filter_plugins(account_number, config=config)
    for plugin in loaded.filter_plugins:
        plugin_name = plugin.__class__.__name__
        for hit in plugin.apply(roles):
            LOGGER.debug("Role {} filtered by {}".format(hit.role_name, plugin_name))
            # There may be existing duplicate records, so we do a dance here to dedupe them.
            reasons = set(hit.disqualified_by)
            reasons.add(plugin_name)
            hit.disqualified_by = list(reasons)

    for role in roles:
        LOGGER.debug(
            "Role {} in account {} has\nrepoable permissions: {}\nrepoable services: {}".format(
                role.role_name,
                account_number,
                role.repoable_permissions,
                role.repoable_services,
            )
        )

    LOGGER.info("Storing updated role data in account {}".format(account_number))
    roles.store()
def test_cancel_scheduled_repo(
    self,
    mock_get_all_role_ids_for_account,
    mock_find_role_in_cache,
    mock_role_list_from_ids,
    mock_role_store,
    mock_role_fetch,
):
    """Cancelling with is_all stores each role returned by from_ids; then a single cancel is run."""
    mock_get_all_role_ids_for_account.return_value = [
        "AROAABCDEFGHIJKLMNOPA",
        "AROAABCDEFGHIJKLMNOPB",
    ]

    roles = RoleList(
        [
            Role(
                Arn="arn:aws:iam::123456789012:role/ROLE_A",
                RoleId="AROAABCDEFGHIJKLMNOPA",
                Active=True,
                RoleName="ROLE_A",
                RepoScheduled=100,
                CreateDate=datetime.datetime.now() - datetime.timedelta(days=100),
            ),
            Role(
                Arn="arn:aws:iam::123456789012:role/ROLE_B",
                RoleId="AROAABCDEFGHIJKLMNOPB",
                Active=True,
                RoleName="ROLE_B",
                RepoScheduled=0,
                CreateDate=datetime.datetime.now() - datetime.timedelta(days=100),
            ),
            Role(
                Arn="arn:aws:iam::123456789012:role/ROLE_C",
                RoleId="AROAABCDEFGHIJKLMNOPC",
                Active=True,
                RoleName="ROLE_C",
                RepoScheduled=5,
                CreateDate=datetime.datetime.now() - datetime.timedelta(days=100),
            ),
        ]
    )
    # Only the two roles with a non-zero RepoScheduled are handed back.
    scheduled_only = RoleList([roles[0], roles[2]])
    mock_role_list_from_ids.return_value = scheduled_only

    # first check all
    repokid.commands.schedule._cancel_scheduled_repo("", role_name="", is_all=True)
    assert mock_role_store.call_count == 2

    mock_role_store.reset_mock()

    # ensure all are cancelled
    mock_find_role_in_cache.return_value = "AROAABCDEFGHIJKLMNOPA"
    repokid.commands.schedule._cancel_scheduled_repo(
        "", role_name="ROLE_A", is_all=False
    )
def test_repo_all_roles(
    self,
    mock_time,
    mock_repo_role,
    mock_iam_datasource_seed,
    mock_aa_datasource,
    mock_role_list_fetch_all,
    mock_role_list_from_ids,
    mock_call_hooks,
):
    """_repo_all_roles repos every role when unscheduled and only ROLE_C when scheduled."""
    hooks = RepokidHooks()
    mock_iam_datasource_seed.return_value = [
        "AROAABCDEFGHIJKLMNOPA",
        "AROAABCDEFGHIJKLMNOPB",
        "AROAABCDEFGHIJKLMNOPC",
    ]
    roles = RoleList(
        [
            Role(
                Arn="arn:aws:iam::123456789012:role/ROLE_A",
                RoleId="AROAABCDEFGHIJKLMNOPA",
                Active=True,
                RoleName="ROLE_A",
                RepoScheduled=100,
                CreateDate=datetime.datetime.now() - datetime.timedelta(days=100),
            ),
            Role(
                Arn="arn:aws:iam::123456789012:role/ROLE_B",
                RoleId="AROAABCDEFGHIJKLMNOPB",
                Active=True,
                RoleName="ROLE_B",
                RepoScheduled=0,
                CreateDate=datetime.datetime.now() - datetime.timedelta(days=100),
            ),
            Role(
                Arn="arn:aws:iam::123456789012:role/ROLE_C",
                RoleId="AROAABCDEFGHIJKLMNOPC",
                Active=True,
                RoleName="ROLE_C",
                RepoScheduled=5,
                CreateDate=datetime.datetime.now() - datetime.timedelta(days=100),
            ),
        ]
    )

    # time is past ROLE_C but before ROLE_A
    mock_time.return_value = 10

    mock_role_list_from_ids.return_value = RoleList([roles[0], roles[1], roles[2]])
    mock_repo_role.return_value = None

    # repo all roles in the account, should call repo with all roles
    repokid.commands.repo._repo_all_roles("", {}, hooks, scheduled=False)
    # repo only scheduled, should only call repo role with role C
    repokid.commands.repo._repo_all_roles("", {}, hooks, scheduled=True)

    expected_repo_calls = [
        call(hooks, commit=False, scheduled=False),
        call(hooks, commit=False, scheduled=False),
        call(hooks, commit=False, scheduled=False),
        call(hooks, commit=False, scheduled=True),
    ]
    assert mock_repo_role.mock_calls == expected_repo_calls

    expected_hook_calls = [
        call(
            hooks,
            "BEFORE_REPO_ROLES",
            {"account_number": "", "roles": roles},
        ),
        call(
            hooks,
            "AFTER_REPO_ROLES",
            {"account_number": "", "roles": roles, "errors": []},
        ),
        call(
            hooks,
            "BEFORE_REPO_ROLES",
            {"account_number": "", "roles": RoleList([roles[2]])},
        ),
        call(
            hooks,
            "AFTER_REPO_ROLES",
            {
                "account_number": "",
                "roles": RoleList([roles[2]]),
                "errors": [],
            },
        ),
    ]
    assert mock_call_hooks.mock_calls == expected_hook_calls
def test_repokid_display_roles(
    self, mock_get_all_role_ids_for_account, mock_role_list_from_ids, mock_tabview
):
    """_display_roles shows the inactive role only when inactive=True."""
    console_logger = logging.StreamHandler()
    console_logger.setLevel(logging.WARNING)

    repokid.cli.repokid_cli.logger = logging.getLogger("test")
    repokid.cli.repokid_cli.logger.addHandler(console_logger)

    mock_get_all_role_ids_for_account.return_value = [
        "AROAABCDEFGHIJKLMNOPA",
        "AROAABCDEFGHIJKLMNOPB",
        "AROAABCDEFGHIJKLMNOPC",
        "AROAABCDEFGHIJKLMNOPD",
    ]

    test_roles = [
        display_role.copy(update=Role(**ROLES[index]).dict(exclude_unset=True))
        for index, display_role in enumerate(ROLES_FOR_DISPLAY)
    ]

    # loop over all roles twice (one for each call below)
    mock_role_list_from_ids.return_value = RoleList(
        [test_roles[0], test_roles[1], test_roles[2], test_roles[3]]
    )
    repokid.commands.role._display_roles("123456789012", inactive=True)
    repokid.commands.role._display_roles("123456789012", inactive=False)

    header_row = [
        "Name",
        "Refreshed",
        "Disqualified By",
        "Can be repoed",
        "Permissions",
        "Repoable",
        "Repoed",
        "Services",
    ]
    with_inactive = [
        header_row,
        ["all_services_used", "Someday", [], True, 4, 0, "Never", []],
        ["inactive_role", "Someday", [], True, 4, 0, "Never", []],
        ["young_role", "Someday", [], True, 4, 0, "Never", []],
        ["unused_ec2", "Someday", [], True, 4, 2, "Never", ["ec2"]],
    ]
    active_only = [
        header_row,
        ["all_services_used", "Someday", [], True, 4, 0, "Never", []],
        ["young_role", "Someday", [], True, 4, 0, "Never", []],
        ["unused_ec2", "Someday", [], True, 4, 2, "Never", ["ec2"]],
    ]

    # first call has inactive role, second doesn't because it's filtered
    assert mock_tabview.mock_calls == [call(with_inactive), call(active_only)]
def test_repokid_update_role_cache(
    self,
    mock_iam_datasource_fetch,
    mock_access_advisor_datasource,
    mock_gather_role_data,
    mock_role_list_store,
    mock_find_and_mark_inactive,
):
    """
    Run _update_role_cache against three fixture roles.

    Checks that role data is gathered once per role and that the last call
    to find_and_mark_inactive received the account number and all three
    roles.
    """
    hooks = {}

    # NOTE(review): assuming ROLES is a list of dicts, this slice is a shallow
    # copy, so the RolePolicyList assignments below also modify the entries
    # of ROLES that the final assertion rebuilds roles from -- confirm.
    role_data = ROLES[:3]
    role_data[0]["RolePolicyList"] = [
        {
            "PolicyName": "all_services_used",
            "PolicyDocument": ROLE_POLICIES["all_services_used"],
        }
    ]
    role_data[1]["RolePolicyList"] = [
        {"PolicyName": "unused_ec2", "PolicyDocument": ROLE_POLICIES["unused_ec2"]}
    ]
    role_data[2]["RolePolicyList"] = [
        {
            "PolicyName": "all_services_used",
            "PolicyDocument": ROLE_POLICIES["all_services_used"],
        }
    ]

    # Re-key the fixture data by role ID before handing it to the mocked fetch.
    role_data = {item["RoleId"]: item for item in role_data}
    mock_iam_datasource_fetch.return_value = role_data

    config = {
        "aardvark_api_location": "",
        "connection_iam": {},
        "active_filters": ["repokid.filters.age:AgeFilter"],
        "filter_config": {"AgeFilter": {"minimum_age": 90}, "BlocklistFilter": {}},
    }

    console_logger = logging.StreamHandler()
    console_logger.setLevel(logging.WARNING)

    repokid.cli.repokid_cli.logger = logging.getLogger("test")
    repokid.cli.repokid_cli.logger.addHandler(console_logger)

    account_number = "123456789012"

    repokid.commands.role_cache._update_role_cache(account_number, config, hooks)

    # One gather call per role in the fixture data.
    assert mock_gather_role_data.call_count == 3

    # all roles active
    assert mock_find_and_mark_inactive.mock_calls[-1] == call(
        account_number,
        RoleList(
            [
                Role(**ROLES[0]),
                Role(**ROLES[1]),
                Role(**ROLES[2]),
            ]
        ),
    )
def _repo_all_roles(
    account_number: str,
    config: RepokidConfig,
    hooks: RepokidHooks,
    commit: bool = False,
    scheduled: bool = True,
    limit: int = -1,
) -> None:
    """
    Repo all scheduled or eligible roles in an account.  Collect any errors and display them at the end.

    The BEFORE_REPO_ROLES hook is called with the candidate roles and the
    AFTER_REPO_ROLES hook with the roles actually processed plus any errors.
    Nothing is done, and no hooks are called, when there are no candidates.

    Args:
        account_number (string)
        config: configuration passed to the role list constructor
        hooks: hooks passed to each role's repo call and to call_hooks
        commit (bool): actually make the changes
        scheduled (bool): if True only repo the scheduled roles, if False repo all the (eligible) roles
        limit (int): limit number of roles to be repoed per run (< 0 is unlimited)

    Returns:
        None
    """
    aa_source = AccessAdvisorDatasource()
    aa_source.seed(account_number)
    iam_source = IAMDatasource()
    # NOTE(review): the seed result is handed to RoleList.from_arns -- confirm
    # the datasource yields ARNs here rather than role IDs.
    role_arns = iam_source.seed(account_number)
    errors = []

    roles = RoleList.from_arns(role_arns, config=config).get_active()
    if scheduled:
        roles = roles.get_scheduled()

    if not roles:
        LOGGER.info(f"No roles to repo in account {account_number}")
        return

    qualifier = "scheduled " if scheduled else ""
    role_names = ", ".join([role.role_name for role in roles])
    LOGGER.info(
        "Repoing these {}roles from account {}:\n\t{}".format(
            qualifier, account_number, role_names
        )
    )

    before_payload = {"account_number": account_number, "roles": roles}
    repokid.hooks.call_hooks(hooks, "BEFORE_REPO_ROLES", before_payload)

    unlimited = limit < 0
    count = 0
    repoed = RoleList([])
    for role in roles:
        if not unlimited and count == limit:
            break
        role_errors = role.repo(hooks, commit=commit, scheduled=scheduled)
        if role_errors:
            errors.extend(role_errors)
        repoed.append(role)
        count += 1

    if errors:
        LOGGER.error(f"Error(s) during repo in account: {account_number}: {errors}")
    LOGGER.info(f"Successfully repoed {count} roles in account {account_number}")

    after_payload = {
        "account_number": account_number,
        "roles": repoed,
        "errors": errors,
    }
    repokid.hooks.call_hooks(hooks, "AFTER_REPO_ROLES", after_payload)