def test_today_override(self):
    """Check that today() reports the configured override date in debug mode."""
    # The two Config attributes are independent; both must be set before
    # the accessor is asked for today's date.
    Config.MASU_DATE_OVERRIDE = '2018-01-01 15:47:33'
    Config.DEBUG = True

    result = DateAccessor().today()

    # Only the calendar date is checked, not the time-of-day component.
    self.assertEqual((result.year, result.month, result.day), (2018, 1, 1))
def test_today_override_override_not_set_debug_false(self):
    """Check today() against the current date when no override is set and debug is off."""
    Config.MASU_DATE_OVERRIDE = None
    Config.DEBUG = False

    result = DateAccessor().today()
    # NOTE(review): datetime.today() is naive local time; if DateAccessor
    # returns UTC this comparison can differ around midnight - confirm.
    reference = datetime.today()

    self.assertEqual(
        (result.year, result.month, result.day),
        (reference.year, reference.month, reference.day),
    )
def test_today_override_override_not_set(self):
    """Check today() when the override is not set and debug is true."""
    Config.MASU_DATE_OVERRIDE = None
    Config.DEBUG = True

    result = DateAccessor().today()
    reference = datetime.now(tz=pytz.UTC)

    # With no override value, the accessor is expected to report the
    # current UTC calendar date.
    self.assertEqual(
        (result.year, result.month, result.day),
        (reference.year, reference.month, reference.day),
    )
    self.assertEqual(result.tzinfo, pytz.UTC)
def test_today_override(self):
    """Check today() honours a randomly generated override in debug mode."""
    override_dt = self.fake.date_time(tzinfo=pytz.UTC)
    Config.MASU_DATE_OVERRIDE = override_dt.strftime("%Y-%m-%d %H:%M:%S")
    Config.DEBUG = True

    result = DateAccessor().today()

    self.assertEqual(
        (result.year, result.month, result.day),
        (override_dt.year, override_dt.month, override_dt.day),
    )
    # The override string carries no zone information; the returned value
    # is expected to be labelled as UTC.
    self.assertEqual(result.tzinfo.tzname(result), str(pytz.UTC))
def test_today_override_debug_false(self):
    """Check that an override value is ignored by today() when debug is off."""
    random_zone = pytz.timezone(self.fake.timezone())
    override_dt = self.fake.date_time(tzinfo=random_zone)
    Config.MASU_DATE_OVERRIDE = override_dt
    Config.DEBUG = False

    result = DateAccessor().today()
    reference = datetime.now(tz=pytz.UTC)

    # The expected value is the current UTC date, not the override.
    self.assertEqual(
        (result.year, result.month, result.day),
        (reference.year, reference.month, reference.day),
    )
    self.assertEqual(result.tzinfo, pytz.UTC)
def test_today_override_with_iso8601(self):
    """Check today() parses an ISO8601 override, including its UTC offset."""
    zone_name = self.fake.timezone()
    zone = pytz.timezone(zone_name)
    override_dt = self.fake.date_time(tzinfo=zone)
    Config.MASU_DATE_OVERRIDE = override_dt.isoformat()
    Config.DEBUG = True

    result = DateAccessor().today()

    self.assertEqual(
        (result.year, result.month, result.day),
        (override_dt.year, override_dt.month, override_dt.day),
    )

    # The returned tzinfo is compared with a fixed offset built from the
    # same zone that produced the override string.
    offset = zone.utcoffset(override_dt, is_dst=False)
    self.assertEqual(result.tzinfo, dateutil.tz.tzoffset(zone_name, offset))
class AWSOrgUnitCrawler(AccountCrawler):
    """AWS org unit crawler."""

    def __init__(self, account):
        """
        Object to crawl the org unit structure for accounts to org units.

        Args:
            account (dict): Account mapping. This class reads the AWS IAM
                role ARN from ``account["credentials"]["role_arn"]`` and the
                provider identifier from ``account["provider_uuid"]``.
        """
        super().__init__(account)
        # NOTE(review): self.account and self.schema are presumably set by
        # AccountCrawler.__init__ - confirm against the base class.
        self._auth_cred = self.account.get("credentials", {}).get("role_arn")
        self._date_accessor = DateAccessor()
        self._client = None
        self._account_alias_map = None
        self._structure_yesterday = None
        self.account_id = None
        self.errors_raised = False
        self.provider = self.get_provider()

    def get_provider(self):
        """Given the provider_uuid it returns the provider object."""
        with ProviderDBAccessor(self.account.get("provider_uuid")) as provider_accessor:
            return provider_accessor.get_provider()

    def _crawl_error_message(self):
        """
        Build the log message used when the crawl fails.

        The message is built at failure time (not up front) so that it
        carries the account_id populated by _init_session when available.

        Returns:
            (str): The formatted error message
        """
        # NOTE(review): the "ARN" slot is filled with the whole account
        # mapping, as in the original message - confirm that is intended.
        return (
            "Unable to crawl AWS organizational structure with ARN {} and "
            "provider_uuid: {} and account_id: {}".format(
                self.account, self.account.get("provider_uuid"), self.account_id
            )
        )

    @transaction.atomic
    def crawl_account_hierarchy(self):
        """
        Crawl the AWS organization tree and persist its current structure.

        Starts a session, loads the alias cache and yesterday's structure,
        walks the tree from the first root and, when no error was flagged
        during the walk, marks every node that was not seen as deleted.
        All exceptions are logged and swallowed; nothing is re-raised.
        """
        try:
            self._init_session()
            self._build_accout_alias_map()
            self._compute_org_structure_yesterday()
            root_ou = self._client.list_roots()["Roots"][0]
            LOG.info(
                "Obtained the root identifier for account with provider_uuid: "
                "{} and account_id: {}. Root identifier: {}".format(
                    self.account.get("provider_uuid"), self.account_id, root_ou["Id"]
                )
            )
            self._crawl_org_for_accounts(root_ou, root_ou.get("Id"), level=0)
            # Only mark leftovers deleted when the whole tree was walked
            # successfully; a partial walk would flag live nodes as deleted.
            if not self.errors_raised:
                self._mark_nodes_deleted()
        except ParamValidationError as param_error:
            LOG.warning(self._crawl_error_message())
            LOG.warning(param_error)
        except ClientError as boto_error:
            LOG.warning(self._crawl_error_message(), exc_info=boto_error)
        except Exception as unknown_error:
            LOG.exception(self._crawl_error_message(), exc_info=unknown_error)

    def _mark_nodes_deleted(self):
        """Set deleted_timestamp to today on every node left in yesterday's structure."""
        today = self._date_accessor.today()
        # Mark everything that is still in the dict as deleted; nodes seen
        # during the crawl were already popped by _save_aws_org_method.
        with schema_context(self.schema):
            for org_unit in self._structure_yesterday.values():
                org_unit.deleted_timestamp = today
                org_unit.save()

    def _crawl_org_for_accounts(self, ou, prefix, level):
        """
        Recursively crawls the org units and accounts.

        Any exception raised below the current org unit is logged and sets
        self.errors_raised instead of propagating.

        Args:
            ou (dict): A return from aws client that includes the Id
            prefix (str): The org unit path prefix
            level (int): The level of the tree
        """
        # Save entry for current OU
        self._save_aws_org_method(ou, prefix, level)
        try:
            # process accounts for this org unit
            self._crawl_accounts_per_id(ou, prefix, level)
            level = level + 1
            # recurse and look for sub org units
            ou_pager = self._client.get_paginator("list_organizational_units_for_parent")
            sub_units = (
                ou_pager.paginate(ParentId=ou.get("Id"))
                .build_full_result()
                .get("OrganizationalUnits")
            )
            for sub_ou in sub_units:
                new_prefix = prefix + ("&%s" % sub_ou.get("Id"))
                LOG.info(
                    "Organizational unit found for account with provider_uuid: {} and account_id: {}"
                    " during crawl. org_unit_id: {}".format(
                        self.account.get("provider_uuid"), self.account_id, sub_ou.get("Id")
                    )
                )
                self._crawl_org_for_accounts(sub_ou, new_prefix, level)
        except Exception:
            self.errors_raised = True
            LOG.exception(
                "Failure processing org_unit_id: {} for account with account schema: {},"
                " provider_uuid: {}, and account_id: {}".format(
                    ou.get("Id"), self.schema, self.account.get("provider_uuid"), self.account_id
                )
            )

    def _init_session(self):
        """
        Create the aws organizations client used by the crawler.

        Parses the role ARN, assumes the role, and stores the resulting
        "organizations" client on self._client and the ARN's account id on
        self.account_id.
        """
        awsarn = utils.AwsArn(self._auth_cred)
        session = utils.get_assume_role_session(awsarn)
        session_client = session.client("organizations")
        self.account_id = awsarn.account_id
        LOG.info(
            "Starting aws organizations session for crawler for account with"
            " provider_uuid: {} and account_id: {}.".format(
                self.account.get("provider_uuid"), self.account_id
            )
        )
        self._client = session_client

    def _depaginate_account_list(self, function, resource_key, **kwargs):
        """
        Depaginates the results of the aws client.

        Args:
            function (AwsFunction): Amazon function name
            resource_key (DataKey): Key to grab from results
            kwargs: Parameters to pass to amazon function

        Returns:
            (list): List of accounts

        See: https://gist.github.com/lukeplausin/a3670cd8f115a783a822aa0094015781
        """
        response = function(**kwargs)
        # Copy the first page so later pages can be appended in place
        # without mutating the response object.
        results = list(response[resource_key])
        while response.get("NextToken") is not None:
            response = function(NextToken=response.get("NextToken"), **kwargs)
            results.extend(response[resource_key])
        return results

    def _crawl_accounts_per_id(self, ou, prefix, level):
        """
        Save every account that is a direct child of the given org unit.

        Args:
            ou (dict): org unit you want to list.
            prefix (str): The org unit tree path
            level (int): The level of the org unit in the tree

        See:
            [1] https://docs.aws.amazon.com/cli/latest/reference/organizations/list-accounts-for-parent.html
            [2] https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/organizations.html
        """
        parent_id = ou.get("Id")
        LOG.info(
            "Crawling account with provider_uuid: {} and account_id: {}. Obtaining accounts for org_unit_id: {}"
            .format(self.account.get("provider_uuid"), self.account_id, parent_id)
        )
        child_accounts = self._depaginate_account_list(
            function=self._client.list_accounts_for_parent,
            resource_key="Accounts",
            ParentId=parent_id,
        )
        for act_info in child_accounts:
            self._save_aws_org_method(ou, prefix, level, act_info)

    def _save_aws_org_method(self, ou, unit_path, level, account=None):
        """
        Create or look up the database node for an org unit or an account.

        Args:
            ou (dict): The aws organizational unit dictionary
            unit_path (str): The tree path to the org unit
            level (int): The level of the node in the org data tree
            account (dict): The AWS account dictionary (reads "Id" and
                "Name"). If internal node, None

        Returns:
            (AWSOrganizationalUnit): That was created or looked up
        """
        unit_name = ou.get("Name", ou.get("Id"))
        unit_id = ou.get("Id")
        account_alias = None
        account_id = None

        with schema_context(self.schema):
            # This is a leaf node
            if account:
                # Look for an existing alias
                account_id = account.get("Id")
                account_name = account.get("Name")
                account_alias = self._account_alias_map.get(account_id)
                if not account_alias:
                    # Create a new account alias (it was not cached)
                    account_alias, created = AWSAccountAlias.objects.get_or_create(
                        account_id=account_id
                    )
                    self._account_alias_map[account_id] = account_alias
                    LOG.info(f"Saving account alias {account_alias} (created={created})")

                if account_name and account_alias.account_alias != account_name:
                    # The name was not set or changed since last scan.
                    LOG.info(
                        "Updating account alias for account_id=%s, old_account_alias=%s, new_account_alias=%s"
                        % (account_id, account_alias.account_alias, account_name)
                    )
                    account_alias.account_alias = account_name
                    account_alias.save()

            # If we add provider here right now it will duplicate the entries
            org_unit, created = AWSOrganizationalUnit.objects.get_or_create(
                org_unit_name=unit_name,
                org_unit_id=unit_id,
                org_unit_path=unit_path,
                account_alias=account_alias,
                level=level,
            )

            # Remove key since we have seen it
            lookup_key = self._create_lookup_key(unit_id, account_id)
            self._structure_yesterday.pop(lookup_key, None)
            if created:
                # only log it was saved if was created to reduce logging on everyday calls
                LOG.info(
                    "Saving account or org unit: unit_name={}, unit_id={}, "
                    "unit_path={}, account_alias={}, provider_uuid={}, account_id={}, level={}"
                    .format(
                        unit_name,
                        unit_id,
                        unit_path,
                        account_alias,
                        self.account.get("provider_uuid"),
                        self.account_id,
                        level,
                    )
                )
            elif org_unit.deleted_timestamp is not None:
                LOG.warning(
                    "Org unit {} was found with a deleted_timestamp for account"
                    " with provider_uuid={} and account_id={}. Setting deleted_timestamp to null!"
                    .format(org_unit.org_unit_id, self.account.get("provider_uuid"), self.account_id)
                )
                org_unit.deleted_timestamp = None
                org_unit.save()
            # Since we didn't add the provider foreign key initially
            # we need to add a bit of self healing here to repair the
            # nodes that are currently in customer's databases.
            if not org_unit.provider and self.provider:
                org_unit.provider = self.provider
                org_unit.save()
            return org_unit

    def _delete_aws_account(self, account_id):
        """
        Marks an account deleted.

        Args:
            account_id (str): The AWS account number.

        Returns:
            QuerySet(AWSOrganizationalUnit): That were marked deleted
        """
        LOG.info("Marking account deleted: account_id=%s" % (account_id))
        with schema_context(self.schema):
            account_alias = AWSAccountAlias.objects.filter(account_id=account_id).first()
            accounts = AWSOrganizationalUnit.objects.filter(account_alias=account_alias)
            # Compute the date string once so every record gets the same value.
            deleted_on = self._date_accessor.today().strftime("%Y-%m-%d")
            # There can be multiple records for a single account due to changes in org structure
            for account in accounts:
                account.deleted_timestamp = deleted_on
                account.save()
            return accounts

    def _delete_aws_org_unit(self, org_unit_id):
        """
        Marks an org unit deleted.

        Args:
            org_unit_id (str): The AWS organization unit id.

        Returns:
            QuerySet(AWSOrganizationalUnit): That were marked deleted
        """
        LOG.info(
            "Marking org unit deleted for provider_uuid=%s: org_unit_id=%s"
            % (self.account.get("provider_uuid"), org_unit_id)
        )
        with schema_context(self.schema):
            accounts = AWSOrganizationalUnit.objects.filter(org_unit_id=org_unit_id)
            # Compute the date string once so every record gets the same value.
            deleted_on = self._date_accessor.today().strftime("%Y-%m-%d")
            # There can be multiple records for a single org unit due to changes in org structure
            for account in accounts:
                account.deleted_timestamp = deleted_on
                account.save()
            return accounts

    def _build_accout_alias_map(self):
        """
        Builds a map of account_id to account alias for foreign keys

        The map is stored on self._account_alias_map (account_id to
        AWSAccountAlias); nothing is returned.
        """
        self._account_alias_map = {}
        with schema_context(self.schema):
            for alias in AWSAccountAlias.objects.all():
                self._account_alias_map[alias.account_id] = alias

    def _compute_org_structure_yesterday(self):
        """
        Construct the tree structure for yesterday

        The result is stored on self._structure_yesterday: a dict whose key
        is built of org_unit (if account is none) or org_unit and account
        number, mapped to django AWSOrganizationalUnit model objects.
        """
        yesterday = (self._date_accessor.today() - timedelta(1)).strftime("%Y-%m-%d")
        self._structure_yesterday = self._compute_org_structure_interval(yesterday)

    def _compute_org_structure_interval(self, start_date, end_date=None):
        """
        Construct the tree structure for an interval

        Args:
            start_date: Interval start. The only caller visible in this
                class passes a "%Y-%m-%d" string.
            end_date: Interval end; defaults to start_date when falsy.

        Returns:
            dict: key built of org_unit (if account is none) or org_unit and account number
                to django AWSOrganizationalUnit model objects.
        """
        if not end_date:
            end_date = start_date
        with schema_context(self.schema):
            LOG.info(
                "Obtaining tree from {} to {} for account with provider_uuid: {} and account_id: {}"
                .format(start_date, end_date, self.account.get("provider_uuid"), self.account_id)
            )
            # Remove org units delete on date or before
            aws_node_qs = AWSOrganizationalUnit.objects.exclude(deleted_timestamp__lte=start_date)

            # Remove org units created after date
            aws_node_qs = aws_node_qs.exclude(created_timestamp__gt=end_date)

            # Keep only the most recently created record per org unit / per
            # account (ordering by -created_timestamp before distinct).
            aws_org_units = (
                aws_node_qs.filter(account_alias__isnull=True)
                .order_by("org_unit_id", "-created_timestamp")
                .distinct("org_unit_id")
            )
            aws_accounts = (
                aws_node_qs.filter(account_alias__isnull=False)
                .order_by("account_alias", "-created_timestamp")
                .distinct("account_alias")
            )

            structure = {}
            for org_unit in aws_org_units:
                structure[self._create_lookup_key(org_unit.org_unit_id)] = org_unit
            for org_unit in aws_accounts:
                structure[
                    self._create_lookup_key(org_unit.org_unit_id, org_unit.account_alias.account_id)
                ] = org_unit
            return structure

    def _create_lookup_key(self, unit_id, account_id=None):
        """
        Construct the lookup key for the _structure_yesterday tree

        Args:
            unit_id (str): The AWS organization unit id.
            account_id (str): The AWS account id or None if it's an internal node

        Returns:
            str: key to lookup org units and accounts
        """
        if account_id:
            return f"{unit_id}&{account_id}"
        return f"{unit_id}"