def run(dao=None, cycle_timestamp=None, configs=None, crm_rate_limiter=None):
    """Runs the load project IAM policies data pipeline.

    Args:
        dao: Data access object.
        cycle_timestamp: String of timestamp, formatted as YYYYMMDDTHHMMSSZ.
        configs: Dictionary of configurations. Unused by this pipeline; kept
            for a uniform pipeline run() signature.
        crm_rate_limiter: RateLimiter object for CRM API client.

    Returns:
        None

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    _ = configs  # Unused, but part of the common pipeline interface.

    # Get the projects for which we will retrieve the IAM policies.
    try:
        project_numbers = dao.select_project_numbers(RESOURCE_NAME,
                                                     cycle_timestamp)
    except MySQLError as e:
        raise LoadDataPipelineError(e)

    crm_client = CloudResourceManagerClient(rate_limiter=crm_rate_limiter)

    try:
        # Retrieve data from GCP.
        iam_policies_map = crm_client.get_project_iam_policies(
            RESOURCE_NAME, project_numbers)

        # Flatten the iterator since we will use it twice, and it is faster
        # than cloning to 2 iterators.
        # TODO: Investigate improving so the pylint disable isn't needed.
        # pylint: disable=redefined-variable-type
        iam_policies_map = list(iam_policies_map)

        # Flatten and relationalize data for upload to cloud sql.
        flattened_iam_policies = (
            transform_util.flatten_iam_policies(iam_policies_map))
    except ApiExecutionError as e:
        raise LoadDataPipelineError(e)

    try:
        # Load flattened iam policies into cloud sql.
        dao.load_data(RESOURCE_NAME, cycle_timestamp, flattened_iam_policies)

        # Load raw iam policies into cloud sql.
        # A separate table is used to store the raw iam policies because it is
        # much faster than updating these individually into the projects
        # table.
        for policy in iam_policies_map:
            policy['iam_policy'] = json.dumps(policy['iam_policy'])
        dao.load_data(RAW_PROJECT_IAM_POLICIES, cycle_timestamp,
                      iam_policies_map)
    except (CSVFileError, MySQLError) as e:
        raise LoadDataPipelineError(e)
def run(dao, cycle_timestamp, configs, crm_rate_limiter):
    """Runs the load org IAM policies data pipeline.

    Args:
        dao: Data access object.
        cycle_timestamp: String of timestamp, formatted as YYYYMMDDTHHMMSSZ.
        configs: Dictionary of configurations; must supply 'organization_id'.
        crm_rate_limiter: RateLimiter object for CRM API client.

    Returns:
        None

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    org_id = configs.get('organization_id')
    # Fail fast when the org id is missing/empty or the config placeholder
    # was never replaced; otherwise the API call would fail later with a
    # less helpful error.
    if not org_id or org_id == '<organization id>':
        raise LoadDataPipelineError('No organization id is specified.')

    crm_client = CloudResourceManagerClient(rate_limiter=crm_rate_limiter)

    try:
        # Retrieve data from GCP.
        iam_policies_map = crm_client.get_org_iam_policies(
            RESOURCE_NAME, org_id)

        # Flatten the iterator since we will use it twice, and it is faster
        # than cloning to 2 iterators.
        iam_policies_map = list(iam_policies_map)

        # Flatten and relationalize data for upload to cloud sql.
        flattened_iam_policies = (
            transform_util.flatten_iam_policies(iam_policies_map))
    except ApiExecutionError as e:
        raise LoadDataPipelineError(e)

    try:
        # Load flattened iam policies into cloud sql.
        dao.load_data(RESOURCE_NAME, cycle_timestamp, flattened_iam_policies)

        # Load raw iam policies into cloud sql.
        # A separate table is used to store the raw iam policies because it is
        # much faster than updating these individually into the projects
        # table.
        for policy in iam_policies_map:
            policy['iam_policy'] = json.dumps(policy['iam_policy'])
        dao.load_data(RAW_ORG_IAM_POLICIES, cycle_timestamp, iam_policies_map)
    except (CSVFileError, MySQLError) as e:
        raise LoadDataPipelineError(e)
def test_can_flatten_project_iam_policies(self):
    """Tests that project IAM policies are flattened as expected."""
    flattened_iam_policies = transform_util.flatten_iam_policies(
        FAKE_PROJECT_IAM_POLICY_MAP)
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual.
    self.assertEqual(EXPECTED_FLATTENED_PROJECT_IAM_POLICY,
                     list(flattened_iam_policies))