def _retrieve(self):
    """Retrieve the project cloudsql instances from GCP.

    Returns:
        list: Instances as per-project dictionary.
            Example: [{project_number: project_number,
                       instances: instances_dict}]

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    # Get the projects for which we will retrieve the instances.
    try:
        project_numbers = self.dao.get_project_numbers(
            self.PROJECTS_RESOURCE_NAME, self.cycle_timestamp)
    except dao_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    instances_maps = []
    for project_number in project_numbers:
        instances = self.safe_api_call('get_instances', project_number)
        if instances:
            instances_map = {'project_number': project_number,
                             'instances': instances}
            instances_maps.append(instances_map)
    return instances_maps
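# Note: safe_api_call is a base-pipeline helper not shown in this
# section. Judging from the try/except variants elsewhere (e.g. the
# unsafe cloudsql _retrieve below), it likely wraps the api_client call
# and logs failures instead of raising. A minimal sketch under that
# assumption:
def safe_api_call(self, method_name, *args):
    """Call an api_client method, logging instead of raising on failure.

    Note: an assumed sketch, not the actual implementation.

    Args:
        method_name: Name of the api_client method to call.
        *args: Positional arguments forwarded to the method.

    Returns:
        The api_client result, or None if the call failed.
    """
    try:
        return getattr(self.api_client, method_name)(*args)
    except api_errors.ApiExecutionError as e:
        LOGGER.error('Unable to call %s with args %s:\n%s',
                     method_name, args, e)
        return None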
def _retrieve(self):
    """Retrieve the org IAM policies from GCP.

    Returns:
        list: IAM policies as per-org dictionary.
            Example: [{'org_id': org_id, 'iam_policy': iam_policy}]
            https://cloud.google.com/resource-manager/reference/rest/Shared.Types/Policy

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    try:
        orgs = self.dao.get_organizations(self.RESOURCE_NAME,
                                          self.cycle_timestamp)
    except da_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    iam_policies = []
    for org in orgs:
        try:
            iam_policy = self.api_client.get_org_iam_policies(
                self.RESOURCE_NAME, org.id)
            iam_policies.append(iam_policy)
        except api_errors.ApiExecutionError as e:
            LOGGER.error('Unable to get IAM policies for org %s:\n%s',
                         org.id, e)
    return iam_policies
def _retrieve_dataset_project_map(self, project_ids):
    """Retrieve the bigquery datasets for all requested project ids.

    Args:
        project_ids: A list of project ids.

    Returns:
        A list of lists of dataset objects, like:
            [[{'datasetId': 'test', 'projectId': 'bq-test'},
              {'datasetId': 'test', 'projectId': 'bq-test'}],
             [{'datasetId': 'test', 'projectId': 'bq-test'},
              {'datasetId': 'test', 'projectId': 'bq-test'}]]

    Raises:
        inventory_errors.LoadDataPipelineError: An error was encountered
            in the underlying bigquery API.
    """
    dataset_project_map = []
    for project_id in project_ids:
        try:
            result = self.api_client.get_datasets_for_projectid(project_id)
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        if result:
            dataset_project_map.append(result)
    return dataset_project_map
def _retrieve(self):
    """Retrieve forwarding rules from GCP.

    Get all the projects in the current snapshot and retrieve the
    compute forwarding rules for each.

    Returns:
        A dict mapping projects to their forwarding rules (list):
            {project_id: [forwarding_rules]}
    """
    projects = proj_dao.ProjectDao().get_projects(self.cycle_timestamp)
    forwarding_rules = {}
    for project in projects:
        project_fwd_rules = []
        try:
            response = self.api_client.get_forwarding_rules(project.id)
            for page in response:
                items = page.get('items', {})
                for region_fwd_rules in items.values():
                    fwd_rules = region_fwd_rules.get('forwardingRules', [])
                    project_fwd_rules.extend(fwd_rules)
        except api_errors.ApiExecutionError as e:
            LOGGER.error(inventory_errors.LoadDataPipelineError(e))
        if project_fwd_rules:
            forwarding_rules[project.id] = project_fwd_rules
    return forwarding_rules
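# For reference, each page of the Compute Engine forwardingRules
# aggregatedList response groups rules under per-region 'items' keys.
# An illustrative page shape for the loop above (names and values are
# made-up examples, not real data):
SAMPLE_FORWARDING_RULES_PAGE = {
    'items': {
        'regions/us-central1': {
            'forwardingRules': [
                {'name': 'fwd-rule-1', 'IPAddress': '10.128.0.2'},
            ],
        },
        'regions/europe-west1': {
            # Regions with no rules carry a warning instead of a
            # 'forwardingRules' key, hence the .get(..., []) above.
            'warning': {'code': 'NO_RESULTS_ON_PAGE'},
        },
    },
}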
def _retrieve(self):
    """Retrieve the project buckets from GCP.

    Returns:
        buckets_maps: List of buckets as per-project dictionary.
            Example: [{project_number: project_number,
                       buckets: buckets_json}]

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    # Get the projects for which we will retrieve the buckets.
    try:
        project_numbers = self.dao.get_project_numbers(
            self.PROJECTS_RESOURCE_NAME, self.cycle_timestamp)
    except data_access_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    # Retrieve data from GCP.
    buckets_maps = []
    for project_number in project_numbers:
        try:
            buckets = self.api_client.get_buckets(project_number)
            buckets_map = {'project_number': project_number,
                           'buckets': buckets}
            buckets_maps.append(buckets_map)
        except api_errors.ApiExecutionError as e:
            LOGGER.error('Unable to get buckets for project %s:\n%s',
                         project_number, e)
    return buckets_maps
def _retrieve(self):
    """Retrieve the project cloudsql instances from GCP.

    Returns:
        instances_maps: List of instances as per-project dictionary.
            Example: [{project_number: project_number,
                       instances: instances_dict}]

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    # Get the projects for which we will retrieve the instances.
    try:
        project_numbers = self.dao.get_project_numbers(
            self.PROJECTS_RESOURCE_NAME, self.cycle_timestamp)
    except data_access_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    instances_maps = []
    for project_number in project_numbers:
        try:
            instances = self.api_client.get_instances(project_number)
            instances_map = {'project_number': project_number,
                             'instances': instances}
            instances_maps.append(instances_map)
        except api_errors.ApiExecutionError as e:
            LOGGER.error(
                'Unable to get cloudsql instances for project %s:\n%s',
                project_number, e)
    return instances_maps
def _retrieve(self):
    """Retrieve the project buckets acls from GCP.

    Returns:
        list: Bucket acls as per-bucket dictionary.
            Example: [{bucket_name: 'bucket name',
                       acl: bucket_acls_json}]

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    buckets_acls = []

    # Get the raw buckets for which we will retrieve the acls.
    try:
        raw_buckets = self.dao.get_raw_buckets(self.cycle_timestamp)
    except dao_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    for result in raw_buckets:
        try:
            raw_bucket_json = json.loads(result.get('raw_bucket'))
            bucket_acls = raw_bucket_json.get('acl')
        except ValueError as err:
            LOGGER.warning('Invalid json: %s', err)
            continue

        if bucket_acls:
            buckets_acls.append({'bucket_name': result.get('bucket_id'),
                                 'acl': bucket_acls})
    return buckets_acls
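# The 'acl' field parsed above follows the Cloud Storage bucket
# resource format. An illustrative raw_bucket value (the bucket name,
# entities, and project number are made-up examples):
SAMPLE_RAW_BUCKET = {
    'id': 'example-bucket',
    'acl': [
        {'entity': 'project-owners-123456789', 'role': 'OWNER'},
        {'entity': 'allUsers', 'role': 'READER'},
    ],
}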
def _retrieve(self):
    """Retrieve the folder IAM policies from GCP.

    Returns:
        list: List of IAM policies as per-folder dictionary.
            Example: [{'folder_id': folder_id, 'iam_policy': iam_policy}]

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    try:
        folders = self.dao.get_folders(
            self.RESOURCE_NAME, self.cycle_timestamp)
    except da_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    iam_policies = []
    for folder in folders:
        iam_policy = self.safe_api_call('get_folder_iam_policies',
                                        self.RESOURCE_NAME, folder.id)
        if iam_policy:
            iam_policies.append(iam_policy)
    return iam_policies
def _retrieve(self):
    """Retrieve the folder IAM policies from GCP.

    Returns:
        list: List of IAM policies as per-folder dictionary.
            Example: [{'folder_id': folder_id, 'iam_policy': iam_policy}]

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    try:
        folders = self.dao.get_folders(self.RESOURCE_NAME,
                                       self.cycle_timestamp)
    except da_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    iam_policies = []
    for folder in folders:
        try:
            iam_policy = self.api_client.get_folder_iam_policies(
                self.RESOURCE_NAME, folder.id)
            iam_policies.append(iam_policy)
        except api_errors.ApiExecutionError as e:
            LOGGER.error('Unable to get IAM policies for folder %s:\n%s',
                         folder.id, e)
    return iam_policies
def _retrieve(self):
    """Retrieve the project IAM policies from GCP.

    Returns:
        list: IAM policies as per-project dictionary.
            Example: [{project_number: project_number,
                       iam_policy: iam_policy}]
            https://cloud.google.com/resource-manager/reference/rest/Shared.Types/Policy

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    # Get the projects for which we will retrieve the IAM policies.
    try:
        project_numbers = self.dao.get_project_numbers(
            self.RESOURCE_NAME, self.cycle_timestamp)
    except data_access_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    # Retrieve data from GCP.
    # Not using iterator since we will use the iam_policy_maps twice.
    iam_policy_maps = []
    for project_number in project_numbers:
        try:
            iam_policy = self.api_client.get_project_iam_policies(
                self.RESOURCE_NAME, project_number)
            iam_policy_map = {'project_number': project_number,
                              'iam_policy': iam_policy}
            iam_policy_maps.append(iam_policy_map)
        except api_errors.ApiExecutionError as e:
            LOGGER.error('Unable to get IAM policies for project %s:\n%s',
                         project_number, e)
    return iam_policy_maps
def _retrieve(self):
    """Retrieve the folder resources from GCP.

    Returns:
        An iterable of resource manager folder search responses.

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    try:
        return self.api_client.get_folders(self.RESOURCE_NAME)
    except api_errors.ApiExecutionError as e:
        raise inventory_errors.LoadDataPipelineError(e)
def _retrieve(self):
    """Retrieve the organizations resources from GCP.

    Returns:
        iterable: resource manager org search response.
        https://cloud.google.com/resource-manager/reference/rest/v1/organizations/search

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    try:
        return self.api_client.get_organizations(self.RESOURCE_NAME)
    except api_errors.ApiExecutionError as e:
        raise inventory_errors.LoadDataPipelineError(e)
def _retrieve_bigquery_projectids(self):
    """Retrieve a list of bigquery projectids.

    Returns:
        A list of project ids.

    Raises:
        inventory_errors.LoadDataPipelineError: An error was encountered
            in the underlying bigquery API.
    """
    try:
        return self.api_client.get_bigquery_projectids()
    except api_errors.ApiExecutionError as e:
        raise inventory_errors.LoadDataPipelineError(e)
def _retrieve(self):
    """Retrieve the project resources from GCP.

    Returns:
        iterable: resource manager project list response.
        https://cloud.google.com/resource-manager/reference/rest/v1/projects/list#response-body

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    try:
        return self.api_client.get_projects(
            self.RESOURCE_NAME, lifecycleState=LifecycleState.ACTIVE)
    except api_errors.ApiExecutionError as e:
        raise inventory_errors.LoadDataPipelineError(e)
def _retrieve(self):
    """Retrieve the groups from GSuite.

    Returns:
        A list of group list objects from the Admin SDK.

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    try:
        return self.api_client.get_groups()
    except api_errors.ApiExecutionError as e:
        raise inventory_errors.LoadDataPipelineError(e)
def _retrieve_dataset_access(self, project_id, dataset_id):
    """Retrieve the bigquery dataset resources from GCP.

    Args:
        project_id: A project id.
        dataset_id: A dataset id.

    Returns:
        See bigquery.get_dataset_access().

    Raises:
        inventory_errors.LoadDataPipelineError: An error was encountered
            in the underlying bigquery API.
    """
    try:
        return self.api_client.get_dataset_access(project_id, dataset_id)
    except api_errors.ApiExecutionError as e:
        raise inventory_errors.LoadDataPipelineError(e)
def _fetch_groups_from_dao(self):
    """Fetch the latest group ids previously stored in Cloud SQL.

    Returns:
        list: A list of group ids.

    Raises:
        inventory_errors.LoadDataPipelineError: An error with loading
            data has occurred.
    """
    try:
        group_ids = self.dao.select_group_ids(self.RESOURCE_NAME,
                                              self.cycle_timestamp)
    except dao_errors.MySQLError as e:
        raise inventory_errors.LoadDataPipelineError(e)

    return group_ids
def _retrieve(self):
    """Retrieve instance groups from GCP.

    Get all the projects in the current snapshot and retrieve the
    compute instance groups for each.

    Returns:
        A dict mapping projects to their instance groups (list):
            {project_id: [instance groups]}
    """
    projects = proj_dao.ProjectDao().get_projects(self.cycle_timestamp)
    igs = {}
    for project in projects:
        try:
            project_igs = self.api_client.get_instance_groups(project.id)
            if project_igs:
                igs[project.id] = project_igs
        except api_errors.ApiExecutionError as e:
            LOGGER.error(inventory_errors.LoadDataPipelineError(e))
    return igs
def _load(self, resource_name, data):
    """Load data into Forseti storage.

    Args:
        resource_name (str): Resource name.
        data (iterable or list): Data to be uploaded.

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    if not data:
        LOGGER.warning('No %s data to load into Cloud SQL, continuing...',
                       resource_name)
        return

    try:
        self.dao.load_data(resource_name, self.cycle_timestamp, data)
    except (data_access_errors.CSVFileError,
            data_access_errors.MySQLError) as e:
        raise inventory_errors.LoadDataPipelineError(e)
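# These pipelines pair _retrieve with _load. A sketch of how the base
# pipeline likely ties them together (the method name run is an
# assumption; the actual base class may also transform data between
# the two steps):
def run(self):
    """Retrieve this pipeline's resource, then load it into storage.

    Note: an assumed sketch of the base-pipeline flow, not the actual
    implementation.
    """
    data = self._retrieve()
    self._load(self.RESOURCE_NAME, data)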
def _retrieve(self, group_ids):  # pylint: disable=arguments-differ
    """Retrieve the membership for a list of given GSuite groups.

    Args:
        group_ids: A list of group ids.

    Returns:
        A list of tuples (group_id, group_members) from the Admin SDK,
        e.g. (string, [])

    Raises:
        LoadDataPipelineError: An error with loading data has occurred.
    """
    group_members_map = []
    for group_id in group_ids:
        try:
            group_members = self.api_client.get_group_members(group_id)
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        group_members_map.append((group_id, group_members))
        LOGGER.debug('Retrieved members from %s: %d',
                     group_id, len(group_members))
    return group_members_map
def _retrieve(self):
    """Retrieve forwarding rules from GCP.

    Get all the projects in the current snapshot and retrieve the
    compute forwarding rules for each.

    Returns:
        A dict mapping projects to their forwarding rules (list):
            {project_id: [forwarding_rules]}
    """
    projects = proj_dao.ProjectDao().get_projects(self.cycle_timestamp)
    forwarding_rules = {}
    for project in projects:
        try:
            project_fwd_rules = self.api_client.get_forwarding_rules(
                project.id)
            if project_fwd_rules:
                forwarding_rules[project.id] = project_fwd_rules
        except api_errors.ApiExecutionError as e:
            LOGGER.error(inventory_errors.LoadDataPipelineError(e))
    return forwarding_rules
def _retrieve_dataset_access(self, project_id, dataset_id):
    """Retrieve the bigquery dataset resources from GCP.

    Args:
        project_id (str): A project id.
        dataset_id (str): A dataset id.

    Returns:
        list: Access lists for a given project_id and dataset_id.
            [{'role': 'WRITER', 'specialGroup': 'projectWriters'},
             {'role': 'OWNER', 'specialGroup': 'projectOwners'},
             {'role': 'OWNER', 'userByEmail': '*****@*****.**'},
             {'role': 'READER', 'specialGroup': 'projectReaders'}]

    Raises:
        inventory_errors.LoadDataPipelineError: An error was encountered
            in the underlying bigquery API.
    """
    try:
        return self.api_client.get_dataset_access(project_id, dataset_id)
    except api_errors.ApiExecutionError as e:
        raise inventory_errors.LoadDataPipelineError(e)
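# The three BigQuery helpers above suggest a chained retrieve flow:
# project ids -> datasets per project -> access lists per dataset. A
# sketch of how the pipeline's _retrieve might compose them (the
# composition and the output shape are assumptions, not shown in this
# section):
def _retrieve(self):
    """Assemble dataset access maps for all BigQuery projects.

    Note: an assumed composition of the helpers above, not the actual
    implementation.

    Returns:
        A list of dicts like:
            [{'project_id': 'bq-test', 'dataset_id': 'test',
              'access': [...]}]
    """
    project_ids = self._retrieve_bigquery_projectids()
    dataset_access_maps = []
    for datasets in self._retrieve_dataset_project_map(project_ids):
        for dataset in datasets:
            access = self._retrieve_dataset_access(
                dataset.get('projectId'), dataset.get('datasetId'))
            dataset_access_maps.append({
                'project_id': dataset.get('projectId'),
                'dataset_id': dataset.get('datasetId'),
                'access': access})
    return dataset_access_maps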
def _retrieve(self):
    """Retrieve instances from GCP.

    Get all the projects in the current snapshot and retrieve the
    compute instances for each.

    Returns:
        dict: A map of projects to their instances (list):
            {project_id: [instances]}
    """
    projects = (proj_dao
                .ProjectDao(self.global_configs)
                .get_projects(self.cycle_timestamp))
    instances = {}
    for project in projects:
        try:
            project_instances = self.api_client.get_instances(project.id)
            if project_instances:
                instances[project.id] = project_instances
        except api_errors.ApiExecutionError as e:
            LOGGER.error(inventory_errors.LoadDataPipelineError(e))
    return instances
def _build_pipelines(cycle_timestamp, configs, **kwargs):
    """Build the pipelines to load data.

    Args:
        cycle_timestamp: String of timestamp, formatted as
            YYYYMMDDTHHMMSSZ.
        configs: Dictionary of configurations.
        kwargs: Extra configs.

    Returns:
        List of pipelines that will be run.

    Raises:
        inventory_errors.LoadDataPipelineError: Groups cannot be
            inventoried with the given configuration.
    """
    # Commonly used clients for shared ratelimiter re-use.
    crm_v1_api_client = crm.CloudResourceManagerClient()
    dao = kwargs.get('dao')
    gcs_api_client = gcs.StorageClient()
    organization_dao_name = 'organization_dao'
    project_dao_name = 'project_dao'

    # The order here matters, e.g. groups_pipeline must come before
    # group_members_pipeline.
    pipelines = [
        load_orgs_pipeline.LoadOrgsPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(organization_dao_name)),
        load_org_iam_policies_pipeline.LoadOrgIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(organization_dao_name)),
        load_projects_pipeline.LoadProjectsPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_iam_policies_pipeline.LoadProjectsIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_pipeline.LoadProjectsBucketsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_acls_pipeline.LoadProjectsBucketsAclsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get('bucket_dao')),
        load_projects_cloudsql_pipeline.LoadProjectsCloudsqlPipeline(
            cycle_timestamp, configs, cloudsql.CloudsqlClient(),
            kwargs.get('cloudsql_dao')),
        load_forwarding_rules_pipeline.LoadForwardingRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(),
            kwargs.get('fwd_rules_dao')),
        load_folders_pipeline.LoadFoldersPipeline(
            cycle_timestamp, configs,
            crm.CloudResourceManagerClient(version='v2beta1'), dao),
        load_bigquery_datasets_pipeline.LoadBigQueryDatasetsPipeline(
            cycle_timestamp, configs, bq.BigQueryClient(), dao),
        load_firewall_rules_pipeline.LoadFirewallRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(version='beta'),
            kwargs.get(project_dao_name)),
    ]

    if configs.get('inventory_groups'):
        if util.can_inventory_groups(configs):
            admin_api_client = ad.AdminDirectoryClient()
            pipelines.extend([
                load_groups_pipeline.LoadGroupsPipeline(
                    cycle_timestamp, configs, admin_api_client, dao),
                load_group_members_pipeline.LoadGroupMembersPipeline(
                    cycle_timestamp, configs, admin_api_client, dao)
            ])
        else:
            raise inventory_errors.LoadDataPipelineError(
                'Unable to inventory groups with specified arguments:\n%s'
                % configs)

    return pipelines
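# A hypothetical driver for _build_pipelines, for illustration only;
# the real entry point is not shown in this section. It assumes that
# configs and the various dao objects are already constructed, and that
# each pipeline exposes a run() method as sketched earlier.
from datetime import datetime

cycle_timestamp = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
for pipeline in _build_pipelines(cycle_timestamp, configs,
                                 dao=dao,
                                 organization_dao=organization_dao,
                                 project_dao=project_dao,
                                 bucket_dao=bucket_dao,
                                 cloudsql_dao=cloudsql_dao,
                                 fwd_rules_dao=fwd_rules_dao):
    pipeline.run()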