def _retrieve(self):
        """Retrieve the project cloudsql instances from GCP.

        Returns:
            list: Instances as per-project dictionaries.
                Example: [{'project_number': project_number,
                           'instances': instances_dict}]

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        # Get the projects for which we will retrieve the instances.
        try:
            project_numbers = self.dao.get_project_numbers(
                self.PROJECTS_RESOURCE_NAME, self.cycle_timestamp)
        except dao_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        instances_maps = []
        for project_number in project_numbers:
            instances = self.safe_api_call('get_instances', project_number)
            if instances:
                instances_map = {
                    'project_number': project_number,
                    'instances': instances
                }
                instances_maps.append(instances_map)
        return instances_maps
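    # The snippet above relies on a safe_api_call helper that is not shown
    # here. A minimal sketch of what such a helper might look like, assuming
    # api_client methods raise api_errors.ApiExecutionError on failure (the
    # exact signature and logging are assumptions, not the actual
    # implementation):
    def safe_api_call(self, method_name, *args):
        """Call an api_client method, logging instead of raising on error.

        Args:
            method_name: Name of the method to call on self.api_client.
            *args: Positional arguments forwarded to that method.

        Returns:
            The API response, or None if the call failed.
        """
        try:
            return getattr(self.api_client, method_name)(*args)
        except api_errors.ApiExecutionError as e:
            LOGGER.error('API call %s%s failed:\n%s', method_name, args, e)
            return None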
    def _retrieve(self):
        """Retrieve the org IAM policies from GCP.

        Returns:
            iam_policies: List of IAM policies as per-org dictionaries.
                Example: [{'org_id': org_id,
                           'iam_policy': iam_policy}]
                https://cloud.google.com/resource-manager/reference/rest/Shared.Types/Policy

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        try:
            orgs = self.dao.get_organizations(self.RESOURCE_NAME,
                                              self.cycle_timestamp)
        except da_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        iam_policies = []
        for org in orgs:
            try:
                iam_policy = self.api_client.get_org_iam_policies(
                    self.RESOURCE_NAME, org.id)
                iam_policies.append(iam_policy)
            except api_errors.ApiExecutionError as e:
                LOGGER.error('Unable to get IAM policies for org %s:\n%s',
                             org.id, e)
        return iam_policies
    def _retrieve_dataset_project_map(self, project_ids):
        """Retrieve the bigquery datasets for all requested project ids.

        Args:
            project_ids: A list of project ids.

        Returns:
            A list of objects like:
                [[{'datasetId': 'test', 'projectId': 'bq-test'},
                 {'datasetId': 'test', 'projectId': 'bq-test'}],
                [{'datasetId': 'test', 'projectId': 'bq-test'},
                 {'datasetId': 'test', 'projectId': 'bq-test'}]]

        Raises: inventory_errors.LoadDataPipelineError when we encounter an
            error in the underlying bigquery API.
        """
        dataset_project_map = []
        for project_id in project_ids:
            try:
                result = self.api_client.get_datasets_for_projectid(project_id)
            except api_errors.ApiExecutionError as e:
                raise inventory_errors.LoadDataPipelineError(e)

            if result:
                dataset_project_map.append(result)

        return dataset_project_map
    def _retrieve(self):
        """Retrieve forwarding rules from GCP.

        Get all the projects in the current snapshot and retrieve the
        compute forwarding rules for each.

        Returns:
            A dict mapping projects to their forwarding rules (list):
            {project_id: [forwarding_rules]}
        """
        projects = proj_dao.ProjectDao().get_projects(self.cycle_timestamp)
        forwarding_rules = {}
        for project in projects:
            project_fwd_rules = []
            try:
                response = self.api_client.get_forwarding_rules(project.id)
                for page in response:
                    items = page.get('items', {})
                    for region_fwd_rules in items.values():
                        fwd_rules = region_fwd_rules.get('forwardingRules', [])
                        project_fwd_rules.extend(fwd_rules)
            except api_errors.ApiExecutionError as e:
                LOGGER.error(inventory_errors.LoadDataPipelineError(e))
            if project_fwd_rules:
                forwarding_rules[project.id] = project_fwd_rules
        return forwarding_rules
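    # For reference, each page consumed by the loop above follows the shape
    # of the Compute Engine forwardingRules.aggregatedList response, e.g.
    # (fabricated sample data):
    #
    #     {'items': {
    #         'regions/us-central1': {
    #             'forwardingRules': [{'name': 'fwd-rule-1'}]},
    #         'regions/europe-west1': {
    #             'warning': {'code': 'NO_RESULTS_ON_PAGE'}}}}
    #
    # Regions without rules carry a 'warning' entry instead of a
    # 'forwardingRules' list, hence the .get('forwardingRules', []) hedge.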
    def _retrieve(self):
        """Retrieve the project buckets from GCP.

        Returns:
            buckets_maps: List of buckets as per-project dictionaries.
                Example: [{'project_number': project_number,
                           'buckets': buckets_json}]

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        # Get the projects for which we will retrieve the buckets.
        try:
            project_numbers = self.dao.get_project_numbers(
                self.PROJECTS_RESOURCE_NAME, self.cycle_timestamp)
        except data_access_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)
        # Retrieve data from GCP.
        buckets_maps = []
        for project_number in project_numbers:
            try:
                buckets = self.api_client.get_buckets(
                    project_number)
                buckets_map = {'project_number': project_number,
                               'buckets': buckets}
                buckets_maps.append(buckets_map)
            except api_errors.ApiExecutionError as e:
                LOGGER.error(
                    'Unable to get buckets for project %s:\n%s',
                    project_number, e)
        return buckets_maps
    def _retrieve(self):
        """Retrieve the project cloudsql instances from GCP.

        Returns:
            instances_maps: List of instances as per-project dictionaries.
                Example: [{'project_number': project_number,
                           'instances': instances_dict}]

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        # Get the projects for which we will retrieve the instances.
        try:
            project_numbers = self.dao.get_project_numbers(
                self.PROJECTS_RESOURCE_NAME, self.cycle_timestamp)
        except data_access_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        instances_maps = []
        for project_number in project_numbers:
            try:
                instances = self.api_client.get_instances(project_number)
                instances_map = {
                    'project_number': project_number,
                    'instances': instances
                }
                instances_maps.append(instances_map)
            except api_errors.ApiExecutionError as e:
                LOGGER.error(
                    'Unable to get cloudsql instances for project %s:\n%s',
                    project_number, e)
        return instances_maps
    def _retrieve(self):
        """Retrieve the project buckets acls from GCP.

        Returns:
            list: Bucket ACLs as per-bucket dictionaries.
                Example: [{'bucket_name': 'bucket name',
                           'acl': bucket_acls_json}]

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        buckets_acls = []

        # Get the projects for which we will retrieve the buckets.
        try:
            raw_buckets = self.dao.get_raw_buckets(self.cycle_timestamp)
        except dao_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        for result in raw_buckets:
            try:
                raw_bucket_json = json.loads(result.get('raw_bucket'))
                bucket_acls = raw_bucket_json.get('acl')
            except ValueError as err:
                LOGGER.warning('Invalid JSON: %s', err)
                continue

            if bucket_acls:
                buckets_acls.append({
                    'bucket_name': result.get('bucket_id'),
                    'acl': bucket_acls
                })
        return buckets_acls
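    # The raw_bucket value parsed above is expected to follow the Cloud
    # Storage bucket resource format, where 'acl' is a list of
    # BucketAccessControl entries, e.g. (fabricated sample data):
    #
    #     {"id": "my-bucket",
    #      "acl": [{"bucket": "my-bucket",
    #               "entity": "project-owners-123456789012",
    #               "role": "OWNER"}]}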
    def _retrieve(self):
        """Retrieve the folder IAM policies from GCP.

        Returns:
            list: IAM policies as per-folder dictionaries.
                Example: [{'folder_id': folder_id,
                           'iam_policy': iam_policy}]

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        try:
            folders = self.dao.get_folders(
                self.RESOURCE_NAME, self.cycle_timestamp)
        except da_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        iam_policies = []
        for folder in folders:
            iam_policy = self.safe_api_call('get_folder_iam_policies',
                                            self.RESOURCE_NAME,
                                            folder.id)
            if iam_policy:
                iam_policies.append(iam_policy)
        return iam_policies
    def _retrieve(self):
        """Retrieve the folder IAM policies from GCP.

        Returns:
            list: IAM policies as per-folder dictionaries.
                Example: [{'folder_id': folder_id,
                           'iam_policy': iam_policy}]

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        try:
            folders = self.dao.get_folders(self.RESOURCE_NAME,
                                           self.cycle_timestamp)
        except da_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        iam_policies = []
        for folder in folders:
            try:
                iam_policy = self.api_client.get_folder_iam_policies(
                    self.RESOURCE_NAME, folder.id)
                iam_policies.append(iam_policy)
            except api_errors.ApiExecutionError as e:
                LOGGER.error('Unable to get IAM policies for folder %s:\n%s',
                             folder.id, e)
        return iam_policies
    def _retrieve(self):
        """Retrieve the project IAM policies from GCP.

        Returns:
            list: IAM policies as per-project dictionaries.
                Example: [{'project_number': project_number,
                           'iam_policy': iam_policy}]
                https://cloud.google.com/resource-manager/reference/rest/Shared.Types/Policy

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        # Get the projects for which we will retrieve the IAM policies.
        try:
            project_numbers = self.dao.get_project_numbers(
                self.RESOURCE_NAME, self.cycle_timestamp)
        except data_access_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        # Retrieve data from GCP.
        # Not using iterator since we will use the iam_policy_maps twice.
        iam_policy_maps = []
        for project_number in project_numbers:
            try:
                iam_policy = self.api_client.get_project_iam_policies(
                    self.RESOURCE_NAME, project_number)
                iam_policy_map = {
                    'project_number': project_number,
                    'iam_policy': iam_policy
                }
                iam_policy_maps.append(iam_policy_map)
            except api_errors.ApiExecutionError as e:
                LOGGER.error('Unable to get IAM policies for project %s:\n%s',
                             project_number, e)
        return iam_policy_maps
    def _retrieve(self):
        """Retrieve the folder resources from GCP.

        Returns:
            iterable: resource manager folder search response.
        """
        try:
            return self.api_client.get_folders(
                self.RESOURCE_NAME)
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)
    def _retrieve(self):
        """Retrieve the organizations resources from GCP.

        Returns:
            iterable: resource manager org search response.
                https://cloud.google.com/resource-manager/reference/rest/v1/organizations/search
        """
        try:
            return self.api_client.get_organizations(self.RESOURCE_NAME)
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)
    def _retrieve_bigquery_projectids(self):
        """Retrieve a list of bigquery projectids.

        Returns: A list of project ids.

        Raises: inventory_errors.LoadDataPipelineError when we encounter an
        error in the underlying bigquery API.
        """
        try:
            return self.api_client.get_bigquery_projectids()
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)
    def _retrieve(self):
        """Retrieve the project resources from GCP.

        Returns:
            iterable: resource manager project list response.
                https://cloud.google.com/resource-manager/reference/rest/v1/projects/list#response-body
        """
        try:
            return self.api_client.get_projects(
                self.RESOURCE_NAME, lifecycleState=LifecycleState.ACTIVE)
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)
    def _retrieve(self):
        """Retrieve the groups from GSuite.

        Returns:
            A list of group list objects from the Admin SDK.

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        try:
            return self.api_client.get_groups()
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)
    def _retrieve_dataset_access(self, project_id, dataset_id):
        """Retrieve the bigquery dataset resources from GCP.

        Args:
            project_id: A project id.
            dataset_id: A dataset id.

        Returns: See bigquery.get_dataset_access().

        Raises: inventory_errors.LoadDataPipelineError when we encounter an
        error in the underlying bigquery API.
        """
        try:
            return self.api_client.get_dataset_access(project_id, dataset_id)
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)
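    # A hedged sketch of how the three BigQuery helpers shown in this
    # listing (_retrieve_bigquery_projectids, _retrieve_dataset_project_map,
    # and _retrieve_dataset_access) might compose into a full _retrieve();
    # this composition is an illustration, not the actual pipeline code:
    def _retrieve(self):
        """Retrieve dataset access maps for all BigQuery projects.

        Returns:
            A list of (project_id, dataset_id, access) tuples.
        """
        project_ids = self._retrieve_bigquery_projectids()
        dataset_project_map = self._retrieve_dataset_project_map(project_ids)
        dataset_access = []
        for datasets in dataset_project_map:
            for dataset in datasets:
                project_id = dataset.get('projectId')
                dataset_id = dataset.get('datasetId')
                access = self._retrieve_dataset_access(project_id, dataset_id)
                dataset_access.append((project_id, dataset_id, access))
        return dataset_access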
    def _fetch_groups_from_dao(self):
        """Fetch the latest group ids previously stored in Cloud SQL.

        Returns:
            list: A list of group ids.

        Raises:
            inventory_errors.LoadDataPipelineError: An error with loading
                data has occurred.
        """
        try:
            group_ids = self.dao.select_group_ids(self.RESOURCE_NAME,
                                                  self.cycle_timestamp)
        except dao_errors.MySQLError as e:
            raise inventory_errors.LoadDataPipelineError(e)

        return group_ids
    def _retrieve(self):
        """Retrieve instance groups from GCP.

        Get all the projects in the current snapshot and retrieve the
        compute instance groups for each.

        Returns:
            A dict mapping projects to their instance groups (list):
            {project_id: [instance groups]}
        """
        projects = proj_dao.ProjectDao().get_projects(self.cycle_timestamp)
        igs = {}
        for project in projects:
            try:
                project_igs = self.api_client.get_instance_groups(project.id)
                if project_igs:
                    igs[project.id] = project_igs
            except api_errors.ApiExecutionError as e:
                LOGGER.error(inventory_errors.LoadDataPipelineError(e))
        return igs
    def _load(self, resource_name, data):
        """ Loads data into Forseti storage.

        Args:
            resource_name (str): Resource name.
            data (iterable or list): Data to be uploaded.

        Raises:
            LoadDataPipelineError: An error with loading data has occurred.
        """
        if not data:
            LOGGER.warning('No %s data to load into Cloud SQL, continuing...',
                           resource_name)
            return

        try:
            self.dao.load_data(resource_name, self.cycle_timestamp, data)
        except (data_access_errors.CSVFileError,
                data_access_errors.MySQLError) as e:
            raise inventory_errors.LoadDataPipelineError(e)
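    # A hypothetical sketch of how _retrieve() and _load() compose in a
    # pipeline's run() method (this run() is an assumption for illustration;
    # the actual base pipeline may perform additional transform steps):
    def run(self):
        """Run the pipeline: retrieve from GCP, then load into Cloud SQL."""
        data = self._retrieve()
        self._load(self.RESOURCE_NAME, data)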
    def _retrieve(self, group_ids):  # pylint: disable=arguments-differ
        """Retrieve the membership for a list of given GSuite groups.

        Returns:
            A list of tuples (group_id, group_members) from the Admin SDK, e.g.
            (string, [])
        """

        group_members_map = []

        for group_id in group_ids:
            try:
                group_members = self.api_client.get_group_members(group_id)
            except api_errors.ApiExecutionError as e:
                raise inventory_errors.LoadDataPipelineError(e)

            group_members_map.append((group_id, group_members))
            LOGGER.debug('Retrieved members from %s: %d',
                         group_id,
                         len(group_members))

        return group_members_map
    def _retrieve(self):
        """Retrieve forwarding rules from GCP.

        Get all the projects in the current snapshot and retrieve the
        compute forwarding rules for each.

        Returns:
            A dict mapping projects to their forwarding rules (list):
            {project_id: [forwarding_rules]}
        """
        projects = proj_dao.ProjectDao().get_projects(self.cycle_timestamp)
        forwarding_rules = {}
        for project in projects:
            project_fwd_rules = []
            try:
                project_fwd_rules = self.api_client.get_forwarding_rules(
                    project.id)
                if project_fwd_rules:
                    forwarding_rules[project.id] = project_fwd_rules
            except api_errors.ApiExecutionError as e:
                LOGGER.error(inventory_errors.LoadDataPipelineError(e))
        return forwarding_rules
    def _retrieve_dataset_access(self, project_id, dataset_id):
        """Retrieve the bigquery dataset resources from GCP.

        Args:
            project_id (str): A project id.
            dataset_id (str): A dataset id.

        Returns:
            list: Access lists for a given project_id and dataset_id.
                [{'role': 'WRITER', 'specialGroup': 'projectWriters'},
                 {'role': 'OWNER', 'specialGroup': 'projectOwners'},
                 {'role': 'OWNER', 'userByEmail': '*****@*****.**'},
                 {'role': 'READER', 'specialGroup': 'projectReaders'}]

        Raises:
            inventory_errors.LoadDataPipelineError when we encounter an
                error in the underlying bigquery API.
        """
        try:
            return self.api_client.get_dataset_access(project_id, dataset_id)
        except api_errors.ApiExecutionError as e:
            raise inventory_errors.LoadDataPipelineError(e)
    def _retrieve(self):
        """Retrieve instances from GCP.

        Get all the projects in the current snapshot and retrieve the
        compute instances for each.

        Returns:
            dict: A map of projects to their instances (list):
            {project_id: [instances]}
        """
        projects = (proj_dao
                    .ProjectDao(self.global_configs)
                    .get_projects(self.cycle_timestamp))
        instances = {}
        for project in projects:
            try:
                project_instances = self.api_client.get_instances(project.id)
                if project_instances:
                    instances[project.id] = project_instances
            except api_errors.ApiExecutionError as e:
                LOGGER.error(inventory_errors.LoadDataPipelineError(e))
        return instances
def _build_pipelines(cycle_timestamp, configs, **kwargs):
    """Build the pipelines to load data.

    Args:
        cycle_timestamp: String of timestamp, formatted as YYYYMMDDTHHMMSSZ.
        configs: Dictionary of configurations.
        kwargs: Extra configs.

    Returns:
        List of pipelines that will be run.

    Raises: inventory_errors.LoadDataPipelineError.
    """

    # Commonly used clients for shared ratelimiter re-use.
    crm_v1_api_client = crm.CloudResourceManagerClient()
    dao = kwargs.get('dao')
    gcs_api_client = gcs.StorageClient()

    organization_dao_name = 'organization_dao'
    project_dao_name = 'project_dao'

    # The order here matters, e.g. groups_pipeline must come before
    # group_members_pipeline.
    pipelines = [
        load_orgs_pipeline.LoadOrgsPipeline(cycle_timestamp, configs,
                                            crm_v1_api_client,
                                            kwargs.get(organization_dao_name)),
        load_org_iam_policies_pipeline.LoadOrgIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(organization_dao_name)),
        load_projects_pipeline.LoadProjectsPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_iam_policies_pipeline.LoadProjectsIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_pipeline.LoadProjectsBucketsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_acls_pipeline.LoadProjectsBucketsAclsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get('bucket_dao')),
        load_projects_cloudsql_pipeline.LoadProjectsCloudsqlPipeline(
            cycle_timestamp, configs, cloudsql.CloudsqlClient(),
            kwargs.get('cloudsql_dao')),
        load_forwarding_rules_pipeline.LoadForwardingRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(),
            kwargs.get('fwd_rules_dao')),
        load_folders_pipeline.LoadFoldersPipeline(
            cycle_timestamp, configs,
            crm.CloudResourceManagerClient(version='v2beta1'), dao),
        load_bigquery_datasets_pipeline.LoadBigQueryDatasetsPipeline(
            cycle_timestamp, configs, bq.BigQueryClient(), dao),
        load_firewall_rules_pipeline.LoadFirewallRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(version='beta'),
            kwargs.get(project_dao_name)),
    ]

    if configs.get('inventory_groups'):
        if util.can_inventory_groups(configs):
            admin_api_client = ad.AdminDirectoryClient()
            pipelines.extend([
                load_groups_pipeline.LoadGroupsPipeline(
                    cycle_timestamp, configs, admin_api_client, dao),
                load_group_members_pipeline.LoadGroupMembersPipeline(
                    cycle_timestamp, configs, admin_api_client, dao)
            ])
        else:
            raise inventory_errors.LoadDataPipelineError(
                'Unable to inventory groups with specified arguments:\n%s' %
                configs)

    return pipelines
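# A hypothetical sketch of how the built pipelines might be driven; the
# runner loop, its name, and its error handling are assumptions for
# illustration, not part of the source above:
def _run_pipelines(pipelines):
    """Run the given pipelines and record which of them succeeded.

    Args:
        pipelines: List of pipelines built by _build_pipelines().

    Returns:
        List of booleans, one per pipeline, True on success.
    """
    run_statuses = []
    for pipeline in pipelines:
        try:
            pipeline.run()
            run_statuses.append(True)
        except inventory_errors.LoadDataPipelineError as e:
            LOGGER.error('Error running pipeline %s:\n%s',
                         pipeline.__class__.__name__, e)
            run_statuses.append(False)
    return run_statuses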