Example #1
def _upload_csv(output_path, now_utc, csv_name):
    """Upload CSV to Cloud Storage.

    Args:
        output_path: The output path for the csv.
        now_utc: The UTC timestamp of "now".
        csv_name: The path of the local csv temp file to copy or upload.
    """

    from google.cloud.security.common.gcp_api import storage

    output_filename = _get_output_filename(now_utc)

    # If output path was specified, copy the csv temp file either to
    # a local file or upload it to Google Cloud Storage.
    full_output_path = os.path.join(output_path, output_filename)
    LOGGER.info('Output path: %s', full_output_path)

    if output_path.startswith('gs://'):
        # An output path for GCS must be the full
        # `gs://bucket-name/path/for/output`
        storage_client = storage.StorageClient()
        storage_client.put_text_file(csv_name, full_output_path)
    else:
        # Otherwise, just copy it to the output path.
        shutil.copy(csv_name, full_output_path)
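A minimal usage sketch for the helper above; it assumes now_utc is a datetime, and the bucket name and local csv path are made up for illustration:

from datetime import datetime

now_utc = datetime.utcnow()

# Full gs://bucket/prefix output path: the csv temp file is uploaded to GCS.
_upload_csv('gs://my-forseti-bucket/scanner', now_utc, '/tmp/violations.csv')

# Plain local directory: the csv temp file is simply copied there instead.
_upload_csv('/var/tmp/forseti', now_utc, '/tmp/violations.csv')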
Example #2
 def test_get_bucket_and_path_from(self):
     """Given a valid bucket object path, return the bucket and path."""
     expected_bucket = 'my-bucket'
     expected_obj_path = 'path/to/object'
     test_path = 'gs://{}/{}'.format(expected_bucket, expected_obj_path)
     client = storage.StorageClient()
     bucket, obj_path = storage.get_bucket_and_path_from(test_path)
     self.assertEqual(expected_bucket, bucket)
     self.assertEqual(expected_obj_path, obj_path)
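Outside of a test, the same parsing helper can be called directly; a minimal sketch (the path is made up):

from google.cloud.security.common.gcp_api import storage

bucket, obj_path = storage.get_bucket_and_path_from(
    'gs://my-bucket/path/to/object')
# bucket == 'my-bucket', obj_path == 'path/to/object'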
Example #3
def _read_file_from_gcs(file_path):
    """Load file from GCS.

    Args:
        file_path: The GCS path to the file.

    Returns:
        The parsed dict from the loaded file.
    """
    storage_client = storage.StorageClient()

    file_content = storage_client.get_text_file(full_bucket_path=file_path)

    parser = _get_filetype_parser(file_path, 'string')
    return parser(file_content)
Example #4
    def run(self):
        """Generate the temporary json file and upload to GCS."""
        with tempfile.NamedTemporaryFile() as tmp_violations:
            tmp_violations.write(parser.json_stringify(self.violations))
            tmp_violations.flush()

            gcs_upload_path = '{}/{}'.format(self.pipeline_config['gcs_path'],
                                             self._get_output_filename())

            if gcs_upload_path.startswith('gs://'):
                storage_client = storage.StorageClient()
                storage_client.put_text_file(tmp_violations.name,
                                             gcs_upload_path)
Example #5
def _read_file_from_gcs(file_path, storage_client=None):
    """Load file from GCS.

    Args:
        file_path (str): The GCS path to the file.
        storage_client (storage.StorageClient): The Storage API Client to use
            for downloading the file using the API.

    Returns:
        dict: The parsed dict from the loaded file.
    """
    if not storage_client:
        storage_client = storage.StorageClient()

    file_content = storage_client.get_text_file(full_bucket_path=file_path)

    parser = _get_filetype_parser(file_path, 'string')
    return parser(file_content)
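The optional storage_client argument makes this version easy to exercise in tests; a minimal sketch, assuming a YAML rules file and the mock library, with the path and file content made up:

import mock

mock_client = mock.MagicMock()
mock_client.get_text_file.return_value = 'rules: []'

rules = _read_file_from_gcs('gs://my-bucket/rules/rules.yaml',
                            storage_client=mock_client)

# The injected client is used instead of a real StorageClient, and the
# parser picked for the .yaml extension turns the text into a dict.
mock_client.get_text_file.assert_called_once_with(
    full_bucket_path='gs://my-bucket/rules/rules.yaml')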
Example #6
    def run(self, violations, gcs_path):
        """Generate the temporary json file and upload to GCS.

        Args:
            violations (dict): Violations to be uploaded as findings.
            gcs_path (str): The GCS bucket to upload the findings.
        """
        LOGGER.info('Running CSCC findings notification.')
        findings = self._transform_to_findings(violations)

        with tempfile.NamedTemporaryFile() as tmp_violations:
            tmp_violations.write(parser.json_stringify(findings))
            tmp_violations.flush()

            gcs_upload_path = '{}/{}'.format(gcs_path,
                                             self._get_output_filename())

            if gcs_upload_path.startswith('gs://'):
                storage_client = storage.StorageClient()
                storage_client.put_text_file(tmp_violations.name,
                                             gcs_upload_path)
        LOGGER.info('Completed CSCC findings notification.')
Example #7
def _upload_csv_to_gcs(logger, output_path, output_filename, csv_name):
    """Upload CSV to Cloud Storage.

    Args:
        logger: The logger.
        output_path: The output path for the csv.
        output_filename: The output file name.
        csv_name: The path of the local csv temp file to copy or upload.
    """
    # If output path was specified, copy the csv temp file either to
    # a local file or upload it to Google Cloud Storage.
    logger.info('Output filename: {}'.format(output_filename))

    if output_path.startswith('gs://'):
        # An output path for GCS must be the full
        # `gs://bucket-name/path/for/output`
        storage_client = storage.StorageClient()
        full_output_path = os.path.join(output_path, output_filename)

        storage_client.put_text_file(csv_name, full_output_path)
    else:
        # Otherwise, just copy it to the output path.
        shutil.copy(csv_name, os.path.join(output_path, output_filename))
Example #8
def _build_pipelines(cycle_timestamp, configs, **kwargs):
    """Build the pipelines to load data.

    Args:
        cycle_timestamp: String of timestamp, formatted as YYYYMMDDTHHMMSSZ.
        configs: Dictionary of configurations.
        kwargs: Extra args, e.g. the dao instances used by the pipelines.

    Returns:
        List of pipelines that will be run.

    Raises:
        inventory_errors.LoadDataPipelineError: If groups cannot be
            inventoried with the specified configs.
    """

    pipelines = []

    # Commonly used clients for shared ratelimiter re-use.
    crm_v1_api_client = crm.CloudResourceManagerClient()
    dao = kwargs.get('dao')
    gcs_api_client = gcs.StorageClient()

    organization_dao_name = 'organization_dao'
    project_dao_name = 'project_dao'

    # The order here matters, e.g. groups_pipeline must come before
    # group_members_pipeline.
    pipelines = [
        load_orgs_pipeline.LoadOrgsPipeline(cycle_timestamp, configs,
                                            crm_v1_api_client,
                                            kwargs.get(organization_dao_name)),
        load_org_iam_policies_pipeline.LoadOrgIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(organization_dao_name)),
        load_projects_pipeline.LoadProjectsPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_iam_policies_pipeline.LoadProjectsIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_pipeline.LoadProjectsBucketsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_acls_pipeline.LoadProjectsBucketsAclsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get('bucket_dao')),
        load_projects_cloudsql_pipeline.LoadProjectsCloudsqlPipeline(
            cycle_timestamp, configs, cloudsql.CloudsqlClient(),
            kwargs.get('cloudsql_dao')),
        load_forwarding_rules_pipeline.LoadForwardingRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(),
            kwargs.get('fwd_rules_dao')),
        load_folders_pipeline.LoadFoldersPipeline(
            cycle_timestamp, configs,
            crm.CloudResourceManagerClient(version='v2beta1'), dao),
        load_bigquery_datasets_pipeline.LoadBigQueryDatasetsPipeline(
            cycle_timestamp, configs, bq.BigQueryClient(), dao),
        load_firewall_rules_pipeline.LoadFirewallRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(version='beta'),
            kwargs.get(project_dao_name)),
    ]

    if configs.get('inventory_groups'):
        if util.can_inventory_groups(configs):
            admin_api_client = ad.AdminDirectoryClient()
            pipelines.extend([
                load_groups_pipeline.LoadGroupsPipeline(
                    cycle_timestamp, configs, admin_api_client, dao),
                load_group_members_pipeline.LoadGroupMembersPipeline(
                    cycle_timestamp, configs, admin_api_client, dao)
            ])
        else:
            raise inventory_errors.LoadDataPipelineError(
                'Unable to inventory groups with specified arguments:\n%s',
                configs)

    return pipelines
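A minimal sketch of how the builder above might be called, assuming the configs dict and the dao objects have already been constructed (the timestamp and variable names are illustrative):

pipelines = _build_pipelines(
    '20240101T000000Z',
    configs,
    dao=dao,
    organization_dao=organization_dao,
    project_dao=project_dao,
    bucket_dao=bucket_dao,
    cloudsql_dao=cloudsql_dao,
    fwd_rules_dao=fwd_rules_dao)
# The caller (not shown here) is then responsible for running each pipeline.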
Example #9
 def test_non_bucket_uri_raises(self, mock_base):
     """Given a valid bucket object path, return the bucket and path."""
     test_path = '/some/local/path/file.ext'
     client = storage.StorageClient()
     with self.assertRaises(api_errors.InvalidBucketPathError):
         bucket, obj_path = storage.get_bucket_and_path_from(test_path)
Example #10
 def setUp(self, mock_base_client):
     """Set up."""
     self.gcs_api_client = storage.StorageClient()
Example #11
 def setUpClass(cls, mock_google_credential):
     """Set up."""
     cls.gcs_api_client = storage.StorageClient({})
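In Examples #10 and #11 the fixture receives a mock as an extra argument, which implies a mock.patch decorator on the fixture itself. A minimal sketch of one way to wire that up, assuming application-default credentials are what needs to be stubbed out; the patch target is an assumption, not necessarily what the project's tests patch:

import unittest

import mock
from oauth2client import client

from google.cloud.security.common.gcp_api import storage


class StorageClientTest(unittest.TestCase):
    """Builds a StorageClient in setUp without real credentials."""

    @mock.patch.object(client.GoogleCredentials, 'get_application_default',
                       spec=True)
    def setUp(self, mock_google_credential):
        """Set up."""
        self.gcs_api_client = storage.StorageClient()

    def test_client_is_created(self):
        """The client was constructed against the mocked credential."""
        self.assertIsNotNone(self.gcs_api_client)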