def _upload_csv(output_path, now_utc, csv_name):
    """Upload CSV to Cloud Storage.

    Args:
        output_path: The output path for the csv.
        now_utc: The UTC timestamp of "now".
        csv_name: The name of the local csv file to upload or copy.
    """
    from google.cloud.security.common.gcp_api import storage

    output_filename = _get_output_filename(now_utc)

    # If output path was specified, copy the csv temp file either to
    # a local file or upload it to Google Cloud Storage.
    full_output_path = os.path.join(output_path, output_filename)
    LOGGER.info('Output path: %s', full_output_path)

    if output_path.startswith('gs://'):
        # An output path for GCS must be the full
        # `gs://bucket-name/path/for/output`
        storage_client = storage.StorageClient()
        storage_client.put_text_file(csv_name, full_output_path)
    else:
        # Otherwise, just copy it to the output path.
        shutil.copy(csv_name, full_output_path)
def test_get_bucket_and_path_from(self):
    """Given a valid bucket object path, return the bucket and path."""
    expected_bucket = 'my-bucket'
    expected_obj_path = 'path/to/object'
    test_path = 'gs://{}/{}'.format(expected_bucket, expected_obj_path)
    client = storage.StorageClient()
    bucket, obj_path = storage.get_bucket_and_path_from(test_path)
    self.assertEqual(expected_bucket, bucket)
    self.assertEqual(expected_obj_path, obj_path)
def _read_file_from_gcs(file_path):
    """Load file from GCS.

    Args:
        file_path: The GCS path to the file.

    Returns:
        The parsed dict from the loaded file.
    """
    storage_client = storage.StorageClient()
    file_content = storage_client.get_text_file(full_bucket_path=file_path)
    parser = _get_filetype_parser(file_path, 'string')
    return parser(file_content)
def run(self):
    """Generate the temporary json file and upload to GCS."""
    with tempfile.NamedTemporaryFile() as tmp_violations:
        tmp_violations.write(parser.json_stringify(self.violations))
        tmp_violations.flush()

        gcs_upload_path = '{}/{}'.format(
            self.pipeline_config['gcs_path'],
            self._get_output_filename())

        if gcs_upload_path.startswith('gs://'):
            storage_client = storage.StorageClient()
            storage_client.put_text_file(
                tmp_violations.name, gcs_upload_path)
def _read_file_from_gcs(file_path, storage_client=None):
    """Load file from GCS.

    Args:
        file_path (str): The GCS path to the file.
        storage_client (storage.StorageClient): The Storage API Client to use
            for downloading the file using the API.

    Returns:
        dict: The parsed dict from the loaded file.
    """
    if not storage_client:
        storage_client = storage.StorageClient()

    file_content = storage_client.get_text_file(full_bucket_path=file_path)
    parser = _get_filetype_parser(file_path, 'string')
    return parser(file_content)
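The optional storage_client parameter above allows a test to inject a fake client instead of building a real one. A minimal sketch of that pattern; the test name, bucket path, and YAML payload are illustrative, and it assumes _get_filetype_parser maps a .yaml path to a YAML loader:

import mock  # on Python 3: from unittest import mock


def test_read_file_from_gcs_uses_injected_client():
    # Sketch only: no GCS call is made because the client is a mock.
    mock_storage_client = mock.Mock()
    mock_storage_client.get_text_file.return_value = 'rules:\n- name: sample\n'

    result = _read_file_from_gcs('gs://my-bucket/rules.yaml',
                                 storage_client=mock_storage_client)

    mock_storage_client.get_text_file.assert_called_once_with(
        full_bucket_path='gs://my-bucket/rules.yaml')
    # Assumes the .yaml extension selects a YAML parser for the content.
    assert result == {'rules': [{'name': 'sample'}]}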
def run(self, violations, gcs_path):
    """Generate the temporary json file and upload to GCS.

    Args:
        violations (dict): Violations to be uploaded as findings.
        gcs_path (str): The GCS bucket to upload the findings.
    """
    LOGGER.info('Running CSCC findings notification.')
    findings = self._transform_to_findings(violations)

    with tempfile.NamedTemporaryFile() as tmp_violations:
        tmp_violations.write(parser.json_stringify(findings))
        tmp_violations.flush()

        gcs_upload_path = '{}/{}'.format(
            gcs_path, self._get_output_filename())

        if gcs_upload_path.startswith('gs://'):
            storage_client = storage.StorageClient()
            storage_client.put_text_file(
                tmp_violations.name, gcs_upload_path)

    LOGGER.info('Completed CSCC findings notification.')
def _upload_csv_to_gcs(logger, output_path, output_filename, csv_name):
    """Upload CSV to Cloud Storage.

    Args:
        logger: The logger.
        output_path: The output path for the csv.
        output_filename: The output file name.
        csv_name: The name of the local csv file to upload or copy.
    """
    # If output path was specified, copy the csv temp file either to
    # a local file or upload it to Google Cloud Storage.
    logger.info('Output filename: {}'.format(output_filename))

    if output_path.startswith('gs://'):
        # An output path for GCS must be the full
        # `gs://bucket-name/path/for/output`
        storage_client = storage.StorageClient()
        full_output_path = os.path.join(output_path, output_filename)
        storage_client.put_text_file(csv_name, full_output_path)
    else:
        # Otherwise, just copy it to the output path.
        shutil.copy(csv_name, os.path.join(output_path, output_filename))
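For reference, a hedged usage sketch of the helper above; the bucket path, output filename, and temp file location are made-up examples:

import logging

logger = logging.getLogger(__name__)
# Uploads when the output path is a gs:// URI, otherwise copies locally.
_upload_csv_to_gcs(logger,
                   'gs://my-forseti-bucket/scanner_output',
                   'scanner_output.20170101T000000Z.csv',
                   '/tmp/tmp_scanner_output.csv')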
def _build_pipelines(cycle_timestamp, configs, **kwargs):
    """Build the pipelines to load data.

    Args:
        cycle_timestamp: String of timestamp, formatted as YYYYMMDDTHHMMSSZ.
        configs: Dictionary of configurations.
        kwargs: Extra configs.

    Returns:
        List of pipelines that will be run.

    Raises:
        inventory_errors.LoadDataPipelineError.
    """
    pipelines = []

    # Commonly used clients for shared ratelimiter re-use.
    crm_v1_api_client = crm.CloudResourceManagerClient()
    dao = kwargs.get('dao')
    gcs_api_client = gcs.StorageClient()
    organization_dao_name = 'organization_dao'
    project_dao_name = 'project_dao'

    # The order here matters, e.g. groups_pipeline must come before
    # group_members_pipeline.
    pipelines = [
        load_orgs_pipeline.LoadOrgsPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(organization_dao_name)),
        load_org_iam_policies_pipeline.LoadOrgIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(organization_dao_name)),
        load_projects_pipeline.LoadProjectsPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_iam_policies_pipeline.LoadProjectsIamPoliciesPipeline(
            cycle_timestamp, configs, crm_v1_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_pipeline.LoadProjectsBucketsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get(project_dao_name)),
        load_projects_buckets_acls_pipeline.LoadProjectsBucketsAclsPipeline(
            cycle_timestamp, configs, gcs_api_client,
            kwargs.get('bucket_dao')),
        load_projects_cloudsql_pipeline.LoadProjectsCloudsqlPipeline(
            cycle_timestamp, configs, cloudsql.CloudsqlClient(),
            kwargs.get('cloudsql_dao')),
        load_forwarding_rules_pipeline.LoadForwardingRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(),
            kwargs.get('fwd_rules_dao')),
        load_folders_pipeline.LoadFoldersPipeline(
            cycle_timestamp, configs,
            crm.CloudResourceManagerClient(version='v2beta1'), dao),
        load_bigquery_datasets_pipeline.LoadBigQueryDatasetsPipeline(
            cycle_timestamp, configs, bq.BigQueryClient(), dao),
        load_firewall_rules_pipeline.LoadFirewallRulesPipeline(
            cycle_timestamp, configs, compute.ComputeClient(version='beta'),
            kwargs.get(project_dao_name)),
    ]

    if configs.get('inventory_groups'):
        if util.can_inventory_groups(configs):
            admin_api_client = ad.AdminDirectoryClient()
            pipelines.extend([
                load_groups_pipeline.LoadGroupsPipeline(
                    cycle_timestamp, configs, admin_api_client, dao),
                load_group_members_pipeline.LoadGroupMembersPipeline(
                    cycle_timestamp, configs, admin_api_client, dao)
            ])
        else:
            raise inventory_errors.LoadDataPipelineError(
                'Unable to inventory groups with specified arguments:\n%s',
                configs)

    return pipelines
def test_non_bucket_uri_raises(self, mock_base):
    """Given a non-bucket path, raise InvalidBucketPathError."""
    test_path = '/some/local/path/file.ext'
    client = storage.StorageClient()

    with self.assertRaises(api_errors.InvalidBucketPathError):
        bucket, obj_path = storage.get_bucket_and_path_from(test_path)
def setUp(self, mock_base_client):
    """Set up."""
    self.gcs_api_client = storage.StorageClient()
def setUpClass(cls, mock_google_credential):
    """Set up."""
    cls.gcs_api_client = storage.StorageClient({})
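The extra mock_base_client / mock_google_credential parameters in the two setUp snippets above imply a mock.patch decorator on the fixture method. A minimal sketch of one way to wire that up; the patch target (_base_client.BaseClient) is an assumption about where the credential bootstrap lives and should be adjusted to the module under test:

import unittest

import mock  # on Python 3: from unittest import mock

from google.cloud.security.common.gcp_api import storage
# Assumed location of the shared base client; not confirmed by this snippet.
from google.cloud.security.common.gcp_api import _base_client


class StorageClientTest(unittest.TestCase):
    """Builds a StorageClient without real GCP credentials."""

    @mock.patch.object(
        _base_client.BaseClient, '__init__', return_value=None)
    def setUp(self, mock_base_client):
        """Set up."""
        # The patched __init__ skips credential discovery during construction.
        self.gcs_api_client = storage.StorageClient()

    def test_client_is_created(self):
        self.assertIsNotNone(self.gcs_api_client)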