Example 1
def copy_file_from_gcs(file_path, output_path=None, storage_client=None):
    """Copy file from GCS to local file.

    Args:
        file_path (str): The full GCS path to the file.
        output_path (str): The local file to copy to; if not set, a
            temporary file is created.
        storage_client (storage.StorageClient): The Storage API Client to use
            for downloading the file using the API.

    Returns:
        str: The output_path the file was copied to.
    """
    if not storage_client:
        storage_client = storage.StorageClient()

    if not output_path:
        tmp_file, output_path = tempfile.mkstemp()
        # Ensure the handle returned by mkstemp is not leaked.
        os.close(tmp_file)

    with open(output_path, mode='wb') as f:
        storage_client.download(full_bucket_path=file_path, output_file=f)

    return output_path
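
A minimal usage sketch, assuming this Forseti-style helper and its storage wrapper are importable; the bucket, object, and local paths are hypothetical.

# Hedged usage sketch; the gs:// path and destinations are hypothetical.
# With no output_path, the file lands in a fresh temporary file.
local_path = copy_file_from_gcs('gs://example-bucket/configs/rules.yaml')

# Or copy to an explicit destination, reusing one client across calls.
client = storage.StorageClient()
copy_file_from_gcs('gs://example-bucket/configs/rules.yaml',
                   output_path='/tmp/rules.yaml',
                   storage_client=client)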
Example 2
def load_cloudasset_data(engine, config):
    """Export asset data from Cloud Asset API and load into storage.

    Args:
        engine (object): Database engine.
        config (InventoryConfig): Inventory configuration on server.

    Returns:
        int: The count of assets imported into the database, or None if there
            is an error.
    """
    cai_gcs_dump_paths = config.get_cai_dump_file_paths()

    storage_client = storage.StorageClient()
    imported_assets = 0

    if not cai_gcs_dump_paths:
        # Dump file paths not specified, download the dump files instead.
        cai_gcs_dump_paths = _download_cloudasset_data(config)

    for gcs_path in cai_gcs_dump_paths:
        try:
            assets = _stream_gcs_to_database(gcs_path, engine, storage_client)
            imported_assets += assets
        except StreamError as e:
            LOGGER.error('Error streaming data from GCS to Database: %s', e)
            return _clear_cai_data(engine)

    # Log the total number of assets imported across all dump files.
    LOGGER.info('%i assets imported to database.', imported_assets)

    # Optimize the new database before returning (SQLite-specific pragma).
    engine.execute('pragma optimize;')
    return imported_assets
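
A hedged usage sketch: the pragma optimize call implies a SQLite-backed SQLAlchemy 1.x engine (Engine.execute was removed in SQLAlchemy 2.0), and inventory_config stands in for the server's InventoryConfig.

from sqlalchemy import create_engine

# Hedged usage sketch; the database path and config object are hypothetical.
engine = create_engine('sqlite:////tmp/inventory.db')
asset_count = load_cloudasset_data(engine, inventory_config)
if asset_count is None:
    print('Import failed; CAI data was cleared.')
else:
    print('Imported %i assets.' % asset_count)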
Example 3
def _read_file_from_gcs(file_path, storage_client=None):
    """Load file from GCS.

    Args:
        file_path (str): The GCS path to the file.
        storage_client (storage.StorageClient): The Storage API Client to use
            for downloading the file using the API.

    Returns:
        dict: The parsed dict from the loaded file.
    """
    if not storage_client:
        storage_client = storage.StorageClient()

    file_content = storage_client.get_text_file(full_bucket_path=file_path)

    parser = _get_filetype_parser(file_path, 'string')
    return parser(file_content)
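
A hedged usage sketch, assuming _get_filetype_parser dispatches on the file extension (for example YAML vs. JSON); the path and keys are hypothetical.

# Hedged usage sketch; the path and keys below are hypothetical.
config = _read_file_from_gcs('gs://example-bucket/configs/server.yaml')
print(config.get('inventory', {}))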
Example 4
    def _send_findings_to_gcs(self, violations, gcs_path):
        """Send violations to CSCC via upload to GCS (legacy mode).
        Args:
            violations (dict): Violations to be uploaded as findings.
            gcs_path (str): The GCS bucket to upload the findings.
        """
        LOGGER.info('Legacy mode detected - writing findings to GCS.')

        gcs_upload_path = '{}/{}'.format(gcs_path, self._get_output_filename())

        findings = self._transform_for_gcs(violations, gcs_upload_path)

        # NamedTemporaryFile defaults to binary mode; open in text mode so
        # the JSON string can be written directly under Python 3.
        with tempfile.NamedTemporaryFile(mode='w') as tmp_violations:
            tmp_violations.write(parser.json_stringify(findings))
            tmp_violations.flush()

            if gcs_upload_path.startswith('gs://'):
                storage_client = storage.StorageClient()
                storage_client.put_text_file(tmp_violations.name,
                                             gcs_upload_path)
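
A hedged usage sketch; note that if gcs_path does not start with gs://, the method above writes the temp file but never uploads it. The notifier instance and violations payload here are hypothetical.

# Hedged usage sketch; cscc_notifier and the payload are hypothetical.
cscc_notifier._send_findings_to_gcs(
    violations={'iam_policy_violations': []},
    gcs_path='gs://example-findings-bucket')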
Example 5

    def _upload_csv(self, output_path, now_utc, csv_name):
        """Upload CSV to Cloud Storage.

        Args:
            output_path (str): The output path for the CSV.
            now_utc (datetime): The UTC timestamp of "now".
            csv_name (str): The path to the local CSV temp file.
        """
        output_filename = self.get_output_filename(now_utc)

        # Copy the temp CSV either to a local file or upload it to
        # Google Cloud Storage, depending on the output path scheme.
        full_output_path = os.path.join(output_path, output_filename)
        LOGGER.info('Output path: %s', full_output_path)

        if output_path.startswith('gs://'):
            # An output path for GCS must be the full
            # `gs://bucket-name/path/for/output`
            storage_client = storage.StorageClient()
            storage_client.put_text_file(csv_name, full_output_path)
        else:
            # Otherwise, just copy it to the output path.
            shutil.copy(csv_name, full_output_path)
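
A hedged usage sketch: now_utc feeds get_output_filename, and csv_name points at an already-written temp CSV. The notifier instance and paths are hypothetical.

from datetime import datetime, timezone

# Hedged usage sketch; the instance and paths are hypothetical.
notifier._upload_csv(output_path='gs://example-report-bucket/scanner',
                     now_utc=datetime.now(timezone.utc),
                     csv_name='/tmp/violations.csv')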
Example 6
    def setUpClass(cls, mock_reach_metadata, mock_get_project_id,
                   mock_google_credential):
        """Set up."""
        mock_reach_metadata.return_value = True
        mock_get_project_id.return_value = 'test-project'
        cls.gcs_api_client = storage.StorageClient({})
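
The three mock parameters imply stacked patch decorators above this excerpt. A sketch of the usual shape, with hypothetical patch targets; unittest.mock appends mocks bottom-up, so the lowest decorator maps to the first mock argument.

import unittest
from unittest import mock

class StorageClientTest(unittest.TestCase):
    """Sketch only; the patch targets below are hypothetical."""

    @classmethod
    @mock.patch('google.auth.default')               # -> mock_google_credential
    @mock.patch('gcp_api.metadata.get_project_id')   # hypothetical target
    @mock.patch('gcp_api.metadata.reach_metadata')   # hypothetical target
    def setUpClass(cls, mock_reach_metadata, mock_get_project_id,
                   mock_google_credential):
        ...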