def setUp(self):
    """Installs mocks for google.cloud.storage and cloud_auth shared by all tests."""
    super(CloudStorageTest, self).setUp()
    # Every mock.patch below is started manually, so stop them all on cleanup.
    self.addCleanup(mock.patch.stopall)

    # Patch google.cloud.storage.Client so no real network client is created.
    self.project_id = 'project-id'
    self.mock_client = mock.patch.object(
        storage, 'Client', autospec=True).start()
    self.mock_get_credentials = mock.patch.object(
        cloud_auth, 'get_credentials', autospec=True).start()
    # BUG FIX: mock.Mock() has no `autospec` parameter -- passing
    # autospec=True to the constructor only created a stray attribute named
    # `autospec` on the mock. Use spec= to constrain the mock's API instead.
    self.mock_credentials = mock.Mock(spec=credentials.Credentials)
    self.mock_get_credentials.return_value = self.mock_credentials

    # Fixture paths/names reused across the test methods.
    self.source_file_path = '/tmp/file.txt'
    self.source_directory_path = '/tmp/dir1'
    self.destination_blob_path = 'dir1/dir2/blob'
    self.bucket_name = 'bucket_name'
    self.destination_blob_url = (f'gs://{self.bucket_name}/'
                                 f'{self.destination_blob_path}')

    # Patch filesystem checks so each test controls file/dir existence.
    self.mock_is_file = mock.patch.object(
        os.path, 'isfile', autospec=True).start()
    self.mock_is_dir = mock.patch.object(
        os.path, 'isdir', autospec=True).start()

    # Fake bucket/blob objects returned by the mocked client (same spec=
    # fix as above for the bogus autospec kwarg).
    self.mock_bucket = mock.Mock(spec=storage.Bucket)
    self.mock_blob = mock.Mock(spec=storage.Blob)
    self.mock_blob_name = 'blob_name'
    self.mock_blob.name = self.mock_blob_name
    self.mock_client.return_value.get_bucket.return_value = self.mock_bucket
    self.mock_bucket.blob.return_value = self.mock_blob
    self.mock_bucket.get_blob.return_value = self.mock_blob
    self.file_content = b'Content of the file.'
    self.mock_blob.download_as_string.return_value = self.file_content

    # Object under test, built against the fully mocked environment above.
    self.cloud_storage_obj = cloud_storage.CloudStorageUtils(self.project_id)
def test_credential_retrieval_logic_when_initializing_cloud_storage_utils(
        self, service_account_key_file):
    """Checks credentials are fetched and handed to the storage client."""
    utils_under_test = cloud_storage.CloudStorageUtils(
        self.project_id, service_account_key_file=service_account_key_file)

    # Credentials must come from cloud_auth and flow into the client ctor.
    self.mock_get_credentials.assert_called_with(service_account_key_file)
    self.mock_client.assert_called_with(
        project=self.project_id, credentials=self.mock_credentials)
    # The constructed client is the one exposed on the utility object.
    self.assertEqual(utils_under_test.client, self.mock_client.return_value)
def test_client_initializes_with_impersonated_service_account(
        self, mock_impersonated_account):
    """Checks client construction via impersonated service-account creds."""
    impersonated_account = '*****@*****.**'
    mock_impersonated_account.return_value = self.mock_credentials

    cloud_storage.CloudStorageUtils(
        project_id=self.project_id,
        service_account_name=impersonated_account)

    # Impersonation helper is invoked exactly once for the given account,
    # and its credentials are what the storage client is built with.
    mock_impersonated_account.assert_called_once_with(impersonated_account)
    self.mock_client.assert_called_with(
        project=self.project_id, credentials=self.mock_credentials)
def test_client_initializes_with_service_account_info(
        self, mock_get_credentials_from_info):
    """Checks client construction from explicit service-account info."""
    # BUG FIX: the previous fixture `{''}` is a *set* literal, but
    # service-account info is a mapping (as consumed by
    # google.oauth2.service_account.Credentials.from_service_account_info).
    # A minimal dict keeps the test semantically honest; the value only
    # flows into a mock and the equality assertion below.
    service_account_info = {'type': 'service_account'}
    mock_get_credentials_from_info.return_value = self.mock_credentials

    cloud_storage.CloudStorageUtils(
        project_id=self.project_id,
        service_account_info=service_account_info)

    # The info mapping is forwarded verbatim to the credentials helper,
    # and the resulting credentials are passed to the storage client.
    mock_get_credentials_from_info.assert_called_once_with(
        service_account_info)
    self.mock_client.assert_called_with(
        project=self.project_id, credentials=self.mock_credentials)
def main() -> None:
    """Provisions GCP resources and deploys DAGs to a Composer environment."""
    logging.getLogger('').setLevel(logging.INFO)
    args = parse_arguments()

    # Create the service account that every utility below authenticates as.
    cloud_auth.create_service_account(
        project_id=args.project_id,
        service_account_name=_SERVICE_ACCOUNT_NAME,
        role_name=_SERVICE_ACCOUNT_ROLE,
        file_name=args.service_account_key_file)

    # All three utility classes share the same project/key-file credentials.
    auth_kwargs = {
        'project_id': args.project_id,
        'service_account_key_file': args.service_account_key_file,
    }
    cloud_api_utils = cloud_api.CloudApiUtils(**auth_kwargs)
    cloud_composer_utils = cloud_composer.CloudComposerUtils(**auth_kwargs)
    cloud_storage_utils = cloud_storage.CloudStorageUtils(**auth_kwargs)

    # Enable required Cloud APIs.
    cloud_api_utils.enable_apis(apis=_APIS_TO_BE_ENABLED)

    # Create the Composer environment, then configure its variables.
    cloud_composer_utils.create_environment(
        environment_name=args.composer_env_name)
    cloud_composer_utils.set_environment_variables(
        environment_name=args.composer_env_name,
        environment_variables=_COMPOSER_ENV_VARIABLES)

    # Copy local DAGs and dependencies to the Cloud Storage dags/plugins
    # folders (the parent of the environment's dags folder URL).
    dags_folder_url = cloud_composer_utils.get_dags_folder(
        environment_name=args.composer_env_name)
    cloud_storage_utils.upload_directory_to_url(
        source_directory_path=args.local_dags_folder,
        destination_dir_url=os.path.dirname(dags_folder_url))

    # Install required Python packages on the Composer environment.
    cloud_composer_utils.install_python_packages(
        environment_name=args.composer_env_name,
        packages=_COMPOSER_PYPI_PACKAGES)
def visualize_instances(config_file: str) -> None:
    """Visualizes the statistics from the Instance table in BigQuery.

    Calculates statistics from the Instance table in BigQuery, renders the
    plots into a local PDF file, and uploads that PDF to the configured
    Cloud Storage location.

    Args:
        config_file: Path to the configuration file.
    """
    viz_config = viz_utils.parse_config_file(config_file)
    project_id = viz_config['project_id']
    dataset = viz_config['dataset']
    instance_table = viz_config['instance_table']
    instance_table_path = f'{project_id}.{dataset}.{instance_table}'

    bq_client = typing.cast(
        client.Client,
        cloud_auth.build_service_client(
            service_name='bigquery',
            service_account_credentials=cloud_auth.get_default_credentials()))
    storage_client = cloud_storage.CloudStorageUtils(
        project_id=project_id,
        service_account_key_file=viz_config['service_account_key_file'])

    # BUG FIX: PdfPages was never closed, so the PDF trailer was never
    # written and the uploaded file could be truncated/corrupt. The context
    # manager guarantees close() before the upload below.
    with matplotlib.backends.backend_pdf.PdfPages(
            viz_config['output_local_path']) as pdf_output:
        ins_viz_obj = instance_visualizer.InstanceVisualizer(
            bq_client=bq_client,
            instance_table_path=instance_table_path,
            num_instances=viz_config['num_instances'],
            label_column=viz_config['label'],
            # NOTE(review): config keys 'True'/'False' look suspicious --
            # presumably these should be e.g. 'positive_class_label' /
            # 'negative_class_label'; confirm against the config schema.
            positive_class_label=viz_config['True'],
            negative_class_label=viz_config['False'])
        ins_viz_obj.plot_instances(**viz_config['plot_style_params'])
        pdf_output.savefig()

    storage_client.upload_file_to_url(viz_config['output_local_path'],
                                      viz_config['output_gcs_path'])
def visualize_facts(config_file: str) -> None:
    """Visualizes the statistics from the Facts table in BigQuery.

    Calculates statistics from the Facts table in BigQuery, renders the
    plots into a local PDF file, and uploads that PDF to the configured
    Cloud Storage location.

    Args:
        config_file: Path to the configuration file.
    """
    viz_config = viz_utils.parse_config_file(config_file)
    project_id = viz_config['project_id']
    dataset = viz_config['dataset']
    facts_table = viz_config['facts_table']
    facts_table_path = f'{project_id}.{dataset}.{facts_table}'

    bq_client = typing.cast(
        client.Client,
        cloud_auth.build_service_client(
            service_name='bigquery',
            service_account_credentials=cloud_auth.get_default_credentials()))
    storage_client = cloud_storage.CloudStorageUtils(
        project_id=project_id,
        service_account_key_file=viz_config['service_account_key_file'])

    # BUG FIX: PdfPages was never closed, so the PDF trailer was never
    # written and the uploaded file could be truncated/corrupt. The context
    # manager guarantees close() before the upload below.
    with matplotlib.backends.backend_pdf.PdfPages(
            viz_config['output_local_path']) as pdf_output:
        fact_viz_obj = fact_visualizer.FactVisualizer(
            bq_client=bq_client,
            facts_table_path=facts_table_path,
            numerical_facts=viz_config['numerical_fact_list'],
            categorical_facts=viz_config['categorical_fact_list'],
            number_top_levels=viz_config['number_top_levels'])
        fact_viz_obj.plot_facts(**viz_config['plot_style_params'])
        pdf_output.savefig()

    storage_client.upload_file_to_url(viz_config['output_local_path'],
                                      viz_config['output_gcs_path'])