def setUp(self):
   """Patches Cloud Storage/auth dependencies and builds shared fixtures.

   Starts autospec patches for storage.Client, cloud_auth.get_credentials,
   os.path.isfile and os.path.isdir (all stopped on cleanup), wires spec'd
   credentials/bucket/blob mocks into the patched client, and stores a
   CloudStorageUtils instance on the test case.
   """
   super(CloudStorageTest, self).setUp()
   self.addCleanup(mock.patch.stopall)

   # Paths and identifiers shared by the tests.
   self.project_id = 'project-id'
   self.source_file_path = '/tmp/file.txt'
   self.source_directory_path = '/tmp/dir1'
   self.destination_blob_path = 'dir1/dir2/blob'
   self.bucket_name = 'bucket_name'
   self.destination_blob_url = (f'gs://{self.bucket_name}/'
                                f'{self.destination_blob_path}')
   self.mock_blob_name = 'blob_name'
   self.file_content = b'Content of the file.'

   # Mock for google.cloud.storage.Client object
   self.mock_client = mock.patch.object(
       storage, 'Client', autospec=True).start()
   self.mock_get_credentials = mock.patch.object(
       cloud_auth, 'get_credentials', autospec=True).start()
   self.mock_is_file = mock.patch.object(
       os.path, 'isfile', autospec=True).start()
   self.mock_is_dir = mock.patch.object(
       os.path, 'isdir', autospec=True).start()

   # `autospec` is not a Mock() argument: passed there it merely sets an
   # attribute called `autospec` on the mock. Use `spec` to restrict the
   # mocks to the attributes of the real classes.
   self.mock_credentials = mock.Mock(spec=credentials.Credentials)
   self.mock_get_credentials.return_value = self.mock_credentials
   self.mock_bucket = mock.Mock(spec=storage.Bucket)
   self.mock_blob = mock.Mock(spec=storage.Blob)
   self.mock_blob.name = self.mock_blob_name

   # Every bucket/blob lookup through the patched client yields the mocks.
   self.mock_client.return_value.get_bucket.return_value = self.mock_bucket
   self.mock_bucket.blob.return_value = self.mock_blob
   self.mock_bucket.get_blob.return_value = self.mock_blob
   self.mock_blob.download_as_string.return_value = self.file_content

   self.cloud_storage_obj = cloud_storage.CloudStorageUtils(self.project_id)
  def test_credential_retrieval_logic_when_initializing_cloud_storage_utils(
      self, service_account_key_file):
    """Checks credential lookup and client creation for a given key file.

    Args:
      service_account_key_file: Key file value forwarded to
        CloudStorageUtils and expected in the get_credentials call.
    """
    expected_client = self.mock_client.return_value

    storage_utils = cloud_storage.CloudStorageUtils(
        self.project_id, service_account_key_file=service_account_key_file)

    # Credentials are looked up with the key file that was passed in ...
    self.mock_get_credentials.assert_called_with(service_account_key_file)
    # ... and the storage client is built from those credentials.
    self.mock_client.assert_called_with(
        project=self.project_id, credentials=self.mock_credentials)
    self.assertEqual(storage_utils.client, expected_client)
# Example #3
# 0
    def test_client_initializes_with_impersonated_service_account(
            self, mock_impersonated_account):
        """Checks client creation when a service account name is supplied.

        The injected `mock_impersonated_account` must be called exactly once
        with the account name, and the storage client must be built with the
        credentials that mock returns.
        """
        mock_impersonated_account.return_value = self.mock_credentials
        impersonated_account = '*****@*****.**'

        cloud_storage.CloudStorageUtils(
            project_id=self.project_id,
            service_account_name=impersonated_account)

        mock_impersonated_account.assert_called_once_with(
            impersonated_account)
        expected_client_kwargs = {
            'project': self.project_id,
            'credentials': self.mock_credentials,
        }
        self.mock_client.assert_called_with(**expected_client_kwargs)
# Example #4
# 0
    def test_client_initializes_with_service_account_info(
            self, mock_get_credentials_from_info):
        """Checks client creation when service account info is supplied.

        The injected `mock_get_credentials_from_info` must be called exactly
        once with the info object, and the storage client must be built with
        the credentials that mock returns.
        """
        # Service account info is a mapping of key fields; the previous
        # `{''}` literal was a set, not a dict. Kept non-empty so it stays
        # truthy like the original fixture.
        service_account_info = {'type': 'service_account'}
        mock_get_credentials_from_info.return_value = self.mock_credentials

        cloud_storage.CloudStorageUtils(
            project_id=self.project_id,
            service_account_info=service_account_info)

        mock_get_credentials_from_info.assert_called_once_with(
            service_account_info)
        self.mock_client.assert_called_with(project=self.project_id,
                                            credentials=self.mock_credentials)
# Example #5
# 0
def main() -> None:
    """Sets up a Cloud Composer environment and deploys the local DAGs.

    Steps, in order: create the service account, enable the required Cloud
    APIs, create the Composer environment, set its environment variables,
    upload the local DAGs folder to Cloud Storage and install the required
    Python packages.
    """
    logging.getLogger('').setLevel(logging.INFO)
    args = parse_arguments()
    project_id = args.project_id
    key_file = args.service_account_key_file

    # Create the service account; its key file is written to `key_file`.
    cloud_auth.create_service_account(
        project_id=project_id,
        service_account_name=_SERVICE_ACCOUNT_NAME,
        role_name=_SERVICE_ACCOUNT_ROLE,
        file_name=key_file)

    # All cloud util classes are built from the same project and key file.
    auth_kwargs = {
        'project_id': project_id,
        'service_account_key_file': key_file,
    }
    cloud_api_utils = cloud_api.CloudApiUtils(**auth_kwargs)
    cloud_composer_utils = cloud_composer.CloudComposerUtils(**auth_kwargs)
    cloud_storage_utils = cloud_storage.CloudStorageUtils(**auth_kwargs)

    # Enable required Cloud APIs.
    cloud_api_utils.enable_apis(apis=_APIS_TO_BE_ENABLED)

    # Create the Cloud Composer environment and set its variables.
    env_name = args.composer_env_name
    cloud_composer_utils.create_environment(environment_name=env_name)
    cloud_composer_utils.set_environment_variables(
        environment_name=env_name,
        environment_variables=_COMPOSER_ENV_VARIABLES)

    # Upload the local DAGs folder to the parent of the environment's DAGs
    # folder URL.
    dags_folder_url = cloud_composer_utils.get_dags_folder(
        environment_name=env_name)
    cloud_storage_utils.upload_directory_to_url(
        source_directory_path=args.local_dags_folder,
        destination_dir_url=os.path.dirname(dags_folder_url))

    # Install required Python packages on the Cloud Composer environment.
    cloud_composer_utils.install_python_packages(
        environment_name=env_name,
        packages=_COMPOSER_PYPI_PACKAGES)
def visualize_instances(config_file: str) -> None:
  """Visualizes the statistics from the Instance table in BigQuery.

  Plots the Instance table via InstanceVisualizer into a local PDF file and,
  once that file has been closed, uploads it to the configured location in
  Cloud Storage.

  Args:
    config_file: Path to the configuration file.
  """
  viz_config = viz_utils.parse_config_file(config_file)

  project_id = viz_config['project_id']
  dataset = viz_config['dataset']
  instance_table = viz_config['instance_table']
  instance_table_path = f'{project_id}.{dataset}.{instance_table}'
  output_local_path = viz_config['output_local_path']

  bq_client = typing.cast(
      client.Client,
      cloud_auth.build_service_client(
          service_name='bigquery',
          service_account_credentials=cloud_auth.get_default_credentials()))
  storage_client = cloud_storage.CloudStorageUtils(
      project_id=project_id,
      service_account_key_file=viz_config['service_account_key_file'])

  ins_viz_obj = instance_visualizer.InstanceVisualizer(
      bq_client=bq_client,
      instance_table_path=instance_table_path,
      num_instances=viz_config['num_instances'],
      label_column=viz_config['label'],
      positive_class_label=viz_config['True'],
      negative_class_label=viz_config['False'])

  # PdfPages finalizes the PDF only when it is closed. The `with` block
  # guarantees the close (even on error) before the file is uploaded;
  # uploading an unclosed PdfPages file would ship an incomplete PDF.
  with matplotlib.backends.backend_pdf.PdfPages(
      output_local_path) as pdf_output:
    ins_viz_obj.plot_instances(**viz_config['plot_style_params'])
    pdf_output.savefig()

  storage_client.upload_file_to_url(output_local_path,
                                    viz_config['output_gcs_path'])
def visualize_facts(config_file: str) -> None:
  """Visualizes the statistics from the Facts table in BigQuery.

  Plots the Facts table via FactVisualizer into a local PDF file and, once
  that file has been closed, uploads it to the configured location in Cloud
  Storage.

  Args:
    config_file: Path to the configuration file.
  """
  viz_config = viz_utils.parse_config_file(config_file)

  project_id = viz_config['project_id']
  dataset = viz_config['dataset']
  facts_table = viz_config['facts_table']
  facts_table_path = f'{project_id}.{dataset}.{facts_table}'
  output_local_path = viz_config['output_local_path']

  bq_client = typing.cast(
      client.Client,
      cloud_auth.build_service_client(
          service_name='bigquery',
          service_account_credentials=cloud_auth.get_default_credentials()))
  storage_client = cloud_storage.CloudStorageUtils(
      project_id=project_id,
      service_account_key_file=viz_config['service_account_key_file'])

  fact_viz_obj = fact_visualizer.FactVisualizer(
      bq_client=bq_client,
      facts_table_path=facts_table_path,
      numerical_facts=viz_config['numerical_fact_list'],
      categorical_facts=viz_config['categorical_fact_list'],
      number_top_levels=viz_config['number_top_levels'])

  # PdfPages finalizes the PDF only when it is closed. The `with` block
  # guarantees the close (even on error) before the file is uploaded;
  # uploading an unclosed PdfPages file would ship an incomplete PDF.
  with matplotlib.backends.backend_pdf.PdfPages(
      output_local_path) as pdf_output:
    fact_viz_obj.plot_facts(**viz_config['plot_style_params'])
    pdf_output.savefig()

  storage_client.upload_file_to_url(output_local_path,
                                    viz_config['output_gcs_path'])