Example #1
  def __init__(self, options):
    """Initializes a Dataflow API client object."""
    self.standard_options = options.view_as(StandardOptions)
    self.google_cloud_options = options.view_as(GoogleCloudOptions)

    if _use_fnapi(options):
      self.environment_version = _FNAPI_ENVIRONMENT_MAJOR_VERSION
    else:
      self.environment_version = _LEGACY_ENVIRONMENT_MAJOR_VERSION

    if self.google_cloud_options.no_auth:
      credentials = None
    else:
      credentials = get_service_credentials()

    http_client = get_new_http()
    self._client = dataflow.DataflowV1b3(
        url=self.google_cloud_options.dataflow_endpoint,
        credentials=credentials,
        get_credentials=(not self.google_cloud_options.no_auth),
        http=http_client,
        response_encoding=get_response_encoding())
    self._storage_client = storage.StorageV1(
        url='https://www.googleapis.com/storage/v1',
        credentials=credentials,
        get_credentials=(not self.google_cloud_options.no_auth),
        http=http_client,
        response_encoding=get_response_encoding())
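
Every example on this page follows the same pattern: get_new_http() builds a single httplib2 client that is then shared across one or more generated apitools clients. A hypothetical way to construct the object above (the enclosing class name is not visible in the snippet; DataflowApplicationClient and the flag values are assumptions):

# Hypothetical usage sketch; the class name and flag values are assumptions.
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(['--project=my-project', '--region=us-central1'])
client = DataflowApplicationClient(options)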
Example #2
def assert_bucket_exists(bucket_name):
    # type: (str) -> None
    """Asserts that the GCS bucket with the given name exists.

    Logs an error and raises a ValueError if the bucket does not exist.

    Logs a warning if the bucket cannot be verified to exist.
    """
    try:
        from apitools.base.py.exceptions import HttpError
        storage_client = storage.StorageV1(
            credentials=auth.get_service_credentials(),
            get_credentials=False,
            http=get_new_http(),
            response_encoding='utf8')
        request = storage.StorageBucketsGetRequest(bucket=bucket_name)
        storage_client.buckets.Get(request)
    # ImportError must be handled before HttpError: if the import above fails,
    # the name HttpError is undefined, and evaluating it in an except clause
    # would raise a NameError before the ImportError handler is ever reached.
    except ImportError:
        _LOGGER.warning(
            'ImportError - unable to verify whether bucket %s exists',
            bucket_name)
    except HttpError as e:
        if e.status_code == 404:
            _LOGGER.error('%s bucket does not exist!', bucket_name)
            raise ValueError('Invalid GCS bucket provided!')
        else:
            _LOGGER.warning(
                'HttpError - unable to verify whether bucket %s exists',
                bucket_name)
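
A hypothetical call site for the helper above ('my-staging-bucket' is an illustrative name, not one from the original snippet):

# Returns silently on success; raises ValueError if the bucket does not
# exist, and only logs a warning if existence cannot be verified.
assert_bucket_exists('my-staging-bucket')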
Example #3
def _download_file(from_url, to_path):
    """Downloads a file over http/https from a url or copies it from a remote
    path to a local path."""
    if from_url.startswith('http://') or from_url.startswith('https://'):
        # TODO(silviuc): We should cache downloads so we do not do it for every
        # job.
        try:
            # We check if the file is actually there because wget returns a file
            # even for a 404 response (the file will contain the contents of the
            # 404 response).
            # TODO(angoenka): Extract and use the filename when downloading file.
            response, content = get_new_http().request(from_url)
            if int(response['status']) >= 400:
                raise RuntimeError(
                    'Artifact not found at %s (response: %s)' %
                    (from_url, response))
            # httplib2 returns the response body as bytes, so write in binary
            # mode ('w' would fail on Python 3 and corrupt binary artifacts).
            with open(to_path, 'wb') as f:
                f.write(content)
        except Exception:
            logging.info('Failed to download artifact from %s', from_url)
            raise
    else:
        if not os.path.isdir(os.path.dirname(to_path)):
            logging.info(
                'Creating folder (it does not exist yet, and any errors '
                'will follow): %s', os.path.dirname(to_path))
            os.mkdir(os.path.dirname(to_path))
        shutil.copyfile(from_url, to_path)
Example #4
File: stager.py Project: eralmas7/beam
def _download_file(from_url, to_path):
  """Downloads a file over http/https from a url or copies it from a remote
  path to a local path."""
  if from_url.startswith('http://') or from_url.startswith('https://'):
    # TODO(silviuc): We should cache downloads so we do not do it for every
    # job.
    try:
      # We check if the file is actually there because wget returns a file
      # even for a 404 response (the file will contain the contents of the
      # 404 response).
      # TODO(angoenka): Extract and use the filename when downloading file.
      response, content = get_new_http().request(from_url)
      if int(response['status']) >= 400:
        raise RuntimeError(
            'Artifact not found at %s (response: %s)' % (from_url, response))
      # httplib2 returns the response body as bytes, so write in binary mode.
      with open(to_path, 'wb') as f:
        f.write(content)
    except Exception:
      logging.info('Failed to download artifact from %s', from_url)
      raise
  else:
    if not os.path.isdir(os.path.dirname(to_path)):
      logging.info(
          'Creating folder (it does not exist yet, and any errors '
          'will follow): %s', os.path.dirname(to_path))
      os.mkdir(os.path.dirname(to_path))
    shutil.copyfile(from_url, to_path)
Example #5
    def __init__(self, options):
        """Initializes a Dataflow API client object."""
        self.standard_options = options.view_as(StandardOptions)
        self.google_cloud_options = options.view_as(GoogleCloudOptions)

        if _use_fnapi(options):
            self.environment_version = _FNAPI_ENVIRONMENT_MAJOR_VERSION
        else:
            self.environment_version = _LEGACY_ENVIRONMENT_MAJOR_VERSION

        if self.google_cloud_options.no_auth:
            credentials = None
        else:
            credentials = get_service_credentials()

        http_client = get_new_http()
        self._client = dataflow.DataflowV1b3(
            url=self.google_cloud_options.dataflow_endpoint,
            credentials=credentials,
            get_credentials=(not self.google_cloud_options.no_auth),
            http=http_client,
            response_encoding=get_response_encoding())
        self._storage_client = storage.StorageV1(
            url='https://www.googleapis.com/storage/v1',
            credentials=credentials,
            get_credentials=(not self.google_cloud_options.no_auth),
            http=http_client,
            response_encoding=get_response_encoding())
Example #6
def __init__(self, client=None):
    self.client = client or bigquery.BigqueryV2(
        http=get_new_http(), credentials=auth.get_service_credentials())
    self._unique_row_id = 0
    # For testing scenarios where we pass in a client we do not want a
    # randomized prefix for row IDs.
    self._row_id_prefix = '' if client else uuid.uuid4()
    self._temporary_table_suffix = uuid.uuid4().hex
Example #7
def __init__(self, storage_client=None):
    if storage_client is None:
        storage_client = storage.StorageV1(
            credentials=auth.get_service_credentials(),
            get_credentials=False,
            http=get_new_http(),
            response_encoding='utf8')
    self.client = storage_client
    self._rewrite_cb = None
Example #8
def __init__(self, client=None):
  self.client = client or bigquery.BigqueryV2(
      http=get_new_http(),
      credentials=auth.get_service_credentials(),
      # On Python 2 leave responses as bytes; on Python 3 decode as utf8.
      response_encoding=None if sys.version_info[0] < 3 else 'utf8')
  self._unique_row_id = 0
  # For testing scenarios where we pass in a client we do not want a
  # randomized prefix for row IDs.
  self._row_id_prefix = '' if client else uuid.uuid4()
  self._temporary_table_suffix = uuid.uuid4().hex
Example #9
def __init__(self, options):
  super().__init__(options)
  self._google_cloud_options = options.view_as(GoogleCloudOptions)
  if self._google_cloud_options.no_auth:
    credentials = None
  else:
    credentials = get_service_credentials()
  self._storage_client = storage.StorageV1(
      url='https://www.googleapis.com/storage/v1',
      credentials=credentials,
      get_credentials=(not self._google_cloud_options.no_auth),
      http=get_new_http(),
      response_encoding='utf8')
  self._cloudbuild_client = cloudbuild.CloudbuildV1(
      credentials=credentials,
      get_credentials=(not self._google_cloud_options.no_auth),
      http=get_new_http(),
      response_encoding='utf8')
  if not self._docker_registry_push_url:
    self._docker_registry_push_url = (
        'gcr.io/%s/prebuilt_beam_sdk' % self._google_cloud_options.project)
Example #10
def __init__(self, storage_client=None, pipeline_options=None):
    if storage_client is None:
        storage_client = storage.StorageV1(
            credentials=auth.get_service_credentials(pipeline_options),
            get_credentials=False,
            http=get_new_http(),
            response_encoding='utf8',
            additional_http_headers={
                "User-Agent": "apache-beam-%s" % apache_beam.__version__
            })
    self.client = storage_client
    self._rewrite_cb = None
    self.bucket_to_project_number = {}
Example #11
File: stager.py Project: melap/beam
def _download_file(from_url, to_path):
  """Downloads a file over http/https from a url or copies it from a remote
  path to a local path."""
  if from_url.startswith('http://') or from_url.startswith('https://'):
    # TODO(silviuc): We should cache downloads so we do not do it for every
    # job.
    try:
      # We check if the file is actually there because wget returns a file
      # even for a 404 response (the file will contain the contents of the
      # 404 response).
      response, content = get_new_http().request(from_url)
      if int(response['status']) >= 400:
        raise RuntimeError(
            'Artifact not found at %s (response: %s)' % (from_url, response))
      with open(to_path, 'wb') as f:
        f.write(content)
    except Exception:
      _LOGGER.info('Failed to download artifact from %s', from_url)
      raise
  else:
    try:
      read_handle = FileSystems.open(
          from_url, compression_type=CompressionTypes.UNCOMPRESSED)
      with read_handle as fin:
        with open(to_path, 'wb') as f:
          while True:
            chunk = fin.read(Stager._DEFAULT_CHUNK_SIZE)
            if not chunk:
              break
            f.write(chunk)
      _LOGGER.info('Copied remote file from %s to %s.', from_url, to_path)
      return
    except Exception as e:
      _LOGGER.info(
          'Failed to download file from %s via apache_beam.io.filesystems. '
          'Trying to copy directly. %s',
          from_url,
          repr(e))
    if not os.path.isdir(os.path.dirname(to_path)):
      _LOGGER.info(
          'Creating folder (it does not exist yet, and any errors '
          'will follow): %s',
          os.path.dirname(to_path))
      os.mkdir(os.path.dirname(to_path))
    shutil.copyfile(from_url, to_path)
Example #12
# The thread-local must live at module scope: a threading.local() created
# inside __new__ would be brand new on every call, so its cached attribute
# would always be missing and a fresh client would be built each time. The
# _local_state name is introduced here to make the snippet self-consistent.
_local_state = threading.local()

def __new__(cls, storage_client=None):
  if storage_client:
    # This path is only used for testing.
    return super(GcsIO, cls).__new__(cls)
  else:
    # Create a single storage client for each thread. We would like to avoid
    # creating more than one storage client for each thread, since each
    # initialization requires the relatively expensive step of initializing
    # credentials.
    if getattr(_local_state, 'gcsio_instance', None) is None:
      credentials = auth.get_service_credentials()
      storage_client = storage.StorageV1(
          credentials=credentials,
          get_credentials=False,
          http=get_new_http(),
          response_encoding=None if sys.version_info[0] < 3 else 'utf8')
      _local_state.gcsio_instance = super(GcsIO, cls).__new__(cls)
      _local_state.gcsio_instance.client = storage_client
    return _local_state.gcsio_instance
Example #13
def test_get_new_http_proxy_info(self):
    # Note: this sets os.environ directly without restoring it, so the proxy
    # setting leaks into later tests; the mock.patch.dict variant below
    # avoids that.
    os.environ['http_proxy'] = 'localhost'
    http = get_new_http()
    # Proxy type 3 is httplib2's PROXY_TYPE_HTTP; port 80 is the default.
    expected = ProxyInfo(3, 'localhost', 80)
    self.assertEqual(str(http.proxy_info), str(expected))
Example #14
def test_get_new_http_proxy_info(self):
  with mock.patch.dict(os.environ, http_proxy='localhost'):
    http = get_new_http()
    expected = ProxyInfo(3, 'localhost', 80)
    self.assertEqual(str(http.proxy_info), str(expected))
Example #15
def test_get_new_http_timeout(self):
  http = get_new_http()
  self.assertEqual(http.timeout, DEFAULT_HTTP_TIMEOUT_SECONDS)
Example #16
def test_get_new_http_proxy_info(self):
  os.environ['http_proxy'] = 'localhost'
  http = get_new_http()
  expected = ProxyInfo(3, 'localhost', 80)
  self.assertEqual(str(http.proxy_info), str(expected))
Example #17
def test_get_new_http_proxy_info(self):
    with mock.patch.dict(os.environ, http_proxy='localhost'):
        http = get_new_http()
        expected = ProxyInfo(3, 'localhost', 80)
        self.assertEqual(str(http.proxy_info), str(expected))
Example #18
def test_get_new_http_timeout(self):
    http = get_new_http()
    self.assertEqual(http.timeout, DEFAULT_HTTP_TIMEOUT_SECONDS)