Example 1
def _get_first_null_byte_index(destination_url, offset, length):
    """Checks to see how many bytes in range have already been downloaded.

  Args:
    destination_url (storage_url.FileUrl): Has path of file being downloaded.
    offset (int): For components, index to start reading bytes at.
    length (int): For components, where to stop reading bytes.

  Returns:
    Int byte count of size of partially-downloaded file. Returns 0 if file is
    an invalid size, empty, or non-existent.
  """
    if not destination_url.exists():
        return 0

    # Component is slice of larger file. Find how much of slice is downloaded.
    first_null_byte = offset
    end_of_range = offset + length
    with files.BinaryFileReader(destination_url.object_name) as file_reader:
        file_reader.seek(offset)
        while first_null_byte < end_of_range:
            data = file_reader.read(_READ_SIZE)
            if not data:
                break
            null_byte_index = data.find(NULL_BYTE)
            if null_byte_index != -1:
                first_null_byte += null_byte_index
                break
            first_null_byte += len(data)
    return first_null_byte
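The scan above works because the destination file is pre-allocated, so the first NULL byte marks where the previous attempt stopped. Below is a minimal standalone sketch of the same idea using plain open(); the helper name and the _READ_SIZE and NULL_BYTE values are assumptions for illustration, not the gcloud constants.

_READ_SIZE = 8192      # assumed chunk size
NULL_BYTE = b'\x00'    # unwritten regions of a pre-allocated file read back as NULL

def first_null_byte_index(path, offset, length):
    position = offset
    end_of_range = offset + length
    with open(path, 'rb') as file_reader:
        file_reader.seek(offset)
        while position < end_of_range:
            data = file_reader.read(min(_READ_SIZE, end_of_range - position))
            if not data:
                break
            null_byte_index = data.find(NULL_BYTE)
            if null_byte_index != -1:
                return position + null_byte_index
            position += len(data)
    return position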
Example 2
    def execute(self, task_status_queue=None):
        """Validates and clean ups after sliced download."""
        # Clean up master and component tracker files.
        tracker_file_util.delete_download_tracker_files(
            self._destination_resource.storage_url)

        # Validate final product of sliced download.
        # TODO(b/181340192): See if sharing and concatenating task hashes is faster.
        with files.BinaryFileReader(
                self._destination_resource.storage_url.object_name
        ) as downloaded_file:
            # TODO(b/172048376): Test other hash algorithms.
            downloaded_file_hash_object = util.get_hash_from_file_stream(
                downloaded_file, util.HashAlgorithms.MD5)

        downloaded_file_hash_digest = util.get_base64_hash_digest_string(
            downloaded_file_hash_object)
        try:
            util.validate_object_hashes_match(
                self._destination_resource.storage_url,
                self._source_resource.md5_hash, downloaded_file_hash_digest)
        except errors.HashMismatchError:
            os.remove(self._destination_resource.storage_url.object_name)
            raise

        if _should_decompress_gzip(self._source_resource,
                                   self._destination_resource):
            _ungzip_file(self._destination_resource.storage_url.object_name)
Example 3
    def _perform_resumable_download(self, digesters, progress_callback):
        """Resumes or starts a download that can be resumed."""
        destination_url = self._destination_resource.storage_url
        existing_file_size = _get_valid_downloaded_byte_count(
            destination_url, self._source_resource)
        if existing_file_size:
            with files.BinaryFileReader(
                    destination_url.object_name) as file_reader:
                # Get hash of partially-downloaded file as start for validation.
                for hash_algorithm in digesters:
                    digesters[hash_algorithm] = util.get_hash_from_file_stream(
                        file_reader, hash_algorithm)

        tracker_file_path, start_byte = (
            tracker_file_util.read_or_create_download_tracker_file(
                self._source_resource,
                destination_url,
                existing_file_size=existing_file_size))
        end_byte = self._source_resource.size

        self._perform_download(digesters, progress_callback,
                               cloud_api.DownloadStrategy.RESUMABLE,
                               start_byte, end_byte)

        tracker_file_util.delete_tracker_file(tracker_file_path)
Example 4
    def execute(self, task_status_queue=None):
        """Performs upload."""
        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            size=self._length,
            source_url=self._source_resource.storage_url,
            destination_url=self._destination_resource.storage_url,
            component_number=self._component_number,
            total_components=self._total_components,
            operation_name=task_status.OperationName.UPLOADING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )

        source_stream = files.BinaryFileReader(
            self._source_resource.storage_url.object_name)
        provider = self._destination_resource.storage_url.scheme

        with file_part.FilePart(source_stream, self._offset,
                                self._length) as upload_stream:
            api_factory.get_api(provider).upload_object(
                upload_stream,
                self._destination_resource,
                request_config=cloud_api.RequestConfig(
                    md5_hash=self._source_resource.md5_hash,
                    size=self._length),
                progress_callback=progress_callback)
Example 5
def FindSentinel(filename, blocksize=2**16):
    """Return the sentinel line from the output file.

  Args:
    filename: The filename of the output file.  (We'll read this file.)
    blocksize: Optional block size for buffering, for unit testing.

  Returns:
    The contents of the last line in the file that doesn't start with
    a tab, with its trailing newline stripped; or None if the file
    couldn't be opened or no such line could be found by inspecting
    the last 'blocksize' bytes of the file.
  """
    try:
        fp = files.BinaryFileReader(filename)
    except files.Error as err:
        log.warning('Append mode disabled: can\'t read [%r]: %s', filename,
                    err)
        return None
    try:
        fp.seek(0, 2)  # EOF
        fp.seek(max(0, fp.tell() - blocksize))
        lines = fp.readlines()
        del lines[:1]  # First line may be partial, throw it away
        sentinel = None
        for line in lines:
            if not line.startswith(b'\t'):  # readlines() on a binary stream yields bytes.
                sentinel = line
        if not sentinel:
            return None
        return sentinel.rstrip(b'\n')
    finally:
        fp.close()
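A hypothetical use of FindSentinel; the output path is a placeholder, and with BinaryFileReader the returned sentinel is a bytes object (or None).

sentinel = FindSentinel('/tmp/dev_appserver.out')
if sentinel is None:
    print('append mode disabled; starting a new output file')
else:
    print('resuming after sentinel: %r' % (sentinel,))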
Example 6
def _MultiStepsDigest(digest, file_to_digest):
    # TODO(b/77481291) Refactor this to allow reading from stdin.
    with files.BinaryFileReader(file_to_digest) as f:
        while True:
            chunk = f.read(_READ_SIZE)
            if not chunk:
                break
            digest.update(chunk)
    return digest
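Because _MultiStepsDigest only calls update() on the digest object, a hashlib digest can be passed in directly. A hypothetical call; the path is a placeholder.

import hashlib

digest = _MultiStepsDigest(hashlib.sha256(), '/tmp/archive.tar.gz')
print(digest.hexdigest())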
Example 7
    def execute(self, callback=None):
        destination_url = self._destination_resource.storage_url
        provider = destination_url.scheme

        with files.BinaryFileReader(
                self._source_resource.storage_url.object_name
        ) as upload_stream:
            # TODO(b/162069479): Support all of upload_object's parameters.
            api_factory.get_api(provider).upload_object(
                upload_stream, self._destination_resource)
Example 8
    def _catch_up_digesters(self, digesters, start_byte, end_byte):
        with files.BinaryFileReader(
                self._destination_resource.storage_url.object_name
        ) as file_reader:
            # Get hash of partially-downloaded file as start for validation.
            for hash_algorithm in digesters:
                digesters[hash_algorithm] = hash_util.get_hash_from_file_stream(
                    file_reader,
                    hash_algorithm,
                    start=start_byte,
                    stop=end_byte)
Example 9
def get_stream(source_resource,
               length,
               offset=0,
               digesters=None,
               task_status_queue=None,
               destination_resource=None,
               component_number=None,
               total_components=None):
    """Gets a stream to use for an upload.

  Args:
    source_resource (resource_reference.FileObjectResource): Contains a path to
      the source file.
    length (int): The total number of bytes to be uploaded.
    offset (int): The position of the first byte to be uploaded.
    digesters (dict[hash_util.HashAlgorithm, hash object]): Hash objects to be
      populated as bytes are read.
    task_status_queue (multiprocessing.Queue|None): Used for sending progress
      messages. If None, no messages will be generated or sent.
    destination_resource (resource_reference.ObjectResource): The upload
      destination. Used for progress reports, and should be specified if
      task_status_queue is.
    component_number (int|None): Identifies a component in composite uploads.
    total_components (int|None): The total number of components used in a
      composite upload.

  Returns:
    An UploadStream wrapping the file specified by source_resource.
  """
    if task_status_queue:
        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            offset=offset,
            length=length,
            source_url=source_resource.storage_url,
            destination_url=destination_resource.storage_url,
            component_number=component_number,
            total_components=total_components,
            operation_name=task_status.OperationName.UPLOADING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )
    else:
        progress_callback = None

    source_stream = files.BinaryFileReader(
        source_resource.storage_url.object_name)
    return upload_stream.UploadStream(source_stream,
                                      offset,
                                      length,
                                      digesters=digesters,
                                      progress_callback=progress_callback)
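get_stream wraps the source file so that only the [offset, offset + length) slice is read during the upload. Below is a minimal sketch of that windowing idea; it is an illustrative stand-in, not the gcloud file_part.FilePart or upload_stream.UploadStream classes.

class WindowedFileReader:
    """Illustrative stand-in: reads at most `length` bytes starting at `offset`."""

    def __init__(self, fileobj, offset, length):
        self._fileobj = fileobj
        self._end = offset + length
        self._fileobj.seek(offset)

    def read(self, size=-1):
        remaining = self._end - self._fileobj.tell()
        if remaining <= 0:
            return b''
        if size < 0 or size > remaining:
            size = remaining
        return self._fileobj.read(size)

# Hypothetical usage with a plain binary file:
# with open('/tmp/source.bin', 'rb') as f:
#     part = WindowedFileReader(f, offset=1024, length=4096)
#     data = part.read()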
Example 10
    def download_object(self,
                        cloud_resource,
                        download_stream,
                        compressed_encoding=False,
                        decryption_wrapper=None,
                        digesters=None,
                        download_strategy=cloud_api.DownloadStrategy.ONE_SHOT,
                        progress_callback=None,
                        start_byte=0,
                        end_byte=None):
        """See super class."""
        extra_args = {}
        if cloud_resource.generation:
            extra_args['VersionId'] = cloud_resource.generation

        if download_strategy == cloud_api.DownloadStrategy.RESUMABLE:
            response = self.client.get_object(
                Bucket=cloud_resource.bucket,
                Key=cloud_resource.name,
                Range='bytes={}-'.format(start_byte),
            )
            processed_bytes = start_byte
            for chunk in response['Body'].iter_chunks(
                    scaled_integer.ParseInteger(
                        properties.VALUES.storage.download_chunk_size.Get())):
                download_stream.write(chunk)
                processed_bytes += len(chunk)
                if progress_callback:
                    progress_callback(processed_bytes)
        else:
            # TODO(b/172480278) Conditionally call get_object for smaller object.
            self.client.download_fileobj(cloud_resource.bucket,
                                         cloud_resource.name,
                                         download_stream,
                                         Callback=progress_callback,
                                         ExtraArgs=extra_args)

        # The download callback doesn't give us streaming data, so we have to
        # read the whole downloaded file to update the digests.
        if digesters:
            with files.BinaryFileReader(
                    download_stream.name) as completed_download_stream:
                completed_download_stream.seek(0)
                for hash_algorithm in digesters:
                    digesters[
                        hash_algorithm] = hash_util.get_hash_from_file_stream(
                            completed_download_stream, hash_algorithm)

        return self._get_content_encoding(cloud_resource)
Example 11
def ReportMetrics(metrics_file_path):
  """Sends the specified anonymous usage event to the given analytics endpoint.

  Args:
      metrics_file_path: str, File with pickled metrics (list of tuples).
  """
  with files.BinaryFileReader(metrics_file_path) as metrics_file:
    metrics = pickle.load(metrics_file)
  os.remove(metrics_file_path)

  http = httplib2.Http(timeout=TIMEOUT_IN_SEC,
                       proxy_info=http_proxy.GetHttpProxyInfo())

  for metric in metrics:
    http.request(metric[0], method=metric[1], body=metric[2], headers=metric[3])
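Judging from how the tuples are indexed above, the pickled file is assumed to hold a list of (url, method, body, headers) tuples. A hedged sketch of producing such a file; the endpoint, payload, and headers are placeholders.

import pickle

metrics = [
    ('https://example.com/collect',        # metric[0]: endpoint URL
     'POST',                               # metric[1]: HTTP method
     'event=command_executed',             # metric[2]: request body
     {'User-Agent': 'gcloud-metrics'}),    # metric[3]: request headers
]
with open('/tmp/metrics.pickle', 'wb') as metrics_file:
    pickle.dump(metrics, metrics_file)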
Example 12
    def execute(self, callback=None):
        destination_url = self._destination_resource.storage_url
        provider = destination_url.scheme

        source_stream = files.BinaryFileReader(
            self._source_resource.storage_url.object_name)

        with file_part.FilePart(source_stream, self._offset,
                                self._length) as upload_stream:
            api_factory.get_api(provider).upload_object(
                upload_stream,
                self._destination_resource,
                request_config=cloud_api.RequestConfig(
                    md5_hash=self._source_resource.md5_hash,
                    size=self._length))
Example 13
def GetDigest(digest_algorithm, filename):
  """Digest the file at filename based on digest_algorithm.

  Args:
    digest_algorithm: The algorithm used to digest the file, can be one of
      'sha256', 'sha384', or 'sha512'.
    filename: A valid file path over which a digest will be calculated.

  Returns:
    The digest of the provided file.

  Raises:
    InvalidArgumentException: The provided digest_algorithm is invalid.
  """
  with files.BinaryFileReader(filename) as f:
    return GetDigestOfFile(digest_algorithm, f)
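GetDigestOfFile is not shown in this excerpt. A standalone sketch of what a chunked file-stream digest could look like with hashlib; the helper name, chunk size, and raw-bytes return value are assumptions.

import hashlib

def digest_of_stream(algorithm_name, stream, chunk_size=8192):
    # algorithm_name is one of the names hashlib accepts, e.g. 'sha256'.
    digest = hashlib.new(algorithm_name)
    for chunk in iter(lambda: stream.read(chunk_size), b''):
        digest.update(chunk)
    return digest.digest()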
Example 14
    def execute(self, callback=None):
        with files.BinaryFileWriter(
                self._destination_resource.storage_url.object_name,
                create_path=True) as download_stream:
            provider = self._source_resource.storage_url.scheme

            # TODO(b/162264437): Support all of download_object's parameters.
            api_factory.get_api(provider).download_object(
                self._source_resource, download_stream)

        with files.BinaryFileReader(
                self._destination_resource.storage_url.object_name
        ) as completed_download_stream:
            downloaded_file_hash = util.get_hash_digest_from_file_stream(
                completed_download_stream, util.HashAlgorithms.MD5)
            util.validate_object_hashes_match(
                self._source_resource.storage_url,
                self._source_resource.md5_hash, downloaded_file_hash)
Example 15
def ReportMetrics(metrics_file_path):
    """Sends the specified anonymous usage event to the given analytics endpoint.

  Args:
      metrics_file_path: str, File with pickled metrics (list of tuples).
  """
    with files.BinaryFileReader(metrics_file_path) as metrics_file:
        metrics = pickle.load(metrics_file)
    os.remove(metrics_file_path)

    session = requests.Session()

    for metric in metrics:
        session.request(metric[1],
                        metric[0],
                        data=metric[2],
                        headers=metric[3],
                        timeout=TIMEOUT_IN_SEC)
Example 16
def UploadArchive(upload_url, zip_file):
    """Uploads the specified zip file with a PUT request to the provided URL.

  Args:
    upload_url: A string of the URL to send the PUT request to. Required to be a
      signed URL from GCS.
    zip_file: A string of the file path to the zip file to upload.

  Returns:
    A requests.Response object.
  """
    sess = requests.GetSession()
    # Required headers for the Apigee-generated signed URL.
    headers = {
        'content-type': 'application/zip',
        'x-goog-content-length-range': '0,1073741824'
    }
    with files.BinaryFileReader(zip_file) as data:
        response = sess.put(upload_url, data=data, headers=headers)
    return response
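A hypothetical call to UploadArchive; the signed URL and zip path are placeholders.

response = UploadArchive(
    'https://storage.googleapis.com/example-bucket/bundle.zip?signature=abc123',
    '/tmp/source-bundle.zip')
response.raise_for_status()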
Example 17
    def _get_upload_stream(self, digesters, task_status_queue):
        if task_status_queue:
            progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
                status_queue=task_status_queue,
                offset=self._offset,
                length=self._length,
                source_url=self._source_resource.storage_url,
                destination_url=self._destination_resource.storage_url,
                component_number=self._component_number,
                total_components=self._total_components,
                operation_name=task_status.OperationName.UPLOADING,
                process_id=os.getpid(),
                thread_id=threading.get_ident(),
            )
        else:
            progress_callback = None

        source_stream = files.BinaryFileReader(self._source_path)
        return upload_stream.UploadStream(source_stream,
                                          self._offset,
                                          self._length,
                                          digesters=digesters,
                                          progress_callback=progress_callback)
Example 18
    def __init__(self, name, create=True, timeout=None, version=None):
        super(Cache, self).__init__(
            _Table,
            name,
            create=create,
            timeout=timeout,
            version=version)
        self._persistent = False
        # Check if the db file exists and is an sqlite3 db.
        # Surprise, we have to do the heavy lifting.
        # That stops here.
        try:
            with files.BinaryFileReader(name) as f:
                actual_magic = f.read(len(self._EXPECTED_MAGIC))
                if actual_magic != self._EXPECTED_MAGIC:
                    raise exceptions.CacheInvalid(
                        '[{}] is not a persistent cache.'.format(self.name))
            self._persistent = True
        except files.MissingFileError:
            if not create:
                raise exceptions.CacheNotFound(
                    'Persistent cache [{}] not found.'.format(self.name))
        except files.Error:
            raise exceptions.CacheInvalid(
                '[{}] is not a persistent cache.'.format(self.name))
        self._db = sqlite3.connect(name)
        self.cursor = self._db.cursor()
        self._restricted = set(['__lock__'])
        self._tables = {}
        self._metadata = None
        self._start = persistent_cache_base.Now()
        try:
            self.InitializeMetadata()
        except exceptions.Error:
            # Make sure we clean up any dangling resources.
            self.Close(commit=False)
            raise
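The _EXPECTED_MAGIC constant is not shown in this excerpt; for a standard SQLite 3 database the documented 16-byte file header is b'SQLite format 3\x00'. A standalone sketch of the same check with plain open(); the helper name and error handling are illustrative.

SQLITE_MAGIC = b'SQLite format 3\x00'

def looks_like_sqlite(path):
    try:
        with open(path, 'rb') as f:
            return f.read(len(SQLITE_MAGIC)) == SQLITE_MAGIC
    except OSError:
        return False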
Example 19
def get_temporary_gzipped_file(file_path):
    zipped_file_path = file_path + storage_url.TEMPORARY_FILE_SUFFIX
    with files.BinaryFileReader(file_path) as file_reader:
        with gzip.open(zipped_file_path, 'wb') as gzip_file_writer:
            shutil.copyfileobj(file_reader, gzip_file_writer)
    return zipped_file_path
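A hypothetical use of get_temporary_gzipped_file; the source path is a placeholder, and the temporary gzipped copy is removed once it is no longer needed.

import os

zipped_path = get_temporary_gzipped_file('/tmp/report.csv')
try:
    pass  # e.g. upload zipped_path with Content-Encoding: gzip here
finally:
    os.remove(zipped_path)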