Example #1
0
def _GetIncludedPaths(source_dir, exclude, skip_files=None):
  """Filter paths under source_dir using dockerignore exclusions and skip_files.

  The skip_files filter is applied in a separate pass so standard-deployment
  semantics are preserved (directories whose contents are entirely matched by
  skip_files are skipped).

  Args:
    source_dir: the path to the root directory.
    exclude: the .dockerignore file exclusions.
    skip_files: the regex for files to skip. If None, only dockerignore is
        used to filter.

  Returns:
    Set of paths (relative to source_dir) to include.
  """
  # Mirrors the root/exclusion handling in docker.utils.tar.
  candidates = docker.utils.exclude_paths(
      os.path.abspath(source_dir), exclude)
  if not skip_files:
    return candidates
  # util.FileIterator yields '/'-separated paths; normalize them to the
  # platform separator before intersecting with the docker path set.
  allowed = {
      rel.replace('/', os.path.sep)
      for rel in util.FileIterator(source_dir, skip_files)
  }
  candidates.intersection_update(allowed)
  return candidates
Example #2
0
def _GetIncludedPaths(source_dir, exclude, skip_files=None):
    """Helper function to filter paths in root using dockerignore and skip_files.

    We iterate separately to filter on skip_files in order to preserve expected
    behavior (standard deployment skips directories if they contain only files
    ignored by skip_files).

    Args:
      source_dir: the path to the root directory.
      exclude: the .dockerignore file exclusions.
      skip_files: the regex for files to skip. If None, only dockerignore is
          used to filter.

    Returns:
      Set of paths (relative to source_dir) to include.
    """
    # This code replicates how docker.utils.tar() finds the root
    # and excluded paths.
    root = os.path.abspath(source_dir)
    # Get set of all paths other than exclusions from dockerignore.
    paths = docker.utils.exclude_paths(root, exclude)
    # Also filter on the ignore regex from the app.yaml.
    if skip_files:
        # FileIterator always yields '/'-separated paths while exclude_paths
        # uses the platform separator; normalize before intersecting so the
        # filter also works on Windows (otherwise the intersection would be
        # empty there).
        included_paths = {
            p.replace('/', os.path.sep)
            for p in util.FileIterator(source_dir, skip_files)}
        paths.intersection_update(included_paths)
    return paths
 def testUpload(self):
     """A single-file source directory uploads successfully."""
     src_dir = self.CreateTempDir()
     self.WriteFile(os.path.join(src_dir, 'Dockerfile'), 'empty')
     self._ExpectUpload()
     cloud_build.UploadSource(
         src_dir, util.FileIterator(src_dir, self.re), self.object_ref)
 def testFailure(self):
     """An HttpError from the backend surfaces to the user as UploadError."""
     src_dir = self.CreateTempDir()
     self.WriteFile(os.path.join(src_dir, 'Dockerfile'), 'empty')
     self._ExpectUpload(exception=http_error.MakeHttpError())
     with self.assertRaises(storage_api.UploadError):
         cloud_build.UploadSource(
             src_dir, util.FileIterator(src_dir, self.re), self.object_ref)
 def testUploadWithGenFiles(self):
     """Generated files passed to UploadSource do not raise an error."""
     src_dir = self.CreateTempDir()
     self.WriteFile(os.path.join(src_dir, 'main.py'), 'empty')
     generated = {'Dockerfile': 'empty'}
     self._ExpectUpload()
     cloud_build.UploadSource(src_dir,
                              util.FileIterator(src_dir, self.re),
                              self.object_ref,
                              gen_files=generated)
def _BuildDeploymentManifest(info, source_dir, bucket_ref, tmp_dir):
    """Builds a deployment manifest for use with the App Engine Admin API.

    Args:
      info: An instance of yaml_parsing.ServiceInfo.
      source_dir: str, path to the service's source directory
      bucket_ref: The reference to the bucket files will be placed in.
      tmp_dir: A temp directory for storing generated files (currently just
          source context files).

    Returns:
      A deployment manifest (dict) for use with the Admin API.
    """
    excluded_files_regex = info.parsed.skip_files.regex
    manifest = {}
    bucket_url = 'https://storage.googleapis.com/{0}'.format(bucket_ref.bucket)

    def AddEntry(rel_path, full_path):
        """Hashes full_path and records its manifest entry under rel_path."""
        sha1_hash = file_utils.Checksum.HashSingleFile(full_path,
                                                       algorithm=hashlib.sha1)
        manifest[_FormatForManifest(rel_path)] = {
            'sourceUrl': '/'.join([bucket_url, sha1_hash]),
            'sha1Sum': sha1_hash,
        }

    # Normal application files.
    for rel_path in util.FileIterator(source_dir, excluded_files_regex):
        AddEntry(rel_path, os.path.join(source_dir, rel_path))

    # Source context files. These are temporary files which indicate the
    # current state of the source repository (git, cloud repo, etc.)
    context_files = context_util.CreateContextFiles(tmp_dir,
                                                    None,
                                                    source_dir=source_dir)
    for context_file in context_files:
        rel_path = os.path.basename(context_file)
        if rel_path in manifest:
            # The source context file was explicitly provided by the user.
            log.debug(
                'Source context already exists. Using the existing file.')
            continue
        AddEntry(rel_path, context_file)
    return manifest
Example #7
0
def _BuildDeploymentManifest(info, bucket_ref, source_contexts,
                             context_dir):
  """Builds a deployment manifest for use with the App Engine Admin API.

  Args:
    info: An instance of yaml_parsing.ServiceInfo.
    bucket_ref: The reference to the bucket files will be placed in.
    source_contexts: A list of source context files.
    context_dir: A temp directory to place the source context files in.
  Returns:
    A deployment manifest (dict) for use with the Admin API.
  """
  source_dir = os.path.dirname(info.file)
  bucket_url = 'https://storage.googleapis.com/{0}'.format(bucket_ref.bucket)
  manifest = {}

  # Normal application files.
  for rel_path in util.FileIterator(source_dir, info.parsed.skip_files.regex):
    digest = _GetSha1(os.path.join(source_dir, rel_path))
    manifest[rel_path] = {
        'sourceUrl': '/'.join([bucket_url, digest]),
        'sha1Sum': digest
    }

  # Source context files.
  for context_file in context_util.CreateContextFiles(
      context_dir, source_contexts, overwrite=True, source_dir=source_dir):
    rel_path = os.path.basename(context_file)
    if rel_path in manifest:
      # The source context file was explicitly provided by the user.
      log.debug('Source context already exists. Skipping creation.')
      continue
    digest = _GetSha1(context_file)
    manifest[rel_path] = {
        'sourceUrl': '/'.join([bucket_url, digest]),
        'sha1Sum': digest
    }
  return manifest
 def testUploadWithGeneratedDockerignore(self):
     """Test that UploadSource correctly interprets generated .dockerignore."""
     src_dir = self.CreateTempDir()
     tar_mock = self.StartObjectPatch(cloud_build, '_CreateTar')
     tar_mock.return_value = 1
     self.WriteFile(os.path.join(src_dir, 'Dockerfile'), 'empty')
     self.WriteFile(os.path.join(src_dir, 'main.py'), 'empty')
     self.WriteFile(os.path.join(src_dir, 'fake.zip'), 'Dummy')
     os.mkdir(os.path.join(src_dir, 'tmpsubdir'))
     self.WriteFile(os.path.join(src_dir, 'tmpsubdir', 'fake2.zip'), 'Dummy')
     generated = {'.dockerignore': 'main.py'}
     self._ExpectUpload()
     cloud_build.UploadSource(src_dir, util.FileIterator(src_dir, self.re),
                              self.object_ref, generated)
     # _CreateTar must receive the source dir, the generated files, and the
     # post-dockerignore file set (main.py excluded by the generated ignore).
     expected_paths = {'Dockerfile', 'fake.zip',
                       os.path.join('tmpsubdir', 'fake2.zip')}
     tar_mock.assert_called_once_with(src_dir, generated, expected_paths,
                                      mock.ANY)
    def _AddFilesThatAreSmallEnough(self):
        """Calls self.AddFile on files that are small enough.

        Application files must fit within
        self.resource_limits['max_file_size']; all other files within
        'max_blob_size'. Files that are too large are logged as errors and
        dropped rather than raising an exception.
        """
        log.debug('Scanning files on local disk.')

        yaml_dir = os.path.dirname(self.module_yaml_path)
        file_paths = util.FileIterator(yaml_dir,
                                       self.module_yaml.skip_files,
                                       self.module_yaml.runtime)
        for scanned, rel_path in enumerate(file_paths, start=1):
            with open(os.path.join(yaml_dir, rel_path),
                      'rb') as file_handle:
                file_length = GetFileLength(file_handle)

                # Pick the size ceiling that applies to this file's class.
                classification = FileClassification(self.module_yaml, rel_path)
                if classification.IsApplicationFile():
                    max_size = self.resource_limits['max_file_size']
                else:
                    max_size = self.resource_limits['max_blob_size']

                # Oversized files are reported and skipped, not fatal.
                if file_length > max_size:
                    log.error('Ignoring file [{0}]: Too long '
                              '(max {1} bytes, file is {2} bytes).'.format(
                                  rel_path, max_size, file_length))
                else:
                    log.info('Processing file [{0}]'.format(rel_path))
                    self.AddFile(rel_path, file_handle)

            # Occasionally, indicate that progress is being made.
            if scanned % 500 == 0:
                log.debug('Scanned {0} files.'.format(scanned))
def _GetIncludedPaths(source_dir, exclude, skip_files=None):
    """Helper function to filter paths in root using dockerignore and skip_files.

    We iterate separately to filter on skip_files in order to preserve expected
    behavior (standard deployment skips directories if they contain only files
    ignored by skip_files).

    Args:
      source_dir: the path to the root directory.
      exclude: the .dockerignore file exclusions.
      skip_files: the regex for files to skip. If None, only dockerignore is
          used to filter.

    Returns:
      Set of paths (relative to source_dir) to include.
    """
    # See docker.utils.tar
    root = os.path.abspath(source_dir)
    paths = docker.utils.exclude_paths(root, exclude)
    # Filter on the ignore regex
    if skip_files:
        # FileIterator yields '/'-separated paths while exclude_paths uses
        # the platform separator; normalize before intersecting so the filter
        # also works on Windows (otherwise the intersection would be empty).
        paths &= {p.replace('/', os.path.sep)
                  for p in util.FileIterator(source_dir, skip_files)}
    return paths
Example #11
0
def _BuildStagingDirectory(source_dir, staging_dir, bucket_ref,
                           excluded_regexes):
  """Creates a staging directory to be uploaded to Google Cloud Storage.

  The staging directory will contain a symlink for each file in the original
  directory. The source is a file whose name is the sha1 hash of the original
  file and points to the original file.

  Consider the following original structure:
    app/
      main.py
      tools/
        foo.py
   Assume main.py has SHA1 hash 123 and foo.py has SHA1 hash 456. The resultant
   staging directory will look like:
     /tmp/staging/
       123 -> app/main.py
       456 -> app/tools/foo.py
   (Note: "->" denotes a symlink)

   If the staging directory is then copied to a GCS bucket at
   gs://staging-bucket/ then the resulting manifest will be:
     {
       "app/main.py": {
         "sourceUrl": "https://storage.googleapis.com/staging-bucket/123",
         "sha1Sum": "123"
       },
       "app/tools/foo.py": {
         "sourceUrl": "https://storage.googleapis.com/staging-bucket/456",
         "sha1Sum": "456"
       }
     }

  Args:
    source_dir: The original directory containing the application's source
      code.
    staging_dir: The directory where the staged files will be created.
    bucket_ref: A reference to the GCS bucket where the files will be uploaded.
    excluded_regexes: List of file patterns to skip while building the staging
      directory.

  Raises:
    LargeFileError: if one of the files to upload exceeds the maximum App Engine
    file size.

  Returns:
    A dictionary which represents the file manifest.
  """
  manifest = {}
  bucket_url = bucket_ref.ToAppEngineApiReference()

  def AddFileToManifest(manifest_path, input_path):
    """Adds the given file to the current manifest.

    Args:
      manifest_path: The path to the file as it will be stored in the manifest.
      input_path: The location of the file to be added to the manifest.
    Returns:
      If the target was already in the manifest with different contexts,
      returns None. In all other cases, returns a target location to which the
      caller must copy, move, or link the file.
    """
    # The staged name keeps the original extension (hash + ext) so the staged
    # file's type is still recognizable.
    file_ext = os.path.splitext(input_path)[1]
    sha1_hash = file_utils.Checksum().AddFileContents(input_path).HexDigest()

    target_filename = sha1_hash + file_ext
    target_path = os.path.join(staging_dir, target_filename)

    dest_path = '/'.join([bucket_url, target_filename])
    old_url = manifest.get(manifest_path, {}).get('sourceUrl', '')
    # An existing entry pointing at a different URL means conflicting
    # contents for the same manifest path; signal that with None.
    if old_url and old_url != dest_path:
      return None
    manifest[manifest_path] = {
        'sourceUrl': dest_path,
        'sha1Sum': sha1_hash,
    }
    return target_path

  # Stage every application file under its hash-based name.
  for relative_path in util.FileIterator(source_dir, excluded_regexes):
    local_path = os.path.join(source_dir, relative_path)
    size = os.path.getsize(local_path)
    if size > _MAX_FILE_SIZE:
      raise LargeFileError(local_path, size, _MAX_FILE_SIZE)
    target_path = AddFileToManifest(relative_path, local_path)
    # Files with identical contents hash to the same target; stage each
    # distinct blob only once.
    if not os.path.exists(target_path):
      _CopyOrSymlink(local_path, target_path)

  context_files = context_util.CreateContextFiles(
      staging_dir, None, overwrite=True, source_dir=source_dir)
  for context_file in context_files:
    manifest_path = os.path.basename(context_file)
    target_path = AddFileToManifest(manifest_path, context_file)
    if not target_path:
      log.status.Print('Not generating {0} because a user-generated '
                       'file with the same name exists.'.format(manifest_path))
    if not target_path or os.path.exists(target_path):
      # If we get here, it probably means that the user already generated the
      # context file manually and put it either in the top directory or in some
      # subdirectory. The new context file is useless and may confuse later
      # stages of the upload (it is in the staging directory with a
      # nonconformant name), so delete it. The entry in the manifest will point
      # at the existing file.
      os.remove(context_file)
    else:
      # Rename the source-context*.json file (which is in the staging directory)
      # to the hash-based name in the same directory.
      os.rename(context_file, target_path)

  log.debug('Generated deployment manifest: "{0}"'.format(
      json.dumps(manifest, indent=2, sort_keys=True)))
  return manifest
Example #12
0
def GetSourceFiles(upload_dir, skip_files_regex, has_explicit_skip_files,
                   runtime, environment, source_dir):
    """Returns a list of all source files to be uploaded.

  This method uses several implementations based on the provided runtime and
  env. The rules are as follows, in decreasing priority:
  1) For some runtimes/envs (i.e. those defined in _GCLOUDIGNORE_REGISTRY), we
     completely ignore skip_files and generate a runtime-specific .gcloudignore
     if one is not present, or use the existing .gcloudignore.
  2) For all other runtimes/envs, we:
    2a) Check for an existing .gcloudignore and use that if one exists. We also
        raise an error if the user has both a .gcloudignore file and explicit
        skip_files defined.
    2b) If there is no .gcloudignore, we use the provided skip_files.

  Args:
    upload_dir: str, path to upload directory, the files to be uploaded.
    skip_files_regex: str, skip_files to use if necessary - see above rules for
      when this could happen. This can be either the user's explicit skip_files
      as defined in their app.yaml or the default skip_files we implicitly
      provide if they didn't define any.
    has_explicit_skip_files: bool, indicating whether skip_files_regex was
      explicitly defined by the user
    runtime: str, runtime as defined in app.yaml
    environment: env.Environment enum
    source_dir: str, path to original source directory, for writing generated
      files. May be the same as upload_dir.

  Raises:
    SkipFilesError: if you are using a runtime that no longer supports
      skip_files (such as those defined in _GCLOUDIGNORE_REGISTRY), or if using
      a runtime that still supports skip_files, but both skip_files and
      a .gcloudignore file are present.

  Returns:
    A list of path names of source files to be uploaded.
  """
    gcloudignore_registry = _GetGcloudignoreRegistry()
    registry_entry = gcloudignore_registry.Get(runtime, environment)

    if registry_entry:
        # Rule 1: this runtime/env manages ignores exclusively through
        # .gcloudignore; skip_files is an error here.
        if has_explicit_skip_files:
            raise SkipFilesError(
                'skip_files cannot be used with the [{}] runtime. '
                'Ignore patterns are instead expressed in '
                'a .gcloudignore file. For information on the format and '
                'syntax of .gcloudignore files, see '
                'https://cloud.google.com/sdk/gcloud/reference/topic/gcloudignore.'
                .format(runtime))
        # write_on_disk persists the generated .gcloudignore into source_dir.
        file_chooser = gcloudignore.GetFileChooserForDir(
            source_dir,
            default_ignore_file=registry_entry,
            write_on_disk=True,
            gcloud_ignore_creation_predicate=lambda unused_dir: True,
            include_gitignore=False)
        it = file_chooser.GetIncludedFiles(upload_dir, include_dirs=False)
    elif os.path.exists(os.path.join(source_dir,
                                     gcloudignore.IGNORE_FILE_NAME)):
        # Rule 2a: a user-authored .gcloudignore exists; it wins, but it must
        # not coexist with explicit skip_files.
        if has_explicit_skip_files:
            raise SkipFilesError(
                'Cannot have both a .gcloudignore file and skip_files defined in '
                'the same application. We recommend you translate your skip_files '
                'ignore patterns to your .gcloudignore file. See '
                'https://cloud.google.com/sdk/gcloud/reference/topic/gcloudignore '
                'for more information about gcloudignore.')
        it = gcloudignore.GetFileChooserForDir(source_dir).GetIncludedFiles(
            upload_dir, include_dirs=False)
    else:
        # Rule 2b: no .gcloudignore anywhere; fall back to skip_files.
        it = util.FileIterator(upload_dir, skip_files_regex)
    return list(it)