def AddFileToManifest(manifest_path, input_path):
    """Registers a file in the manifest under a content-derived name.

    The staged file name is the checksum of the file's contents followed by
    the file's original extension. `staging_dir`, `bucket_url` and `manifest`
    are not parameters; they are resolved from the surrounding scope.

    Args:
      manifest_path: The key under which the file is recorded in the manifest.
      input_path: The location on disk of the file being added.

    Returns:
      None if the manifest already holds an entry for manifest_path whose
      'sourceUrl' is non-empty and differs from the URL computed here.
      Otherwise, the path inside the staging directory to which the caller
      must copy, move, or link the file.
    """
    _, extension = os.path.splitext(input_path)
    digest = file_utils.Checksum().AddFileContents(input_path).HexDigest()
    staged_name = digest + extension
    staged_path = os.path.join(staging_dir, staged_name)
    remote_url = '/'.join([bucket_url, staged_name])

    existing_entry = manifest.get(manifest_path, {})
    previous_url = existing_entry.get('sourceUrl', '')
    if not previous_url or previous_url == remote_url:
        # Either a new entry or the same content again: (re)record it.
        manifest[manifest_path] = {
            'sourceUrl': remote_url,
            'sha1Sum': digest,
        }
        return staged_path

    # Same manifest key already points at a different URL.
    return None
def _GetYamlPath(source_dir, service_path, skip_files, gen_files):
    """Picks the service YAML path to use for the build.

    When the service YAML sits under source_dir and is not excluded by
    skip_files, its path relative to source_dir is used as-is. In every other
    case the YAML contents are registered in gen_files under a name derived
    from a checksum of those contents.

    Args:
      source_dir: str, the absolute path to the root of the application
        directory.
      service_path: str, the absolute path to the service YAML file.
      skip_files: appengine.api.Validation._RegexStr, the validated regex
        object from the service info file.
      gen_files: dict, the files to generate. Gains one entry when the YAML
        has to be generated.

    Returns:
      str, the relative path to the service YAML file that should be used for
      the build.
    """
    if files.IsDirAncestorOf(source_dir, service_path):
        candidate = os.path.relpath(service_path, start=source_dir)
        if not util.ShouldSkip(skip_files, candidate):
            return candidate

    contents = files.ReadFileContents(service_path)
    # The checksum only makes the generated name unique; it is not a
    # security measure.
    digest = files.Checksum().AddContents(contents).HexDigest()
    generated_name = '_app_{}.yaml'.format(digest)
    gen_files[generated_name] = contents
    return generated_name
def UploadFiles(upload_pairs, bucket_ref, gs_prefix=None):
    """Uploads local files to Cloud Storage under a content-derived prefix.

    A single SHA-256 checksum is accumulated over the contents of every local
    file in upload_pairs. Its hex digest is used as the prefix, appended to
    gs_prefix (joined with '/') when gs_prefix is given.

    Args:
      upload_pairs: [(str, str)]. Pairs of a local file path to upload and the
        corresponding path in Cloud Storage (placed after the prefix). For
        example, ('/path/foo', 'bar') uploads '/path/foo' to '<prefix>/bar'.
        The collection is iterated twice.
      bucket_ref: storage_util.BucketReference. Files are uploaded to this
        bucket.
      gs_prefix: str. Optional leading prefix for the Cloud Storage path; the
        content digest is appended to it.

    Returns:
      [str]. The gs:// paths of the uploaded files, in the same order as
      upload_pairs.
    """
    content_hash = file_utils.Checksum(algorithm=hashlib.sha256)
    for source_path, _ in upload_pairs:
        content_hash.AddFileContents(source_path)

    digest = content_hash.HexDigest()
    full_prefix = digest if gs_prefix is None else '/'.join([gs_prefix, digest])

    client = storage_api.StorageClient()

    def _Upload(source_path, relative_dest):
        """Copies one file into the bucket and returns its gs:// path."""
        target_ref = storage_util.ObjectReference.FromBucketRef(
            bucket_ref, '/'.join([full_prefix, relative_dest]))
        uploaded = client.CopyFileToGCS(source_path, target_ref)
        return '/'.join(['gs:/', uploaded.bucket, uploaded.name])

    return [_Upload(source, dest) for source, dest in upload_pairs]
def GetSha(content):
    """Computes the SHA1 hex digest of a string.

    Args:
      content: The string to hash. It is encoded as UTF-8 before hashing.

    Returns:
      A string holding the hex digest of the SHA1 checksum of the content.
    """
    hasher = files.Checksum(algorithm=hashlib.sha1)
    encoded = content.encode('utf-8')
    return hasher.AddContents(encoded).HexDigest()
def _BuildStagingDirectory(source_dir, staging_dir, bucket, excluded_regexes):
    """Populates a staging directory and builds the matching file manifest.

    Every file yielded by util.FileIterator for source_dir is staged under a
    name made of the checksum of its contents plus its original extension.
    The file is placed with _CopyOrSymlink, and only if nothing exists at
    that staged path yet, so files with identical contents and extension
    share one staged entry.

    For an application containing main.py (checksum 123) and tools/foo.py
    (checksum 456), the staging directory ends up with:

      <staging_dir>/
        123.py
        456.py

    and the returned manifest is:

      {
        "main.py": {"sourceUrl": "<bucket url>/123.py"},
        "tools/foo.py": {"sourceUrl": "<bucket url>/456.py"}
      }

    The checksum itself is currently not included in the manifest entries.

    Args:
      source_dir: The original directory containing the application's source
        code.
      staging_dir: The directory where the staged files will be created.
      bucket: A URL to the Google Cloud Storage bucket where the files will be
        uploaded.
      excluded_regexes: List of file patterns to skip while building the
        staging directory.

    Returns:
      A dictionary which represents the file manifest, keyed by the path of
      each file relative to source_dir.
    """
    bucket_url = cloud_storage.GsutilReferenceToApiReference(bucket)
    manifest = {}

    staged_files = util.FileIterator(
        source_dir, excluded_regexes, runtime=None)
    for relative_path in staged_files:
        source_path = os.path.join(source_dir, relative_path)
        _, extension = os.path.splitext(source_path)
        digest = file_utils.Checksum().AddFileContents(source_path).HexDigest()
        staged_name = digest + extension

        staged_path = os.path.join(staging_dir, staged_name)
        if not os.path.exists(staged_path):
            _CopyOrSymlink(source_path, staged_path)

        remote_url = '/'.join([bucket_url.rstrip('/'), staged_name])
        manifest[relative_path] = {
            'sourceUrl': remote_url,
            # TODO(user) Actually send SHA1 hash in payload. Currently
            # the server doesn't provide enough information to debug this.
            # 'sha1Sum': sha1_hash,
        }

    manifest_json = json.dumps(manifest, indent=2, sort_keys=True)
    log.debug('Generated deployment manifest: "{0}"'.format(manifest_json))
    return manifest
def BuildAndPushDockerImage(
        project,
        service,
        source_dir,
        version_id,
        code_bucket_ref,
        gcr_domain,
        runtime_builder_strategy=runtime_builders.RuntimeBuilderStrategy.NEVER
):
    """Uploads a service's source and runs a Cloud Build to produce its image.

    The source directory (plus any generated files) is uploaded to the code
    bucket, then a build is executed, either one loaded from a runtime
    builder or the default build from cloud_build.GetDefaultBuild.

    Args:
      project: str, The project being deployed to.
      service: ServiceYamlInfo, The parsed service config. When runtime
        builders are used, its runtime is overwritten with the builder's
        runtime.
      source_dir: str, path to the service's source directory
      version_id: The version id to deploy these services under.
      code_bucket_ref: The reference to the GCS bucket where the source will
        be uploaded.
      gcr_domain: str, Cloud Registry domain, determines the physical location
        of the image. E.g. `us.gcr.io`.
      runtime_builder_strategy: runtime_builders.RuntimeBuilderStrategy,
        whether to use the new CloudBuild-based runtime builders (alternative
        is old externalized runtimes).

    Returns:
      str, The name of the pushed container image, or None if the service does
      not require an image.

    Raises:
      DockerfileError: if a Dockerfile is present, but the runtime is not
        "custom".
      NoDockerfileError: Raised if a user didn't supply a Dockerfile and chose
        a custom runtime.
      UnsatisfiedRequirementsError: Raised if the code in the directory
        doesn't satisfy the requirements of the specified runtime type.
      WindowMaxPathError: Raised on Windows if the source upload fails with an
        OSError/IOError whose filename is longer than _WINDOWS_MAX_PATH.
    """
    needs_dockerfile = _NeedsDockerfile(service, source_dir)
    use_runtime_builders = runtime_builder_strategy.ShouldUseRuntimeBuilders(
        service.runtime, needs_dockerfile)

    # Nothing to do if this is not an image-based deployment.
    if not service.RequiresImage():
        return None
    log.status.Print(
        'Building and pushing image for service [{service}]'.format(
            service=service.module))

    gen_files = dict(_GetSourceContextsForUpload(source_dir))
    if needs_dockerfile and not use_runtime_builders:
        # The runtime builders will generate a Dockerfile in the Cloud, so we
        # only need to generate one locally when runtime builders are NOT in
        # use (use_runtime_builders is False).
        gen_files.update(_GetDockerfiles(service, source_dir))

    image = docker_image.Image(
        dockerfile_dir=source_dir,
        repo=_GetImageName(project, service.module, version_id, gcr_domain),
        nocache=False,
        tag=config.DOCKER_IMAGE_TAG)

    metrics.CustomTimedEvent(metric_names.CLOUDBUILD_UPLOAD_START)
    object_ref = storage_util.ObjectReference(
        code_bucket_ref, image.tagged_repo)

    # A service YAML outside source_dir is not part of the uploaded tree, so
    # its contents are added as a generated file named after their checksum.
    if files.IsDirAncestorOf(source_dir, service.file):
        relative_yaml_path = os.path.relpath(service.file, source_dir)
    else:
        yaml_contents = files.GetFileContents(service.file)
        checksum = files.Checksum().AddContents(yaml_contents).HexDigest()
        relative_yaml_path = checksum + '.yaml'
        gen_files[relative_yaml_path] = yaml_contents

    try:
        cloud_build.UploadSource(image.dockerfile_dir, object_ref,
                                 gen_files=gen_files,
                                 skip_files=service.parsed.skip_files.regex)
    except (OSError, IOError) as err:
        # On Windows, report an over-long path with a dedicated error; every
        # other failure is re-raised unchanged.
        if platforms.OperatingSystem.IsWindows():
            if err.filename and len(err.filename) > _WINDOWS_MAX_PATH:
                raise WindowMaxPathError(err.filename)
        raise
    metrics.CustomTimedEvent(metric_names.CLOUDBUILD_UPLOAD)

    if use_runtime_builders:
        builder_reference = runtime_builders.FromServiceInfo(
            service, source_dir)
        log.info('Using runtime builder [%s]',
                 builder_reference.build_file_uri)
        builder_reference.WarnIfDeprecated()
        # Re-join the OS-specific relative path with forward slashes before
        # handing it to the build.
        yaml_path = posixpath.join(*relative_yaml_path.split(os.sep))
        build = builder_reference.LoadCloudBuild({
            '_OUTPUT_IMAGE': image.tagged_repo,
            '_GAE_APPLICATION_YAML_PATH': yaml_path
        })
        # TODO(b/37542869) Remove this hack once the API can take the gs://
        # path as a runtime name.
        service.runtime = builder_reference.runtime
        service.parsed.SetEffectiveRuntime(builder_reference.runtime)
    else:
        build = cloud_build.GetDefaultBuild(image.tagged_repo)
    metrics.CustomTimedEvent(metric_names.CLOUDBUILD_EXECUTE_START)
    cloudbuild_build.CloudBuildClient().ExecuteCloudBuild(
        cloud_build.FixUpBuild(build, object_ref), project=project)
    metrics.CustomTimedEvent(metric_names.CLOUDBUILD_EXECUTE)

    return image.tagged_repo
def _GetSha1(input_path):
    """Returns the hex digest of the checksum of a file's contents.

    The checksum uses file_utils.Checksum with its default algorithm
    (presumably SHA1, going by this helper's name; confirm against
    file_utils).

    Args:
      input_path: The path of the file whose contents are hashed.

    Returns:
      The hex digest string produced by the checksum.
    """
    hasher = file_utils.Checksum()
    return hasher.AddFileContents(input_path).HexDigest()