def __init__(self, src_dir):
  """Indexes the unignored files and directories under src_dir.

  Walks src_dir, filtering through the directory's gcloudignore rules, and
  records per-file metadata plus the total uncompressed size.

  Args:
    src_dir: str, the source directory to snapshot.
  """
  self.src_dir = src_dir
  self.files = {}
  self.dirs = []
  self.uncompressed_size = 0
  self._client = core_apis.GetClientInstance('storage', 'v1')
  self._messages = core_apis.GetMessagesModule('storage', 'v1')
  file_chooser = gcloudignore.GetFileChooserForDir(self.src_dir)
  for (dirpath, dirnames, filenames) in os.walk(self.src_dir):
    relpath = os.path.relpath(dirpath, self.src_dir)
    if not file_chooser.IsIncluded(relpath, is_dir=True):
      continue
    for fname in filenames:
      # Join file paths with Linux path separators, avoiding ./ prefix.
      # GCB workers are Linux VMs so os.path.join produces incorrect output.
      fpath = '/'.join([relpath, fname]) if relpath != '.' else fname
      if not file_chooser.IsIncluded(fpath):
        continue
      fm = FileMetadata(self.src_dir, fpath)
      self.files[fpath] = fm
      self.uncompressed_size += fm.size
    # Modifying dirnames in place is explicitly allowed by os.walk(); the
    # pruned list controls which subdirectories the walk descends into.
    for dname in dirnames[:]:  # Make a copy since we modify the original.
      # Join dir paths with Linux path separators, avoiding ./ prefix.
      # GCB workers are Linux VMs so os.path.join produces incorrect output.
      dpath = '/'.join([relpath, dname]) if relpath != '.' else dname
      if not file_chooser.IsIncluded(dpath, is_dir=True):
        # Bug fix: remove the bare entry that os.walk yields (dname), not the
        # joined path. Removing dpath raised ValueError for ignored
        # directories below the top level and never pruned the walk.
        dirnames.remove(dname)  # Don't recurse into dpath at all.
        continue
      self.dirs.append(dpath)
def _GetChooser(path):
  """Creates the file chooser used to filter uploads under path.

  node_modules is excluded in addition to the standard default ignore
  patterns, and gcloudignore creation is gated by the module predicate.
  """
  extra_patterns = '\nnode_modules\n'
  return gcloudignore.GetFileChooserForDir(
      path,
      default_ignore_file=gcloudignore.DEFAULT_IGNORE_FILE + extra_patterns,
      gcloud_ignore_creation_predicate=_GcloudIgnoreCreationPredicate)
def testGetFileChooserForDir_GcloudignoreFile(self):
  """Patterns in a .gcloudignore file exclude matching files."""
  with _TempDir() as temp_path:
    self.Touch(temp_path, '.gcloudignore', contents='foo\n')
    self.Touch(temp_path, 'foo')
    self.Touch(temp_path, 'bar')
    chooser = gcloudignore.GetFileChooserForDir(temp_path)
    included = set(chooser.GetIncludedFiles(temp_path))
    # 'foo' is ignored; the ignore file itself stays included.
    self.assertEqual(included, {'.gcloudignore', 'bar'})
def testGetFileChooserForDir_DisableGcloudignore(self):
  """With the gcloudignore property disabled, ignore patterns are inert."""
  properties.VALUES.gcloudignore.enabled.Set(False)
  with _TempDir() as temp_path:
    self.Touch(temp_path, '.gcloudignore', contents='foo\n')
    self.Touch(temp_path, 'foo')
    self.Touch(temp_path, 'bar')
    chooser = gcloudignore.GetFileChooserForDir(temp_path)
    # Everything is included, even 'foo' which the ignore file matches.
    self.assertEqual(
        set(chooser.GetIncludedFiles(temp_path)),
        {'.gcloudignore', 'bar', 'foo'})
def testGetFileChooserForDir_NoIgnoreFiles(self):
  """Without any ignore file on disk, the default patterns are not applied."""
  with _TempDir() as temp_path:
    self.Touch(temp_path, 'foo')
    self.Touch(temp_path, 'bar')
    chooser = gcloudignore.GetFileChooserForDir(
        temp_path, default_ignore_file='foo')
    self.assertTrue(chooser)
    # Both files survive even though the default pattern names 'foo'.
    self.assertEqual(set(chooser.GetIncludedFiles(temp_path)), {'foo', 'bar'})
def _ImportStorageApi(gcs_bucket, source, destination):
  """Imports files and directories into a bucket.

  Args:
    gcs_bucket: the destination bucket reference (presumably a
      storage_util bucket ref — it is passed to
      ObjectReference.FromBucketRef; confirm against callers).
    source: str, a local path or a gs:// URL. A trailing '*' marks the
      source as a directory listing rather than a single object.
    destination: str, the destination path within the bucket.

  Raises:
    command_util.Error: if a local source path does not exist.
  """
  client = storage_api.StorageClient()
  old_source = source
  # Strip any trailing asterisks so we can detect the "directory" marker.
  source = source.rstrip('*')
  # Source ends with an asterisk. This means the user indicates that the source
  # is a directory so we shouldn't bother trying to see if source is an object.
  # This is important because we always have certain subdirs created as objects
  # (e.g. dags/), so if we don't do this check, import/export will just try
  # and copy this empty object.
  object_is_subdir = old_source != source
  if not object_is_subdir:
    # If source is not indicated to be a subdir, then strip the ending slash
    # so the specified directory is present in the destination.
    source = source.rstrip(posixpath.sep)
  source_is_local = not source.startswith('gs://')
  if source_is_local and not os.path.exists(source):
    raise command_util.Error('Source for import does not exist.')
  # Don't include the specified directory as we want that present in the
  # destination bucket.
  source_dirname = _JoinPaths(
      os.path.dirname(source), '', gsutil_path=not source_is_local)
  if source_is_local:
    if os.path.isdir(source):
      # Upload each unignored regular file under the directory.
      file_chooser = gcloudignore.GetFileChooserForDir(source)
      for rel_path in file_chooser.GetIncludedFiles(source):
        file_path = _JoinPaths(source, rel_path)
        if os.path.isdir(file_path):
          continue
        dest_path = _GetDestPath(source_dirname, file_path, destination, False)
        obj_ref = storage_util.ObjectReference.FromBucketRef(
            gcs_bucket, dest_path)
        client.CopyFileToGCS(file_path, obj_ref)
    else:  # Just upload the file.
      dest_path = _GetDestPath(source_dirname, source, destination, False)
      obj_ref = storage_util.ObjectReference.FromBucketRef(
          gcs_bucket, dest_path)
      client.CopyFileToGCS(source, obj_ref)
  else:
    # Remote source: enumerate the object (or subdirectory objects) and
    # perform bucket-to-bucket copies.
    source_ref = storage_util.ObjectReference.FromUrl(source)
    to_import = _GetObjectOrSubdirObjects(
        source_ref, object_is_subdir=object_is_subdir, client=client)
    for obj in to_import:
      dest_object = storage_util.ObjectReference.FromBucketRef(
          gcs_bucket,
          # Use obj.ToUrl() to ensure that the dirname is properly stripped.
          _GetDestPath(source_dirname, obj.ToUrl(), destination, False))
      client.Copy(obj, dest_object)
def testGetFileChooserForDir_DontIncludeGitignore(self):
  """include_gitignore=False makes .gitignore patterns have no effect."""
  with _TempDir() as temp_path:
    self.Touch(temp_path, 'foo')
    self.Touch(temp_path, 'bar')
    self.Touch(temp_path, '.gitignore', contents='bar\n')
    chooser = gcloudignore.GetFileChooserForDir(
        temp_path,
        default_ignore_file='\n'.join(['.gitignore', '.gcloudignore']),
        include_gitignore=False)
    # 'bar' is kept even though .gitignore names it.
    self.assertEqual(set(chooser.GetIncludedFiles(temp_path)), {'foo', 'bar'})
def testGetFileChooserForDir_Gitfiles(self):
  """A directory with git metadata gets a .gcloudignore written to disk."""
  with _TempDir() as temp_path:
    self.Touch(os.path.join(temp_path, '.git'), 'git-metadata', makedirs=True)
    self.Touch(temp_path, 'foo')
    chooser = gcloudignore.GetFileChooserForDir(temp_path)
    # Git metadata is excluded from the listing.
    self.assertEqual(set(chooser.GetIncludedFiles(temp_path)), {'foo'})
    generated_ignore = os.path.join(temp_path, '.gcloudignore')
    self.assertTrue(os.path.exists(generated_ignore))
def testGetFileChooserForDir_GitignoreDoNotWrite(self):
  """write_on_disk=False still honors .gitignore but persists nothing."""
  with _TempDir() as temp_path:
    self.Touch(os.path.join(temp_path, '.git'), 'git-metadata', makedirs=True)
    self.Touch(temp_path, 'foo')
    self.Touch(temp_path, 'bar')
    self.Touch(temp_path, '.gitignore', contents='foo')
    chooser = gcloudignore.GetFileChooserForDir(temp_path, write_on_disk=False)
    self.assertEqual(set(chooser.GetIncludedFiles(temp_path)), {'bar'})
    # No .gcloudignore file should have been generated.
    self.assertFalse(os.path.exists(os.path.join(temp_path, '.gcloudignore')))
def __init__(self, src_dir, ignore_file=None):
  """Indexes the unignored files and directories under src_dir.

  Walks src_dir, filtering through gcloudignore rules (without writing a
  .gcloudignore to disk), skipping broken symlinks, and recording per-file
  metadata plus the total uncompressed size. Sets any_files_ignored to True
  if anything was excluded by the ignore rules.

  Args:
    src_dir: str, the source directory to snapshot.
    ignore_file: str, optional override for the .gcloudignore file, passed
      through to gcloudignore.GetFileChooserForDir.
  """
  self.src_dir = src_dir
  self.files = {}
  self.dirs = []
  self.uncompressed_size = 0
  self._client = core_apis.GetClientInstance('storage', 'v1')
  self._messages = core_apis.GetMessagesModule('storage', 'v1')
  file_chooser = gcloudignore.GetFileChooserForDir(
      self.src_dir, write_on_disk=False, ignore_file=ignore_file)
  self.any_files_ignored = False
  for (dirpath, dirnames, filenames) in os.walk(six.text_type(self.src_dir)):
    relpath = os.path.relpath(dirpath, self.src_dir)
    if (dirpath != self.src_dir and  # don't ever ignore the main source dir!
        not file_chooser.IsIncluded(relpath, is_dir=True)):
      self.any_files_ignored = True
      continue
    for fname in filenames:
      # Bug fix: build the symlink-check path from dirpath (valid relative to
      # the process cwd), not relpath. The old relpath-based path was only
      # correct when cwd happened to equal src_dir, so broken symlinks were
      # mis-detected from any other working directory.
      path = os.path.join(dirpath, fname)
      if os.path.islink(path) and not os.path.exists(path):
        # The file is a broken symlink; ignore it.
        log.info(
            'Ignoring [{}] which is a symlink to non-existent path'
            .format(path))
        continue
      # Join file paths with Linux path separators, avoiding ./ prefix.
      # GCB workers are Linux VMs so os.path.join produces incorrect output.
      fpath = '/'.join([relpath, fname]) if relpath != '.' else fname
      if not file_chooser.IsIncluded(fpath):
        self.any_files_ignored = True
        continue
      fm = FileMetadata(self.src_dir, fpath)
      self.files[fpath] = fm
      self.uncompressed_size += fm.size
    # NOTICE: Modifying dirnames is explicitly allowed by os.walk(). The
    # modified dirnames is used in the next loop iteration which is also
    # the next os.walk() iteration.
    for dname in dirnames[:]:  # Make a copy since we modify the original.
      # Join dir paths with Linux path separators, avoiding ./ prefix.
      # GCB workers are Linux VMs so os.path.join produces incorrect output.
      dpath = '/'.join([relpath, dname]) if relpath != '.' else dname
      if not file_chooser.IsIncluded(dpath, is_dir=True):
        dirnames.remove(dname)  # Don't recurse into dpath at all.
        continue
      self.dirs.append(dpath)
def testGetFileChooserForDir_GitignoreNotWritable(self):
  """An unwritable directory still yields a chooser; nothing is written."""
  with _TempDir() as temp_path:
    self.Touch(os.path.join(temp_path, '.git'), 'git-metadata', makedirs=True)
    self.Touch(temp_path, 'foo')
    self.Touch(temp_path, 'bar')
    self.Touch(temp_path, '.gitignore', contents='foo')
    try:
      # Make the directory read-only so the .gcloudignore write must fail.
      os.chmod(temp_path, 0o555)
      chooser = gcloudignore.GetFileChooserForDir(temp_path)
    finally:
      os.chmod(temp_path, 0o777)
    self.assertEqual(set(chooser.GetIncludedFiles(temp_path)), {'bar'})
    self.assertFalse(os.path.exists(os.path.join(temp_path, '.gcloudignore')))
def _GetSourceLocal(client, messages, region, function_name, source,
                    stage_bucket_arg, ignore_file_arg):
  """Constructs a `Source` message from a local file system path.

  Zips the unignored files under `source` and uploads the archive either to
  the user-provided stage bucket or to a service-generated upload URL.

  Args:
    client: The GCFv2 API client
    messages: messages module, the GCFv2 message stubs
    region: str, the region to deploy the function to
    function_name: str, the name of the function
    source: str, the path to the local source directory
    stage_bucket_arg: str, the passed in --stage-bucket flag argument
    ignore_file_arg: str, the passed in --ignore-file flag argument

  Returns:
    function_source: cloud.functions.v2main.Source
  """
  with file_utils.TemporaryDirectory() as tmp_dir:
    zip_file_path = os.path.join(tmp_dir, 'fun.zip')
    # Only zip files that survive the ignore rules (defaults plus any
    # --ignore-file override).
    chooser = gcloudignore.GetFileChooserForDir(
        source,
        default_ignore_file=_DEFAULT_IGNORE_FILE,
        gcloud_ignore_creation_predicate=_GcloudIgnoreCreationPredicate,
        ignore_file=ignore_file_arg)
    archive.MakeZipFromDir(zip_file_path, source, predicate=chooser.IsIncluded)
    if stage_bucket_arg:
      # Explicit stage bucket: upload there and reference the object directly.
      dest_object = _UploadToStageBucket(region, function_name, zip_file_path,
                                         stage_bucket_arg)
      return messages.Source(storageSource=messages.StorageSource(
          bucket=dest_object.bucket, object=dest_object.name))
    else:
      # No stage bucket: ask the service for a signed upload URL and use the
      # storage source it reports back.
      dest = client.projects_locations_functions.GenerateUploadUrl(
          messages.
          CloudfunctionsProjectsLocationsFunctionsGenerateUploadUrlRequest(
              generateUploadUrlRequest=messages.GenerateUploadUrlRequest(),
              parent='projects/%s/locations/%s' %
              (properties.VALUES.core.project.GetOrFail(), region)))
      _UploadToGeneratedUrl(zip_file_path, dest.uploadUrl)
      return messages.Source(storageSource=dest.storageSource)
def __init__(self, src_dir, ignore_file=None):
  """Indexes the unignored files and directories under src_dir.

  Walks src_dir, filtering through gcloudignore rules (without writing a
  .gcloudignore to disk), skipping broken symlinks, and recording per-file
  metadata plus the total uncompressed size. Sets any_files_ignored to True
  if anything was excluded by the ignore rules.

  Args:
    src_dir: str, the source directory to snapshot.
    ignore_file: str, optional override for the .gcloudignore file, passed
      through to gcloudignore.GetFileChooserForDir.
  """
  self.src_dir = src_dir
  self.files = {}
  self.dirs = []
  self.uncompressed_size = 0
  file_chooser = gcloudignore.GetFileChooserForDir(
      self.src_dir, write_on_disk=False, ignore_file=ignore_file)
  self.any_files_ignored = False
  # Iterate over each directory in the source directory so that we can collect
  # only the unignored files and directories.
  for (dirpath, dirnames, filenames) in os.walk(six.text_type(self.src_dir)):
    relpath = os.path.relpath(dirpath, self.src_dir)
    for fname in filenames:
      # Bug fix: build the symlink-check path from dirpath (valid relative to
      # the process cwd), not relpath. The old relpath-based path was only
      # correct when cwd happened to equal src_dir, so broken symlinks were
      # mis-detected from any other working directory.
      path = os.path.join(dirpath, fname)
      if os.path.islink(path) and not os.path.exists(path):
        # The file is a broken symlink; ignore it.
        log.info(
            'Ignoring [{}] which is a symlink to non-existent path'.format(
                path))
        continue
      fpath = os.path.join(relpath, fname) if relpath != '.' else fname
      if not file_chooser.IsIncluded(fpath):
        self.any_files_ignored = True
        continue
      fm = FileMetadata(self.src_dir, fpath)
      self.files[fpath] = fm
      self.uncompressed_size += fm.size
    # NOTICE: Modifying dirnames is explicitly allowed by os.walk(). The
    # modified dirnames is used in the next loop iteration which is also
    # the next os.walk() iteration.
    for dname in dirnames[:]:  # Make a copy since we modify the original.
      dpath = os.path.join(relpath, dname) if relpath != '.' else dname
      if not file_chooser.IsIncluded(dpath, is_dir=True):
        dirnames.remove(dname)  # Don't recurse into dpath at all.
        continue
      self.dirs.append(dpath)
def __init__(self, src_dir):
  """Indexes the unignored files and directories under src_dir.

  Walks src_dir, filtering through the directory's gcloudignore rules
  (without writing a .gcloudignore to disk), and records per-file metadata
  plus the total uncompressed size. Sets any_files_ignored to True if
  anything was excluded by the ignore rules.

  Args:
    src_dir: str, the source directory to snapshot.
  """
  self.src_dir = src_dir
  self.files = {}
  self.dirs = []
  self.uncompressed_size = 0
  self._client = core_apis.GetClientInstance('storage', 'v1')
  self._messages = core_apis.GetMessagesModule('storage', 'v1')
  file_chooser = gcloudignore.GetFileChooserForDir(self.src_dir,
                                                   write_on_disk=False)
  self.any_files_ignored = False
  for (dirpath, dirnames, filenames) in os.walk(self.src_dir):
    relpath = os.path.relpath(dirpath, self.src_dir)
    if not file_chooser.IsIncluded(relpath, is_dir=True):
      self.any_files_ignored = True
      continue
    for fname in filenames:
      # Join file paths with Linux path separators, avoiding ./ prefix.
      # GCB workers are Linux VMs so os.path.join produces incorrect output.
      fpath = '/'.join([relpath, fname]) if relpath != '.' else fname
      if not file_chooser.IsIncluded(fpath):
        self.any_files_ignored = True
        continue
      fm = FileMetadata(self.src_dir, fpath)
      self.files[fpath] = fm
      self.uncompressed_size += fm.size
    # NOTICE: Modifying dirnames is explicitly allowed by os.walk(). The
    # modified dirnames is used in the next loop iteration which is also
    # the next os.walk() iteration.
    for dname in dirnames[:]:  # Make a copy since we modify the original.
      # Join dir paths with Linux path separators, avoiding ./ prefix.
      # GCB workers are Linux VMs so os.path.join produces incorrect output.
      dpath = '/'.join([relpath, dname]) if relpath != '.' else dname
      if not file_chooser.IsIncluded(dpath, is_dir=True):
        dirnames.remove(dname)  # Don't recurse into dpath at all.
        continue
      self.dirs.append(dpath)
def _GetChooser(path):
  """Creates the file chooser used to filter files under path.

  node_modules is excluded in addition to the standard default ignore
  patterns.
  """
  extra_patterns = '\nnode_modules\n'
  return gcloudignore.GetFileChooserForDir(
      path,
      default_ignore_file=gcloudignore.DEFAULT_IGNORE_FILE + extra_patterns)
def GetSourceFiles(upload_dir, skip_files_regex, has_explicit_skip_files,
                   runtime, environment, source_dir):
  """Returns an iterator for accessing all source files to be uploaded.

  This method uses several implementations based on the provided runtime and
  env. The rules are as follows, in decreasing priority:
  1) For some runtimes/envs (i.e. those defined in _GCLOUDIGNORE_REGISTRY), we
     completely ignore skip_files and generate a runtime-specific
     .gcloudignore if one is not present, or use the existing .gcloudignore.
  2) For all other runtimes/envs, we:
    2a) Check for an existing .gcloudignore and use that if one exists. We also
        raise an error if the user has both a .gcloudignore file and explicit
        skip_files defined.
    2b) If there is no .gcloudignore, we use the provided skip_files.

  Args:
    upload_dir: str, path to upload directory, the files to be uploaded.
    skip_files_regex: str, skip_files to use if necessary - see above rules for
      when this could happen. This can be either the user's explicit skip_files
      as defined in their app.yaml or the default skip_files we implicitly
      provide if they didn't define any.
    has_explicit_skip_files: bool, indicating whether skip_files_regex was
      explicitly defined by the user
    runtime: str, runtime as defined in app.yaml
    environment: env.Environment enum
    source_dir: str, path to original source directory, for writing generated
      files. May be the same as upload_dir.

  Raises:
    SkipFilesError: if you are using a runtime that no longer supports
      skip_files (such as those defined in _GCLOUDIGNORE_REGISTRY), or if using
      a runtime that still supports skip_files, but both skip_files and a
      .gcloudignore file are present.

  Returns:
    A list of path names of source files to be uploaded.
  """
  gcloudignore_registry = _GetGcloudignoreRegistry()
  registry_entry = gcloudignore_registry.Get(runtime, environment)
  if registry_entry:
    # Rule 1: this runtime mandates .gcloudignore; skip_files is an error.
    if has_explicit_skip_files:
      raise SkipFilesError(
          'skip_files cannot be used with the [{}] runtime. '
          'Ignore patterns are instead expressed in '
          'a .gcloudignore file. For information on the format and '
          'syntax of .gcloudignore files, see '
          'https://cloud.google.com/sdk/gcloud/reference/topic/gcloudignore.'
          .format(runtime))
    # Write (or reuse) a runtime-specific .gcloudignore in source_dir, then
    # list files from upload_dir through it.
    file_chooser = gcloudignore.GetFileChooserForDir(
        source_dir,
        default_ignore_file=registry_entry,
        write_on_disk=True,
        gcloud_ignore_creation_predicate=lambda unused_dir: True,
        include_gitignore=False)
    it = file_chooser.GetIncludedFiles(upload_dir, include_dirs=False)
  elif os.path.exists(os.path.join(source_dir, gcloudignore.IGNORE_FILE_NAME)):
    # Rule 2a: an existing .gcloudignore wins, but not alongside skip_files.
    if has_explicit_skip_files:
      raise SkipFilesError(
          'Cannot have both a .gcloudignore file and skip_files defined in '
          'the same application. We recommend you translate your skip_files '
          'ignore patterns to your .gcloudignore file. See '
          'https://cloud.google.com/sdk/gcloud/reference/topic/gcloudignore '
          'for more information about gcloudignore.')
    it = gcloudignore.GetFileChooserForDir(source_dir).GetIncludedFiles(
        upload_dir, include_dirs=False)
  else:
    # Rule 2b: no .gcloudignore; fall back to skip_files semantics.
    it = util.FileIterator(upload_dir, skip_files_regex)
  return list(it)
def Upload(self, branch, root_path, ignore_file=None):
  """Uploads files to a branch in Cloud Source Repositories.

  Args:
    branch: (string) The name of the branch to upload to. If empty, a
      name will be generated.
    root_path: (string) The path of a directory tree to upload.
    ignore_file: (string) The file overrides the `.gcloudignore` file and
      uses the specified file instead.

  Raises:
    RepoNotFoundError: if the upload repo does not exist in the project.

  Returns:
    A dictionary containing various status information:
      'branch': The name of the branch.
      'source_contexts': One or more dictionaries compatible with the
        ExtendedSourceContext message, including one context pointing
        to the upload. This context will be the only one with the value
        'capture' for its 'category' label.
      'files_written': The number of files uploaded.
      'files_skipped': The number of files skipped.
      'size_written': The total number of bytes in all files uploaded.
  """
  # Verify the upload repo exists before doing any file work.
  try:
    sourcerepo.Source().GetRepo(sourcerepo.ParseRepo(UPLOAD_REPO_NAME))
  except exceptions.HttpNotFoundError:
    raise RepoNotFoundError(
        REPO_NOT_FOUND_ERROR.format(UPLOAD_REPO_NAME, self._project_id))
  file_chooser = gcloudignore.GetFileChooserForDir(
      root_path, write_on_disk=False, ignore_file=ignore_file)
  # Generate a timestamp-plus-uuid branch name when none was given.
  branch = branch or (_GetNow().strftime(TIME_FORMAT) + '.' + _GetUuid().hex)
  all_paths = [
      os.path.join(root_path, f)
      for f in file_chooser.GetIncludedFiles(root_path, include_dirs=False)
  ]
  # Skip symlinks and files larger than SIZE_THRESHOLD; the difference
  # between all_paths and paths is reported as 'files_skipped'.
  paths = [
      f for f in all_paths
      if not os.path.islink(f) and os.path.getsize(f) <= self.SIZE_THRESHOLD
  ]
  git.Git(self._project_id, UPLOAD_REPO_NAME).ForcePushFilesToBranch(
      branch, root_path, sorted(paths))
  # Build the ExtendedSourceContext-compatible dict pointing at the branch.
  source_context = {
      'context': {
          'cloudRepo': {
              'repoId': {
                  'projectRepoId': {
                      'projectId': self._project_id,
                      'repoName': UPLOAD_REPO_NAME
                  }
              },
              'aliasContext': {
                  'kind': 'MOVABLE',
                  'name': branch
              }
          }
      },
      'labels': {
          'category': 'capture'
      }
  }
  return {
      'branch': branch,
      'source_contexts': [source_context],
      'files_written': len(paths),
      'files_skipped': len(all_paths) - len(paths),
      'size_written': sum([os.path.getsize(f) for f in paths])
  }
def Run(self, args):
  """Lists the files under args.directory that gcloudignore would include."""
  chooser = gcloudignore.GetFileChooserForDir(args.directory,
                                              write_on_disk=False)
  if not chooser:
    # Defensive fallback: an empty chooser includes everything.
    chooser = gcloudignore.FileChooser([])
  return chooser.GetIncludedFiles(args.directory, include_dirs=False)