def PrepareToUpload(local_root, dest_dir):
  """Check necessary conditions required for uploading.

  Args:
    local_root: The local directory containing directories to upload.
    dest_dir: The remote directory to upload to.

  Returns:
    existing_dest_paths: A set of filenames for existing files in the
        destination directory.
    gs_api: The gsutil.GsutilApi object.

  Raises:
    BadTimeToUploadError: Internet is not available.
    ValueError: local_root is not a valid path.
  """
  if not os.path.isdir(local_root):
    raise ValueError('Cannot find local directory %s.' % local_root)
  if not HasInternet():
    raise BadTimeToUploadError('No internet connection detected.')

  gs_api = gsutil.GsutilApi()
  try:
    existing_dest_paths = set(gs_api.List(dest_dir))
  except httplib2.ServerNotFoundError:
    raise BadTimeToUploadError('Internet has become unavailable.')
  return existing_dest_paths, gs_api
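# A minimal usage sketch (the helper name '_UploadNewFiles' and the paths are
# hypothetical, not part of the original code): reuse the returned gs_api
# handle and skip files that already exist in the destination.
def _UploadNewFiles(local_root, dest_dir):
  existing_dest_paths, gs_api = PrepareToUpload(local_root, dest_dir)
  for name in os.listdir(local_root):
    dest_path = gcloud_util.GcsPath(dest_dir, name)
    if dest_path not in existing_dest_paths:
      gs_api.Copy(os.path.join(local_root, name), dest_path, overwrite=False)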
def RemoveRemoteFiles():
  gs_api = gsutil.GsutilApi()
  filenames = gs_api.List(
      gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
  for filename in filenames:
    gs_api.DeleteFile(filename)
  self.assertFalse(
      gs_api.List(gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH)))
def __init__(self, folder, base_params):
  self._folder = folder
  self.label = 'Wind Database'
  # Pull down info from the cloud about files in this database set.
  gsutil_api = gsutil.GsutilApi()
  cloud_path = os.path.join(
      'gs://gcp-public-data-makani-deps/deps/turbsim_databases',
      folder, 'h5_files')
  self._databases = gsutil_api.List(cloud_path)
  self._base_params = base_params
def DownloadDatabase(database_file, local_path):
  """Downloads a TurbSim file to the specified local directory, if needed."""
  if os.path.exists(local_path):
    print('TurbSim database already exists locally; skipping download.')
  else:
    gsutil_api = gsutil.GsutilApi()
    # TODO: Set up a class for handling more autoselection tasks so that we
    # don't have to pass as much back and forth, or requery the folder list?
    cloud_path = os.path.join(GetCloudBasePath(),
                              GetOnlineFolder(database_file),
                              'h5_files', database_file)
    gsutil_api.Copy(cloud_path, local_path, overwrite=False)
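# Usage sketch (the helper name '_FetchDatabase' and the cache directory are
# hypothetical): DownloadDatabase is a no-op when the file is already cached
# locally, so repeated calls are cheap.
def _FetchDatabase(database_file, cache_dir='/tmp/turbsim_cache'):
  local_path = os.path.join(cache_dir, database_file)
  DownloadDatabase(database_file, local_path)
  return local_path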
def GetOnlineFolder(database_file):
  """Extracts the source id of the file and returns the matching folder name."""
  assert CheckTurbsimFileName(database_file), (
      'File name does not follow the TurbSim naming convention.')
  online_folder_id = '-' + database_file[16:19] + '-'
  gsutil_api = gsutil.GsutilApi()
  online_paths = gsutil_api.List(GetCloudBasePath())
  online_folder = [
      os.path.basename(os.path.normpath(path))
      for path in online_paths if online_folder_id in path
  ]
  assert len(online_folder) == 1, (
      'TurbSim online folder identification failed.')
  return online_folder[0]
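# Illustration of the matching step (the id and folder names below are
# invented for the example): characters [16:19] of the database file name
# form the source id, and exactly one online folder must contain '-<id>-'.
#
#   database_file[16:19] == 'abc'  =>  online_folder_id == '-abc-'
#   gs://<base>/20190101-abc-onshore/   -> matches; '20190101-abc-onshore'
#                                          is returned
#   gs://<base>/20190101-xyz-offshore/  -> skipped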
def AssertRemoteFiles():
  gs_api = gsutil.GsutilApi()
  filenames = gs_api.List(
      gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
  prefix = os.path.join(CLOUD_BASE_DIR, CLOUD_LOG_PATH)
  self.assertEqual(
      set(filenames),
      {os.path.join(prefix, 'w7/logs/logs1-folder/subfolder/dummy.json'),
       os.path.join(
           prefix, 'w7/logs/logs1-folder/another_subfolder/dummy.json'),
       os.path.join(
           prefix, 'w7/logs/logs1-folder/one_more_subfolder/dummy.json'),
       os.path.join(prefix, 'w7/logs/w7-2013.h5'),
       os.path.join(prefix, 'M600A/logs/m600.h5'),
       os.path.join(prefix, 'M600A/logs/dummy.json'),
       os.path.join(prefix, 'w7/logs/logs1-folder/dummy.json'),
       os.path.join(prefix, 'w7/logs/logs1-folder/dummy.txt')})
def IterFilesFromCloud(path_prefix, regex_str):
  """Iterates through files in a cloud path recursively.

  The function downloads matching files in the cloud one by one. Downloaded
  files are removed after each iteration.

  Args:
    path_prefix: Path, or prefix, to the cloud storage.
        E.g., gs://bucket/dir/.
    regex_str: Regular expression to match the file name from the beginning.

  Yields:
    The full cloud path to the file, and the path to the downloaded file.
  """
  gs_api = gsutil.GsutilApi()
  filenames = [f for f in gs_api.List(path_prefix)
               if f.endswith('.h5') and not f.endswith('-format.h5')]
  if regex_str:
    regex = re.compile(regex_str)
    filenames = [f for f in filenames
                 if regex.match(gcloud_util.GcsBasename(f))]
  if not filenames:
    print('Found no files matching the criteria.')
    return

  for full_cloud_path in sorted(filenames):
    # Create the temporary file with delete=False and close it immediately,
    # so that the download can write to temp_fp.name; the file is removed
    # explicitly once the caller resumes the generator.
    temp_fp = tempfile.NamedTemporaryFile(suffix='.h5', delete=False)
    temp_fp.close()
    print('------------------- %s ------------' % full_cloud_path)
    print('Downloading %s...' % full_cloud_path)
    gs_api.Copy(full_cloud_path, temp_fp.name, overwrite=True)
    yield (full_cloud_path, temp_fp.name)
    os.remove(temp_fp.name)
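# A minimal usage sketch (the path prefix and regex are hypothetical). All
# processing must happen inside the loop body, because each temporary local
# copy is deleted as soon as the next iteration begins.
for cloud_path, local_path in IterFilesFromCloud('gs://bucket/logs/', r'w7-'):
  print('Processing %s (local copy at %s)' % (cloud_path, local_path))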
def AssertNoRemoteFiles():
  gs_api = gsutil.GsutilApi()
  filenames = gs_api.List(
      gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
  self.assertEqual(filenames, [])