def _sync_metadata(echo, metadata, datastore_root, attempt):
    if metadata.TYPE == 'local':
        def echo_none(*args, **kwargs):
            pass
        path = os.path.join(
            datastore_root,
            MetaflowDataStore.filename_with_attempt_prefix(
                'metadata.tgz', attempt))
        url = urlparse(path)
        bucket = url.netloc
        key = url.path.lstrip('/')
        s3, err = get_s3_client()
        try:
            s3.head_object(Bucket=bucket, Key=key)
            # If we are here, we can download the object
            with util.TempDir() as td:
                tar_file_path = os.path.join(td, 'metadata.tgz')
                with open(tar_file_path, 'wb') as f:
                    s3.download_fileobj(bucket, key, f)
                with tarfile.open(tar_file_path, 'r:gz') as tar:
                    tar.extractall(td)
                copy_tree(
                    os.path.join(td, DATASTORE_LOCAL_DIR),
                    LocalDataStore.get_datastore_root_from_config(echo_none),
                    update=True)
        except err as e:  # noqa F841
            pass
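
# Illustration only, not part of the original module: a minimal sketch of how the
# s3:// path assembled in _sync_metadata splits into the bucket and key passed to
# head_object() and download_fileobj(). The URL below is a made-up example; the
# real key suffix comes from MetaflowDataStore.filename_with_attempt_prefix().
def _split_s3_url_example():
    from urllib.parse import urlparse
    url = urlparse('s3://example-bucket/metaflow/MyFlow/3.metadata.tgz')
    bucket = url.netloc          # 'example-bucket'
    key = url.path.lstrip('/')   # 'metaflow/MyFlow/3.metadata.tgz'
    return bucket, key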
def task_finished(self, step_name, flow, graph, is_task_ok, retry_count, max_retries):
    if self.ds_root:
        # We have a local metadata service so we need to persist it to the datastore.
        # Note that the datastore is *always* s3 (see runtime_task_created function)
        with util.TempDir() as td:
            tar_file_path = os.path.join(td, 'metadata.tgz')
            with tarfile.open(tar_file_path, 'w:gz') as tar:
                # The local metadata is stored in the local datastore
                # which, for batch jobs, is always the DATASTORE_LOCAL_DIR
                tar.add(DATASTORE_LOCAL_DIR)
            # At this point we upload what we need to s3
            s3, _ = get_s3_client()
            with open(tar_file_path, 'rb') as f:
                path = os.path.join(
                    self.ds_root,
                    MetaflowDataStore.filename_with_attempt_prefix(
                        'metadata.tgz', retry_count))
                url = urlparse(path)
                s3.upload_fileobj(f, url.netloc, url.path.lstrip('/'))
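
# Sketch only (assumptions flagged): a standalone equivalent of the pack-and-upload
# step in task_finished above, using a plain boto3 client and the standard library
# instead of Metaflow's get_s3_client() and util.TempDir() helpers. The function
# name and its arguments are hypothetical.
def _upload_metadata_dir_example(local_dir, bucket, key):
    import os
    import tarfile
    import tempfile

    import boto3

    s3 = boto3.client('s3')
    with tempfile.TemporaryDirectory() as td:
        tar_file_path = os.path.join(td, 'metadata.tgz')
        # Pack the local metadata directory, mirroring tar.add(DATASTORE_LOCAL_DIR).
        with tarfile.open(tar_file_path, 'w:gz') as tar:
            tar.add(local_dir)
        # Upload the finished tarball, mirroring s3.upload_fileobj() above.
        with open(tar_file_path, 'rb') as f:
            s3.upload_fileobj(f, bucket, key)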