def stream(self):
    """Return a streaming ``tarfile.TarFile`` over this artifact, or None.

    Resolves the artifact's URL, opens it either as a local file or via
    ``urlopen``, and wraps the file object in a non-seekable tar stream
    (mode ``'r|*'`` = streaming read with transparent compression
    detection). Returns None when no URL is available. On failure the
    error is reported fatally via ``util.report_fatal``.
    """
    url = self.get_url()
    if url is None:
        # Nothing to stream for this artifact.
        return None
    # pylint: disable=consider-using-with
    # if our url is actually a local file reference
    # (can happen in local execution mode)
    # then we just open a local file:
    if self._looks_like_local_file(url):
        fileobj = open(url, 'rb')
    else:
        fileobj = urlopen(url)
    if fileobj:
        try:
            # The caller owns the returned TarFile (and, through it,
            # the underlying file object).
            retval = tarfile.open(fileobj=fileobj, mode='r|*')
            return retval
        except BaseException as exc:
            # BaseException is deliberate here: check_for_kb_interrupt()
            # presumably re-raises KeyboardInterrupt before we swallow
            # the error — TODO confirm against util's implementation.
            util.check_for_kb_interrupt()
            fileobj.close()
            msg: str = 'FAILED to stream artifact {0}: {1}'.format(
                url, exc)
            util.report_fatal(msg, self.logger)
    return None
def _download_no_key_artifact(self):
    """Download an artifact addressed by remote path rather than by key.

    Returns the local path of the downloaded (or cached) artifact, or
    the remote path itself when it points at a Docker Hub / Singularity
    Hub image, in which case there is nothing to download.
    """
    if self.is_mutable:
        self.logger.info("Downloading mutable artifact: %s", self.name)
    if self.remote_path is None:
        util.report_fatal(
            "CANNOT download artifact without remote path: {0}".format(
                self.name),
            self.logger)
    # Derive the cache location from a generated key and the remote path.
    cache_root = artifacts_tracker.get_blob_cache(self._generate_key())
    target_path = self._get_target_local_path(cache_root, self.remote_path)
    if os.path.exists(target_path):
        # Already cached locally — reuse it.
        self.logger.debug(
            'Immutable artifact exists at local_path {0},'
            ' skipping the download'.format(target_path))
        self.local_path = target_path
        return target_path
    handler_type = self.storage_handler.type
    if handler_type in (StorageType.storageDockerHub,
                        StorageType.storageSHub):
        # Container-registry references are resolved elsewhere.
        self.logger.debug(
            'Qualified {0} points to a shub or dockerhub,'
            ' skipping the download'.format(self.remote_path))
        return self.remote_path
    self.storage_handler.download_remote_path(self.remote_path, target_path)
    self.logger.debug('Downloaded file %s from external source %s',
                      target_path, self.remote_path)
    self.local_path = target_path
    return self.local_path
def _tar_artifact_single_file(local_path: str, tar_filename: str, key, logger):
    """Create an uncompressed tar archive at tar_filename containing the
    single file at local_path, stored under ``./<basename>``.

    :param local_path: path of the file to archive
    :param tar_filename: destination tar file to create
    :param key: artifact key, used only for logging
    :param logger: logger for debug/fatal messages
    On failure the error is reported fatally via ``util.report_fatal``.
    """
    debug_str: str = ("Tarring artifact single file. " +
                      "tar_filename = {0}, " +
                      "local_path = {1}, " +
                      "key = {2}").format(tar_filename, local_path, key)
    logger.debug(debug_str)
    try:
        # `with` guarantees the archive is closed on every path,
        # replacing the manual try/finally close.
        with tarfile.open(tar_filename, 'w') as tar:
            _, last_name = os.path.split(local_path)
            tar.add(local_path, "./" + last_name)
    except Exception as exc:
        msg: str =\
            "FAILED to create tarfile: {0} for artifact {1} reason: {2}"\
            .format(tar_filename, local_path, exc)
        util.report_fatal(msg, logger)
def _has_newer_artifact(self, local_path) -> bool:
    """Return True when the stored copy of ``self.key`` is newer than
    the file at local_path, allowing for the storage handler's
    timestamp shift (clock skew)."""
    self.logger.debug(
        'Comparing date of the artifact %s in storage with local %s',
        self.key, local_path)
    remote_time = self.storage_handler.get_file_timestamp(self.key)
    local_time = os.path.getmtime(local_path)
    if remote_time is None:
        util.report_fatal(
            ("Unable to get storage timestamp for {0}, storage is either "
             "corrupted or has not finished uploading").format(self.key),
            self.logger)
        # Defensive fallback in case report_fatal returns.
        return False
    skew = self.storage_handler.get_timestamp_shift()
    if local_time > remote_time - skew:
        self.logger.debug(
            "Local path %s is younger than stored %s, skipping the download",
            local_path, self.key)
        return False
    return True
def _tar_artifact_directory(local_path: str, tar_filename: str, key,
                            ignore_filepath, logger):
    """Create an uncompressed tar archive at tar_filename holding the
    top-level entries of the directory local_path (each added under its
    own name).

    NOTE(review): ignore_filepath only influences the debug log line —
    no entries are actually excluded here; confirm that exclusion is
    handled elsewhere or was intended.
    On failure the error is reported fatally via ``util.report_fatal``.
    """
    tar = None
    try:
        debug_msg = ("Tarring artifact directory. "
                     "tar_filename = {0}, "
                     "local_path = {1}, "
                     "key = {2}").format(tar_filename, local_path, key)
        if ignore_filepath is not None:
            debug_msg += ", exclude = {0}".format(ignore_filepath)
        logger.debug(debug_msg)
        tar = tarfile.open(tar_filename, 'w')
        for entry in os.listdir(local_path):
            tar.add(os.path.join(local_path, entry), arcname=entry)
    except Exception as exc:
        util.report_fatal(
            "FAILED to create tarfile: {0} for artifact {1} reason: {2}"
            .format(tar_filename, local_path, exc),
            logger)
    finally:
        if tar is not None:
            tar.close()
def upload(self, local_path=None):
    """Tar the artifact at local_path (default: ``self.local_path``) and
    push it to storage under ``self.key``.

    Returns the storage key when the artifact is uploaded or the upload
    can be skipped, and None when the local path does not exist.
    """
    if self.storage_handler is None:
        util.report_fatal(
            "No storage handler is set for artifact {0}".format(self.key),
            self.logger)
    if local_path is None:
        local_path = self.local_path
    if self.in_blobstore:
        # Content already present in the blobstore — nothing to do.
        self.logger.debug(
            'Artifact with key {0} exists in blobstore,'
            ' skipping the upload'.format(self.key))
        return self.key
    if not os.path.exists(local_path):
        self.logger.debug(
            "Local path %s does not exist. Not uploading anything.",
            local_path)
        return None
    tar_filename = tar_artifact(local_path, self.key,
                                self.get_compression(), self.logger)
    if self.key is None:
        # Content-address the blob by the checksum of its tarball.
        self.key = ('blobstore/' + util.sha256_checksum(tar_filename) +
                    '.tar' +
                    util.compression_to_extension(self.get_compression()))
    if self.storage_handler.get_file_timestamp(self.key) is not None:
        # Identical content was already uploaded under this key.
        self.logger.debug(
            'Artifact with key %s exists in blobstore, skipping the upload',
            self.key)
        os.remove(tar_filename)
        return self.key
    self.storage_handler.upload_file(self.key, tar_filename)
    os.remove(tar_filename)
    return self.key
def download(self, local_path=None, only_newer=True):
    """Download this artifact and return its local path.

    :param local_path: destination; when None, falls back to
        ``self.local_path`` (if it exists) or the tracker cache for
        mutable/immutable artifacts respectively.
    :param only_newer: when True, skip the download if the existing
        local copy is at least as new as the stored one.
    :returns: the local path of the artifact content.
    """
    if self.storage_handler is None:
        msg: str = "No storage handler is set for artifact {0}" \
            .format(self.key)
        util.report_fatal(msg, self.logger)
    if self.key is None:
        # Artifact is addressed by remote path only, not by a key.
        return self._download_no_key_artifact()
    if local_path is None:
        if self.local_path is not None and \
                os.path.exists(self.local_path):
            local_path = self.local_path
        else:
            if self.is_mutable:
                local_path = artifacts_tracker.get_artifact_cache(self.key)
            else:
                local_path = artifacts_tracker.get_blob_cache(self.key)
                # NOTE(review): nesting reconstructed from flattened
                # source — this early-return is taken to apply only to
                # immutable blob-cache hits (mutable artifacts fall
                # through to the only_newer check below); confirm
                # against the upstream source.
                if os.path.exists(local_path):
                    msg: str = (
                        'Immutable artifact exists at local_path {0},' +
                        ' skipping the download').format(local_path)
                    self.logger.debug(msg)
                    self.local_path = local_path
                    return local_path
    # Strip a single trailing slash so paths/keys join cleanly.
    local_path = re.sub(r'\/\Z', '', local_path)
    self.logger.debug(
        "Downloading dir %s to local path %s from studio.storage...",
        self.key, local_path)
    if only_newer and os.path.exists(local_path):
        # Keep the local copy when storage has nothing newer.
        if not self._has_newer_artifact(local_path):
            return local_path
    # Now download our artifact from studio.storage and untar it:
    return self._download_and_untar_artifact(local_path)
def __init__(self, cred_dict):
    """Parse a credentials specification into typed fields.

    Accepted forms for ``cred_dict``:
      * None, the string ``'none'``, or an empty dict — no credentials;
        every field stays None.
      * ``{AWS_TYPE: {...}}`` — AWS-style credentials; access key and
        secret key are mandatory, session token/region/profile optional.
    Any other shape is reported fatally via ``util.report_fatal``.
    """
    self.logger = logs.get_logger(self.__class__.__name__)
    self.logger.setLevel(storage_setup.get_storage_verbose_level())
    # Defaults: no credentials configured.
    self.type = None
    self.key = None
    self.secret_key = None
    self.session_token = None
    self.region = None
    self.profile = None
    if cred_dict is None:
        return
    if isinstance(cred_dict, str) and cred_dict == 'none':
        return
    if not isinstance(cred_dict, dict):
        msg: str =\
            "NOT SUPPORTED credentials format {0}".format(repr(cred_dict))
        util.report_fatal(msg, self.logger)
    if len(cred_dict) == 0:
        # Empty credentials dictionary is like None:
        return
    # Membership test on the dict directly (no need for .keys()).
    if len(cred_dict) == 1 and AWS_TYPE in cred_dict:
        aws_creds = cred_dict[AWS_TYPE]
        self.type = AWS_TYPE
        self.key = aws_creds.get(AWS_KEY, None)
        self.secret_key = aws_creds.get(AWS_SECRET_KEY, None)
        self.session_token = aws_creds.get(AWS_SESSION_TOKEN, None)
        self.region = self._get_named(AWS_REGION, aws_creds)
        self.profile = self._get_named(AWS_PROFILE, aws_creds)
        if self.key is None or self.secret_key is None:
            msg: str = \
                "INVALID aws credentials format {0}".format(repr(cred_dict))
            util.report_fatal(msg, self.logger)
    else:
        msg: str =\
            "NOT SUPPORTED credentials format {0}".format(repr(cred_dict))
        util.report_fatal(msg, self.logger)
def _report_fatal(self, msg: str):
    """Report *msg* as a fatal error via the shared ``report_fatal``
    helper, using this instance's logger."""
    report_fatal(msg, self.logger)