def _check_bucket(self):
    """Validate the optimizer bucket and load its stored metadata.

    If the bucket exists, ``metadata.json`` is fetched from it; the
    recorded endpoint is checked against the current one, and the stored
    ``compressor`` plus the full metadata dict are cached on the
    instance. If the bucket exists but holds no ``metadata.json``, the
    bucket is removed and ``FileNotFoundError`` is raised. A missing
    bucket is only logged.

    Raises:
        FileNotFoundError: bucket exists but ``metadata.json`` could not
            be fetched (the bucket is removed first).
        ValueError: the endpoint recorded in metadata differs from
            ``self.endpoint``.
    """
    _client = (
        Minio(
            self.endpoint,
            access_key=self.access_key,
            secret_key=self.secret_key,
            secure=self.secure,
            region=self.region,
        )
        if not check_nas(self.endpoint)
        else NAS(self.endpoint)
    )
    if _client.bucket_exists(self.bucket_name):
        try:
            _metadata = _client.get_object(self.bucket_name, "metadata.json")
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt before destructively removing the bucket.
        except Exception:
            _client.remove_bucket(self.bucket_name)
            raise FileNotFoundError(
                "metadata.json is not in bucket name {}"
                ", So this bucket will be removed".format(
                    self.bucket_name))
        metadata_dict = json.loads(_metadata.read().decode("utf-8"))
        if self.endpoint != metadata_dict["endpoint"]:
            raise ValueError(
                "Already created endpoint({}) doesn't current endpoint str({})"
                " It may occurs permission denied error".format(
                    metadata_dict["endpoint"], self.endpoint))
        self.compressor = metadata_dict["compressor"]
        self.metadata = metadata_dict
    else:
        logger.info("{} {} is not exist!".format(self.optimizer_name,
                                                 str(self.additional)))
def __init__(
    self,
    config,
    num_worker_threads=4,
    clear=True,
    cache_folder_path="~/.matorage",
    index=False,
):
    """Set up the dataset reader: merge bucket metadata and prefetch objects.

    Args:
        config: data configuration object holding endpoint, bucket name
            and credentials (read throughout as ``self.config``).
        num_worker_threads (int): worker threads for the parallel download.
        clear (bool): when True, cached local files are removed at exit.
        cache_folder_path (str): local cache folder passed to ``_caching``
            (used only for non-NAS endpoints).
        index (bool): when True, skip the up-front object download.
    """
    self.config = config
    self.attribute = self._set_attribute()

    # Storage configuration
    self.num_worker_threads = num_worker_threads
    self.clear = clear
    self.index = index

    # Validates the bucket and loads compressor/attribute metadata; must
    # run before _merge_metadata below.
    self._check_bucket()

    # merge all metadatas and load in memory.
    self.merged_indexer, self.merged_filetype = self._merge_metadata()
    self.end_indices = list(self.merged_indexer.keys())

    # Per-caller client cache; populated lazily elsewhere.
    self._clients = {}

    if not self.index:
        # cache object which is downloaded.
        if not check_nas(self.config.endpoint):
            self._caching(cache_folder_path=cache_folder_path)
        else:
            # NAS endpoint: objects are addressed as local paths, so an
            # empty mapper is enough — no cache file is loaded.
            self._object_file_mapper = {}
        # download all object in /tmp folder
        self._init_download()

    # Registered last so _exit only runs on fully-initialized instances.
    atexit.register(self._exit)
def _create_client(self):
    """Build a storage client for ``self.config``'s endpoint.

    Returns:
        ``NAS`` wrapper when the endpoint is a NAS path, otherwise a
        ``Minio`` client built from the config credentials.
    """
    # `region` was previously omitted here while the writer-side client
    # (built from the same config) passes it; omitting it breaks
    # region-scoped S3 endpoints. Passing it keeps both clients consistent.
    return (
        Minio(
            endpoint=self.config.endpoint,
            access_key=self.config.access_key,
            secret_key=self.config.secret_key,
            secure=self.config.secure,
            region=self.config.region,
        )
        if not check_nas(self.config.endpoint)
        else NAS(self.config.endpoint)
    )
def _create_client(self):
    """Build a storage client for this instance's endpoint.

    Returns a ``NAS`` wrapper when the endpoint is a NAS path; otherwise
    a ``Minio`` client built from the instance credentials.
    """
    if check_nas(self.endpoint):
        return NAS(self.endpoint)
    return Minio(
        endpoint=self.endpoint,
        access_key=self.access_key,
        secret_key=self.secret_key,
        secure=self.secure,
        region=self.region,
    )
def _exit(self):
    """Delete every cached download and the cache index file at exit.

    Runs only when ``clear`` was requested and the endpoint is not a NAS
    path (NAS objects are never downloaded, so nothing is removed).
    """
    if not self.clear or check_nas(self.config.endpoint):
        return
    candidates = list(self._object_file_mapper.values())
    candidates.append(self.cache_path)
    for candidate in candidates:
        if os.path.exists(candidate):
            os.remove(candidate)
def _init_download(self):
    """
    Download all object from bucket with multi thread.
    cache to `_object_file_mapper` downloaded object paths.

    For non-NAS endpoints each remote object is queued for download into
    a fresh temp path; for NAS endpoints no download happens — the mapper
    simply records the object's path under the NAS mount. The resulting
    mapper is persisted to ``self.cache_path`` as JSON (non-NAS only).
    """
    _client = self._create_client()
    _downloader = Downloader(
        client=_client,
        bucket=self.config.bucket_name,
        num_worker_threads=self.num_worker_threads,
    )

    # Every indexed data object plus the auxiliary filetype objects.
    _remote_files = list(self.merged_indexer.values()) + list(
        self.merged_filetype)
    for _remote_file in _remote_files:
        if not check_nas(self.config.endpoint):
            # NOTE(review): tempfile.mktemp is deprecated and race-prone
            # (the name may be claimed between generation and use) —
            # consider mkstemp; needs care since _remote_file may contain
            # path separators. TODO confirm before changing.
            _local_file = tempfile.mktemp(_remote_file)
            if _remote_file not in self._object_file_mapper:
                self._object_file_mapper[_remote_file] = _local_file
                _downloader.set_queue(local_file=_local_file,
                                      remote_file=_remote_file)
        else:
            if _remote_file not in self._object_file_mapper:
                # NAS: the object is already reachable as a local path.
                self._object_file_mapper[_remote_file] = os.path.join(
                    self.config.endpoint, self.config.bucket_name,
                    _remote_file)
    # Block until all queued downloads complete.
    _downloader.join_queue()

    # Every remote object must have exactly one local mapping.
    assert len(self._object_file_mapper) == (len(self.merged_indexer) +
                                             len(self.merged_filetype))

    # Persist the mapping once so later sessions can reuse the cache.
    if not check_nas(self.config.endpoint) and not os.path.exists(
            self.cache_path):
        with open(self.cache_path, "w") as f:
            json.dump(self._object_file_mapper, f)

    logger.info("All {} {} datasets are downloaded done.".format(
        self.config.dataset_name, str(self.config.additional)))
def __init__( self, config, multipart_upload_size=5 * _MB, num_worker_threads=4, inmemory=False, refresh=False, ): self.config = config # Storage configuration self.multipart_upload_size = multipart_upload_size self.num_worker_threads = num_worker_threads # HDF5 configuration self.inmemory = inmemory self.filter = tb.Filters(**config.compressor) self._filelist = [] self._file, self._earray = self._get_newfile() self._disconnected = False self._client = (Minio( endpoint=self.config.endpoint, access_key=self.config.access_key, secret_key=self.config.secret_key, secure=self.config.secure, region=self.config.region, ) if not check_nas(self.config.endpoint) else NAS( self.config.endpoint)) self._check_and_create_bucket(refresh=refresh) self._uploader = Uploader( client=self._client, bucket=self.config.bucket_name, num_worker_threads=self.num_worker_threads, multipart_upload_size=self.multipart_upload_size, inmemory=self.inmemory, ) atexit.register(self._exit)
def _check_bucket(self):
    """Load dataset metadata from the bucket if it exists.

    When the bucket exists, the first object under the ``metadata/``
    prefix is read; its recorded endpoint is validated against the
    current one, and the stored compressor and attributes are cached on
    the instance. A missing bucket is only logged — nothing is created
    here (the previous docstring's "create new bucket" claim was wrong).

    Returns:
        :obj: `None`:

    Raises:
        ValueError: the endpoint recorded in metadata differs from
            ``self.endpoint``.
    """
    _client = (Minio(
        self.endpoint,
        access_key=self.access_key,
        secret_key=self.secret_key,
        secure=self.secure,
    ) if not check_nas(self.endpoint) else NAS(self.endpoint))
    if _client.bucket_exists(self.bucket_name):
        objects = _client.list_objects(self.bucket_name, prefix="metadata/")
        _metadata = None
        for obj in objects:
            # Only the first metadata object is needed.
            _metadata = _client.get_object(self.bucket_name, obj.object_name)
            break
        if not _metadata:
            # Bucket exists but holds no metadata yet; nothing to load.
            return
        metadata_dict = json.loads(_metadata.read().decode("utf-8"))
        if self.endpoint != metadata_dict["endpoint"]:
            raise ValueError(
                "Already created endpoint({}) doesn't current endpoint str({})"
                " It may occurs permission denied error".format(
                    metadata_dict["endpoint"], self.endpoint))
        self.compressor = metadata_dict["compressor"]
        self.attributes = [
            DataAttribute(**item) for item in metadata_dict["attributes"]
        ]
    else:
        # logger.warn is a deprecated alias; logger.warning is the
        # supported API.
        logger.warning("{} {} is not exist!".format(self.dataset_name,
                                                    str(self.additional)))
def __init__(self,
             config,
             num_worker_threads=4,
             multipart_upload_size=5 * _MB):
    """Wire up an in-memory uploader against the configured bucket.

    Args:
        config: configuration holding endpoint, bucket and credentials.
        num_worker_threads (int): worker threads for the upload queue.
        multipart_upload_size (int): chunk size for multipart uploads.
    """
    self.config = config
    self.num_worker_threads = num_worker_threads
    self.multipart_upload_size = multipart_upload_size

    # NAS endpoints get the local wrapper; everything else gets MinIO.
    if check_nas(self.config.endpoint):
        self._client = NAS(self.config.endpoint)
    else:
        self._client = Minio(
            endpoint=self.config.endpoint,
            access_key=self.config.access_key,
            secret_key=self.config.secret_key,
            secure=self.config.secure,
        )

    self._uploader = Uploader(
        client=self._client,
        bucket=self.config.bucket_name,
        num_worker_threads=self.num_worker_threads,
        multipart_upload_size=self.multipart_upload_size,
        inmemory=True,
    )