def main():
    """
    Functional testing of minio python library.
    """
    fake = Factory.create()
    client = Minio('s3.amazonaws.com',
                   os.getenv('ACCESS_KEY'),
                   os.getenv('SECRET_KEY'))

    _http = urllib3.PoolManager(
        cert_reqs='CERT_REQUIRED',
        ca_certs=certifi.where()
    )

    # Get unique bucket_name, object_name.
    bucket_name = uuid.uuid4().__str__()
    object_name = uuid.uuid4().__str__()

    # Enable trace
    # client.trace_on(sys.stderr)

    # Make a new bucket.
    bucket_name = 'minio-pytest'

    print(client.make_bucket(bucket_name))
    print(client.make_bucket(bucket_name + '.unique', location='us-west-1'))

    # Check that the server returns a valid error code for a duplicate bucket.
    try:
        client.make_bucket(bucket_name + '.unique', location='us-west-1')
    except ResponseError as err:
        if str(err.code) in ['BucketAlreadyOwnedByYou', 'BucketAlreadyExists']:
            pass
        else:
            raise

    # Check if bucket was created properly.
    print(client.bucket_exists(bucket_name))
    print(client.bucket_exists(bucket_name + '.unique'))

    # List all buckets.
    buckets = client.list_buckets()
    for bucket in buckets:
        print(bucket.name, bucket.creation_date)

    with open('testfile', 'wb') as file_data:
        file_data.write(fake.text().encode('utf-8'))

    # Put a file
    file_stat = os.stat('testfile')
    with open('testfile', 'rb') as file_data:
        client.put_object(bucket_name, object_name, file_data,
                          file_stat.st_size)

    # Fput a file
    print(client.fput_object(bucket_name, object_name + '-f', 'testfile'))

    # Fetch stats on your object.
    print(client.stat_object(bucket_name, object_name))

    # Get a full object
    object_data = client.get_object(bucket_name, object_name)
    with open('newfile', 'wb') as file_data:
        for data in object_data:
            file_data.write(data)

    # Get a full object locally.
    print(client.fget_object(bucket_name, object_name, 'newfile-f'))

    # List all object paths in bucket.
    objects = client.list_objects(bucket_name, recursive=True)
    for obj in objects:
        print(obj.bucket_name, obj.object_name, obj.last_modified,
              obj.etag, obj.size, obj.content_type)

    presigned_get_object_url = client.presigned_get_object(bucket_name,
                                                           object_name)
    response = _http.urlopen('GET', presigned_get_object_url)
    if response.status != 200:
        response_error = ResponseError(response)
        raise response_error.get(bucket_name, object_name)

    presigned_put_object_url = client.presigned_put_object(bucket_name,
                                                           object_name)
    value = fake.text().encode('utf-8')
    data = io.BytesIO(value).getvalue()
    response = _http.urlopen('PUT', presigned_put_object_url, body=data)
    if response.status != 200:
        response_error = ResponseError(response)
        raise response_error.put(bucket_name, object_name)

    object_data = client.get_object(bucket_name, object_name)
    if object_data.read() != value:
        raise ValueError('Bytes not equal')

    # Post policy.
    policy = PostPolicy()
    policy.set_bucket_name(bucket_name)
    policy.set_key_startswith('objectPrefix/')
    expires_date = datetime.utcnow() + timedelta(days=10)
    policy.set_expires(expires_date)
    print(client.presigned_post_policy(policy))

    # Remove an object.
    print(client.remove_object(bucket_name, object_name))
    print(client.remove_object(bucket_name, object_name + '-f'))

    # Remove a bucket. This operation will only work if your bucket is empty.
    print(client.remove_bucket(bucket_name))
    print(client.remove_bucket(bucket_name + '.unique'))

    # Remove temporary files.
    os.remove('testfile')
    os.remove('newfile')
    os.remove('newfile-f')
class MinIO(object):
    ''' Class for working with Minio '''

    def __init__(self, config, pathTmp, verbose):
        """ Initialising the object

        Parameters
        ----------
        config : dict
            config of module
        pathTmp : str
            path to temporary files
        verbose : bool
            verbose output
        """
        self.verbose = verbose
        self.config = config
        self.pathTmp = pathTmp
        self.moduleName = self.config['NAME']
        self.handle = None
        self.host = 'localhost'
        if 'S3_HOST' in self.config:
            self.host = self.config['S3_HOST']
        self.port = '9000'
        if 'S3_PORT' in self.config:
            self.port = self.config['S3_PORT']
        self.url = "%s:%s" % (self.host, self.port)
        self.access_key = ''
        if 'S3_ACCESS_KEY' in self.config:
            self.access_key = self.config['S3_ACCESS_KEY']
        self.secret_key = ''
        if 'S3_SECRET_KEY' in self.config:
            self.secret_key = self.config['S3_SECRET_KEY']

    def reconnect(self):
        self.close()
        timeout = 15
        stop_time = 1
        elapsed_time = 0
        str_err = ''
        while (self.handle is None) and elapsed_time < timeout:
            time.sleep(stop_time)
            elapsed_time += stop_time
            try:
                self.handle = Minio(self.url,
                                    access_key=self.access_key,
                                    secret_key=self.secret_key,
                                    secure=False)
            except Exception as e:
                if self.verbose:
                    print("DBG: WAIT: %d: Connect to Minio '%s': %s" %
                          (elapsed_time, self.url, str(e)))
                str_err = str(e)
        if self.handle is None:
            print("FATAL: Connect to Minio '%s': %s" % (self.url, str_err))
            return None
        if self.verbose:
            print("DBG: Connected to Minio '%s'" % (self.url))
        return self.handle

    def close(self):
        # The Minio client object has no close() method; dropping the
        # reference is enough to release the connection pool.
        self.handle = None

    def init(self):
        if 'INIT_MINIO_CREATE_FOLDERS' not in self.config:
            return
        if self.reconnect() is None:
            return
        folders = self.config['INIT_MINIO_CREATE_FOLDERS']
        af = folders.split(';')
        for folder in af:
            f = folder.split(':')
            if len(f) == 2:
                self.mkDir(f[0], f[1])

    def getBasketsList(self):
        bs = self.handle.list_buckets()
        res = []
        for b in bs:
            res.append(b.name)
        res.sort()
        return res

    def uploadFile(self, bucketName, filename, fullPath):
        try:
            found = self.handle.bucket_exists(bucketName)
            if not found:
                self.handle.make_bucket(bucketName)
            self.handle.fput_object(bucketName, filename, fullPath)
        except Exception as e:
            print("FATAL: uploadFile to Minio(%s): %s" % (self.url, str(e)))
            return False
        return True

    def mkDir(self, bucketName, fullPath):
        try:
            found = self.handle.bucket_exists(bucketName)
            if not found:
                self.handle.make_bucket(bucketName)
            self.handle.put_object(bucketName, fullPath, io.BytesIO(b''), 0,
                                   content_type='application/x-directory')
            if self.verbose:
                print("DBG: Make folder '%s:%s' into Minio(%s)" %
                      (bucketName, fullPath, self.url))
        except Exception as e:
            print("FATAL: Make folder '%s' into Minio(%s): %s" %
                  (fullPath, self.url, str(e)))
            return False
        return True

    def getListObjects(self, bucketName, currentDir=''):
        res = []
        objects = []
        try:
            found = self.handle.bucket_exists(bucketName)
            if not found:
                self.handle.make_bucket(bucketName)
            objects = self.handle.list_objects(bucketName, prefix=currentDir,
                                               recursive=True)
        except Exception as e:
            print("FATAL: List folder '%s' into Minio(%s): %s" %
                  (currentDir, self.url, str(e)))
        for obj in objects:
            res.append(obj.object_name)
        return res

    def downloadFile(self, bucketName, filename, fullPath):
        try:
            self.handle.fget_object(bucketName, filename, fullPath)
        except Exception as e:
            print("FATAL: downloadFile to Minio(%s): %s" % (self.url, str(e)))
            return False
        return True

    def compareFiles(self, bucketName, filename, filePath):
        result = False
        if not os.path.exists(filePath):
            return result
        try:
            pathTmp = os.path.join(self.pathTmp, bucketName)
            os.makedirs(pathTmp, exist_ok=True)
            file1 = os.path.join(pathTmp, filename)
            self.handle.fget_object(bucketName, filename, file1)
            result = filecmp.cmp(file1, filePath, shallow=False)
            os.remove(file1)
        except Exception as e:
            print("FATAL: compareFiles Minio(%s): %s" % (self.url, str(e)))
            return False
        return result
def add(self, model_run_name, args):
    """Add a model_run to the Modelrun scheduler.

    Parameters
    ----------
    model_run_name : str
        Name of the modelrun
    args : dict
        Arguments for the command-line interface

    Raises
    ------
    Exception
        When the modelrun was already started

    Notes
    -----
    DAFNI's queuing mechanism starts model runs in separate containers.
    This means that it is possible to run multiple modelruns concurrently
    without causing conflicts.
    """
    if self._status[model_run_name] != 'running':
        self._output[model_run_name] = ''
        self._status[model_run_name] = 'queuing'

        yaml_files = self.get_yamls(model_run_name, args)
        model_run_id = model_run_name.replace("_", "-")

        minio_credentials = self.get_dict_from_json(MINIO_CREDENTIALS_FILE)
        minio_client = Minio(MINIO_IP,
                             access_key=minio_credentials['accessKey'],
                             secret_key=minio_credentials['secretKey'],
                             secure=False)

        bucket_list = minio_client.list_buckets()
        for bucket in bucket_list:
            if bucket.name == model_run_id:
                for obj in minio_client.list_objects(model_run_id,
                                                     recursive=True):
                    minio_client.remove_object(model_run_id, obj.object_name)
                minio_client.remove_bucket(model_run_id)
        minio_client.make_bucket(model_run_id)

        for yml in yaml_files:
            try:
                local_path = args['directory'] + yml
                with open(local_path, 'rb') as yml_data:
                    yml_stat = os.stat(local_path)
                    minio_client.put_object(model_run_id, yml[1:], yml_data,
                                            yml_stat.st_size)
            except ResponseError as err:
                print(err)

        response = requests.get(URL_JOBS, headers=self.auth_header)
        response.raise_for_status()
        for job in response.json():
            if job['job']['job_name'] == model_run_id:
                response = requests.delete(
                    URL_JOBS + "/" + str(job['job']['id']),
                    headers=self.auth_header)
                response.raise_for_status()

        response = requests.post(URL_JOBS, json={
            "job_name": model_run_id,
            "model_name": model_run_name,
            "minio_config_id": model_run_id
        }, headers=self.auth_header)
        response.raise_for_status()
class MinioManagement:
    def __init__(self, access, secret):
        self.bucket_name = os.environ.get('BUCKET_NAME')
        try:
            self.client = Minio(
                's3storage-e2e-cloud-storage:9000',  # 'localhost:9010',
                access_key=os.environ.get('MINIO_ACCESS_KEY'),
                secret_key=os.environ.get('MINIO_SECRET_KEY'),
                secure=False)
            print("Connected to Minio Server!")
        except Exception as e:
            raise e

    # Note: put_object() also creates the bucket if it does not yet exist
    # at the moment an object is put in.
    def switch_active_bucket(self, bucket_name):
        self.bucket_name = bucket_name

    # user_id=bucket_name=user_name(?), file_id=file_name -> stored in MongoDB.
    # The uuid is the beginning of the object name: uuid/file
    def put_object(self, uuid, file_name, blob, size_of_data, con_type):
        if not self.client.bucket_exists(self.bucket_name):
            try:
                self.client.make_bucket(self.bucket_name)
            except ResponseError:
                raise
        try:
            con_filename = str(uuid) + '/' + str(file_name)
            self.client.put_object(
                bucket_name=self.bucket_name,
                object_name=con_filename,
                data=blob,
                length=size_of_data,
                content_type=con_type,
            )
        except ResponseError:
            raise

    # Generates a list of object names and returns it; no given uuid => whole
    # bucket. Used e.g. to delete multiple files.
    def generate_object_list(self, uuid=None):
        if self.client.bucket_exists(self.bucket_name):
            try:
                if uuid is None:
                    objects = self.client.list_objects(self.bucket_name,
                                                       recursive=True)
                else:
                    # params: bucket_name, prefix, recursive, include_version
                    objects = self.client.list_objects(self.bucket_name,
                                                       prefix=uuid,
                                                       recursive=True)
                object_list = [x.object_name for x in objects]
                return object_list
            except ResponseError:
                raise

    # Generates a json list with all elements and returns it; no given
    # uuid => whole bucket.
    def generate_object_list_json(self, uuid=None):
        if self.client.bucket_exists(self.bucket_name):
            try:
                if uuid is None:
                    objects = self.client.list_objects(self.bucket_name,
                                                       recursive=True)
                else:
                    objects = self.client.list_objects(self.bucket_name,
                                                       prefix=uuid,
                                                       recursive=True)
                jsondata = []
                for x in objects:
                    response = self.client.get_object(self.bucket_name,
                                                      x.object_name)
                    try:
                        jsondata.append({
                            'id': uuid,  # to be filled in views from jwt
                            'filename': str(x.object_name)[str(x.object_name).index('/') + 1:],
                            'contentType': str(x.content_type),
                            'size': int(x.size),
                            'lastModifiedDate': str(x.last_modified),
                            'blob': response.data.decode()
                        })
                    finally:
                        # Release each connection as soon as it has been read.
                        response.close()
                        response.release_conn()
                return jsondata
            except ResponseError:
                raise

    # get_file returns an object in the form of an httpResponse.
    def get_file(self, uuid, file_name):
        path = uuid + "/" + file_name
        response = self.client.get_object(self.bucket_name, path)
        try:
            objects = self.client.list_objects(self.bucket_name, prefix=path,
                                               recursive=False)
            for x in objects:
                jsondata = {
                    'id': uuid,
                    'filename': str(x.object_name)[str(x.object_name).index('/') + 1:],
                    'contentType': str(x.content_type),
                    'size': int(x.size),
                    'lastModifiedDate': str(x.last_modified),
                    'blob': response.data.decode()
                }
                return jsondata
        except ResponseError:
            raise
        finally:
            response.close()
            response.release_conn()

    def remove_file(self, uuid, object_name):
        if self.client.bucket_exists(self.bucket_name):
            try:
                path = uuid + "/" + object_name
                self.client.remove_object(self.bucket_name, path)
            except ResponseError:
                raise

    # Removes all objects given a list of object names from the active bucket.
    def remove_files(self, object_list):
        if self.client.bucket_exists(self.bucket_name):
            try:
                for del_err in self.client.remove_objects(
                        self.bucket_name, object_list):
                    print("Deletion Error: {}".format(del_err))
            except ResponseError:
                raise

    def remove_empty_bucket(self, bucket_name):
        if self.client.bucket_exists(bucket_name):
            try:
                self.client.remove_bucket(bucket_name)
            except ResponseError:
                raise

    # Empties a bucket and deletes it. !!Erases the whole database!!
    def purge_bucket(self, bucket_name):
        if self.client.bucket_exists(bucket_name):
            try:
                # remove_files() and generate_object_list() work on the
                # active bucket, so switch to the target bucket first.
                self.switch_active_bucket(bucket_name)
                self.remove_files(self.generate_object_list())
                self.remove_empty_bucket(bucket_name)
            except ResponseError:
                raise

    def purge_user(self, uuid):
        uuid_files_list = self.generate_object_list(uuid)
        self.remove_files(uuid_files_list)
# my-objectname are dummy values, please replace them with original values.

import os

from minio import Minio
from minio.error import ResponseError

client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEYID',
               secret_key='YOUR-SECRETACCESSKEY')

# Put a file with default content-type.
try:
    with open('my-testfile', 'rb') as file_data:
        file_stat = os.stat('my-testfile')
        client.put_object('my-bucketname', 'my-objectname',
                          file_data, file_stat.st_size)
except ResponseError as err:
    print(err)

# Put a file with 'application/csv'.
try:
    with open('my-testfile.csv', 'rb') as file_data:
        file_stat = os.stat('my-testfile.csv')
        client.put_object('my-bucketname', 'my-objectname',
                          file_data, file_stat.st_size,
                          content_type='application/csv')
except ResponseError as err:
    print(err)
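# A minimal follow-up sketch (not in the original example): verifying the
# upload by asking the server for the object's metadata. Assumes the same
# `client` and the dummy bucket/object names used above.
try:
    stat = client.stat_object('my-bucketname', 'my-objectname')
    print(stat.etag, stat.size, stat.content_type)
except ResponseError as err:
    print(err)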
"signature_name": "serving_default", "instances": test_images[0].tolist() }) print("here's a test to send...") print(data) pickle.dump(train_images, open("/train_images", "wb"), pickle.HIGHEST_PROTOCOL) pickle.dump(test_images, open("/test_images", "wb"), pickle.HIGHEST_PROTOCOL) try: with open('/train_images', 'rb') as file_data: file_stat = os.stat('/train_images') print( minioClient.put_object('fashionmnist', 'normalizedtrainimages', file_data, file_stat.st_size)) except ResponseError as err: print(err) print('Stored normalized training images in S3') try: with open('/test_images', 'rb') as file_data: file_stat = os.stat('/test_images') print( minioClient.put_object('fashionmnist', 'normalizedtestimages', file_data, file_stat.st_size)) except ResponseError as err: print(err) print('Stored normalized test images in S3')
class S3DataStore(CheckpointDataStore):
    """
    An implementation of the data store using S3 for storing policy checkpoints
    when using Coach in distributed mode. The policy checkpoints are written by
    the trainer and read by the rollout worker.
    """

    def __init__(self, params: S3DataStoreParameters):
        """
        :param params: The parameters required to use the S3 data store.
        """
        super(S3DataStore, self).__init__(params)
        self.params = params
        access_key = None
        secret_key = None
        if params.creds_file:
            config = ConfigParser()
            config.read(params.creds_file)
            try:
                access_key = config.get('default', 'aws_access_key_id')
                secret_key = config.get('default', 'aws_secret_access_key')
            except Error as e:
                print("Error when reading S3 credentials file: %s" % e)
        else:
            access_key = os.environ.get('ACCESS_KEY_ID')
            secret_key = os.environ.get('SECRET_ACCESS_KEY')
        self.mc = Minio(self.params.end_point,
                        access_key=access_key,
                        secret_key=secret_key)

    def deploy(self) -> bool:
        return True

    def get_info(self):
        return "s3://{}".format(self.params.bucket_name)

    def undeploy(self) -> bool:
        return True

    def save_to_store(self):
        self._save_to_store(self.params.checkpoint_dir)

    def _save_to_store(self, checkpoint_dir):
        """
        _save_to_store() uploads the policy checkpoint, gifs and videos to the
        S3 data store. It reads the checkpoint state files and uploads only the
        latest checkpoint files to S3. It is used by the trainer in Coach when
        used in distributed mode.
        """
        try:
            # Remove lock file if it exists.
            self.mc.remove_object(self.params.bucket_name,
                                  SyncFiles.LOCKFILE.value)

            # Acquire lock.
            self.mc.put_object(self.params.bucket_name,
                               SyncFiles.LOCKFILE.value, io.BytesIO(b''), 0)

            state_file = CheckpointStateFile(os.path.abspath(checkpoint_dir))
            if state_file.exists():
                ckpt_state = state_file.read()
                checkpoint_file = None
                for root, dirs, files in os.walk(checkpoint_dir):
                    for filename in files:
                        if filename == CheckpointStateFile.checkpoint_state_filename:
                            checkpoint_file = (root, filename)
                            continue
                        if filename.startswith(ckpt_state.name):
                            abs_name = os.path.abspath(
                                os.path.join(root, filename))
                            rel_name = os.path.relpath(abs_name,
                                                       checkpoint_dir)
                            self.mc.fput_object(self.params.bucket_name,
                                                rel_name, abs_name)

                abs_name = os.path.abspath(
                    os.path.join(checkpoint_file[0], checkpoint_file[1]))
                rel_name = os.path.relpath(abs_name, checkpoint_dir)
                self.mc.fput_object(self.params.bucket_name, rel_name,
                                    abs_name)

            # Upload Finished if present.
            if os.path.exists(os.path.join(checkpoint_dir,
                                           SyncFiles.FINISHED.value)):
                self.mc.put_object(self.params.bucket_name,
                                   SyncFiles.FINISHED.value,
                                   io.BytesIO(b''), 0)

            # Upload Ready if present.
            if os.path.exists(os.path.join(checkpoint_dir,
                                           SyncFiles.TRAINER_READY.value)):
                self.mc.put_object(self.params.bucket_name,
                                   SyncFiles.TRAINER_READY.value,
                                   io.BytesIO(b''), 0)

            # Release lock.
            self.mc.remove_object(self.params.bucket_name,
                                  SyncFiles.LOCKFILE.value)

            if self.params.expt_dir and os.path.exists(self.params.expt_dir):
                for filename in os.listdir(self.params.expt_dir):
                    if filename.endswith((".csv", ".json")):
                        self.mc.fput_object(
                            self.params.bucket_name, filename,
                            os.path.join(self.params.expt_dir, filename))

            if self.params.expt_dir and os.path.exists(
                    os.path.join(self.params.expt_dir, 'videos')):
                for filename in os.listdir(
                        os.path.join(self.params.expt_dir, 'videos')):
                    self.mc.fput_object(
                        self.params.bucket_name, filename,
                        os.path.join(self.params.expt_dir, 'videos',
                                     filename))

            if self.params.expt_dir and os.path.exists(
                    os.path.join(self.params.expt_dir, 'gifs')):
                for filename in os.listdir(
                        os.path.join(self.params.expt_dir, 'gifs')):
                    self.mc.fput_object(
                        self.params.bucket_name, filename,
                        os.path.join(self.params.expt_dir, 'gifs', filename))

        except S3Error as e:
            print("Got exception: %s while saving to S3" % e)

    def load_from_store(self):
        """
        load_from_store() downloads a new checkpoint from the S3 data store
        when it is not available locally. It is used by the rollout workers
        when using Coach in distributed mode.
        """
        try:
            state_file = CheckpointStateFile(
                os.path.abspath(self.params.checkpoint_dir))

            # Wait until the lock is removed.
            while True:
                objects = self.mc.list_objects_v2(self.params.bucket_name,
                                                  SyncFiles.LOCKFILE.value)
                if next(objects, None) is None:
                    try:
                        # Fetch the checkpoint state file from S3.
                        self.mc.fget_object(self.params.bucket_name,
                                            state_file.filename,
                                            state_file.path)
                    except Exception:
                        continue
                    break
                time.sleep(10)

            # Check if there's a finished file.
            objects = self.mc.list_objects_v2(self.params.bucket_name,
                                              SyncFiles.FINISHED.value)
            if next(objects, None) is not None:
                try:
                    self.mc.fget_object(
                        self.params.bucket_name, SyncFiles.FINISHED.value,
                        os.path.abspath(
                            os.path.join(self.params.checkpoint_dir,
                                         SyncFiles.FINISHED.value)))
                except Exception:
                    pass

            # Check if there's a ready file.
            objects = self.mc.list_objects_v2(self.params.bucket_name,
                                              SyncFiles.TRAINER_READY.value)
            if next(objects, None) is not None:
                try:
                    self.mc.fget_object(
                        self.params.bucket_name,
                        SyncFiles.TRAINER_READY.value,
                        os.path.abspath(
                            os.path.join(self.params.checkpoint_dir,
                                         SyncFiles.TRAINER_READY.value)))
                except Exception:
                    pass

            checkpoint_state = state_file.read()
            if checkpoint_state is not None:
                objects = self.mc.list_objects_v2(self.params.bucket_name,
                                                  prefix=checkpoint_state.name,
                                                  recursive=True)
                for obj in objects:
                    filename = os.path.abspath(
                        os.path.join(self.params.checkpoint_dir,
                                     obj.object_name))
                    if not os.path.exists(filename):
                        self.mc.fget_object(obj.bucket_name, obj.object_name,
                                            filename)

        except S3Error as e:
            print("Got exception: %s while loading from S3" % e)

    def setup_checkpoint_dir(self, crd=None):
        if crd:
            self._save_to_store(crd)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import io
import os

from minio import Minio

__author__ = 'minio'

# The endpoint takes a host name, not a URL; HTTPS is the default.
client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEYID',
               secret_key='YOUR-SECRETACCESSKEY')

# Put a new object: put_object() takes the data stream followed by its length.
data = b'hello world'
client.put_object('my-bucket', 'my_key', io.BytesIO(data), len(data),
                  content_type='text/plain')

# Put a file
file_stat = os.stat('file.dat')
with open('file.dat', 'rb') as file_data:
    client.put_object('my-bucket', 'my_key', file_data, file_stat.st_size)
# Negative test (in the original suite presumably guarded by a decorator
# such as @raises(TypeError)): put_object() should reject these argument types.
def test_length_is_string(self):
    client = Minio('localhost:9000')
    client.put_object('hello', 1234, '1', iter([1, 2, 3]))
class CookieUpdater:
    def __init__(self):
        with open('/home/credentials.json', 'r') as file:
            self.credentials = json.load(file)
        self.bucket = self.credentials['bucket']
        self.client = Minio(self.credentials['url'],
                            access_key=self.credentials['key_id'],
                            secret_key=self.credentials['key'],
                            region=self.credentials['region'])

    def update_local(self, folder):
        objects = self.client.list_objects(self.credentials['bucket'],
                                           recursive=True)
        for obj in objects:
            print(obj.object_name)
            try:
                data = self.client.get_object(self.credentials['bucket'],
                                              obj.object_name)
                with open('{}/{}'.format(folder, obj.object_name), 'wb') as file_data:
                    for d in data.stream(32 * 1024):
                        file_data.write(d)
            except ResponseError as err:
                print(err)

    def upload_file(self, filename):
        print('file to upload: {}'.format(filename))
        new_filename = filename
        if 'cookies/' in new_filename:
            new_filename = new_filename.replace('cookies/', '')
        if '/home/' in new_filename:
            new_filename = new_filename.replace('/home/', '')
        print('file: {} filename: {}'.format(new_filename, filename))
        length = os.path.getsize(filename)
        try:
            with open(filename, 'rb') as file:
                buk = self.client.put_object(self.bucket, new_filename,
                                             file, length)
            print('successfully uploaded {}'.format(buk))
        except Exception as e:
            print('error uploading {}:\n{}'.format(filename, e))

    def run(self):
        if os.path.isfile('{}{}'.format(PATH, 'INIT')):
            self.update_local(PATH)
        else:
            changed_files = list()
            new_cookies = list()
            for path, subdirs, files in os.walk(PATH):
                print(path, subdirs, files)
                for filename in files:
                    filename = '{}{}'.format(PATH, filename)
                    if '.sha256' not in filename:
                        if is_new(filename):
                            new_cookies.append(filename)
                        elif has_changed(filename):
                            print('FILE: {}'.format(filename))
                            changed_files.append(os.path.join(filename))
                        else:
                            print('not new, nor has it changed')
            print('changed: {}'.format(changed_files))
            print('new: {}'.format(new_cookies))
            for new_cookie in new_cookies:
                self.upload_file(new_cookie)
            for cookie in changed_files:
                self.upload_file(cookie)
            make_hashs(PATH)
            print('\n' + '*' * 20 + '\n')
# Negative test (in the original suite presumably guarded by a decorator
# such as @raises(TypeError)): the argument types are intentionally wrong.
def test_object_is_string(self):
    client = Minio('http://localhost:9000')
    client.put_object('hello', 1234, 1, iter([1, 2, 3]))
# print("**************list buckets******************") # buckets = minio_client2.list_buckets() # for bucket in buckets: # print(bucket.name) print("**************enable version******************") minio_client2.disable_bucket_versioning("zcbucket") print("**************put objects******************") for i in range(10): file_stat = os.stat('hello.txt') with open('hello.txt', 'rb') as data: minio_client2.put_object( 'zcbucket', 'bar' + str(i), data, file_stat.st_size, 'text/plain', ) print("**************fput objects******************") minio_client2.fput_object("zcbucket", "readme.md", "./README.md") print("**************list objects******************") objects = minio_client2.list_objects(bucket_name="zcbucket", include_version=False) for object in objects: print(object.object_name) print(object.version_id) print("***************list objects_v2*****************")
print('\ntrain_images.shape: {}, of {}'.format(train_images.shape,
                                               train_images.dtype))

model.fit(train_images, train_labels, epochs=epochs, callbacks=[tensorboard])
print('Training finished')

model.save('my_model.h5')
print("Saved model to local disk")

#---
try:
    with open('my_model.h5', 'rb') as file_data:
        file_stat = os.stat('my_model.h5')
        print(minioClient.put_object('fashionmnist', 'trainedmodel',
                                     file_data, file_stat.st_size))
except ResponseError as err:
    print(err)
print('Stored trained model in S3')

# This won't work with KF's viewer yet
with open("/mlpipeline-ui-metadata.json", "w") as md_file:
    md_file.write('{"version": 1,"outputs": [{"type": "tensorboard","source": "/logdir"}]}')
print('Wrote tensorboard metadata')

with open("/trainedModelName.txt", "w") as text_file:
    text_file.write('trainedmodel')
class MinioStore(object):
    def __init__(self, endpoint=None, access_key=None, secret_key=None,
                 secure=None, bucket_name=None, session_token=None,
                 region=None, http_client=None):
        try:
            # Fall back to Django settings for any argument left as None;
            # explicitly passed values are kept.
            self.endpoint = endpoint if endpoint is not None else settings.MINIO_ENDPOINT
            self.access_key = access_key if access_key is not None else settings.MINIO_ACCESS_KEY
            self.secret_key = secret_key if secret_key is not None else settings.MINIO_SECRET_KEY
            self.secure = secure if secure is not None else settings.MINIO_SECURE
            self.bucket_name = bucket_name if bucket_name is not None else settings.MINIO_BUCKET_NAME
            self.session_token = session_token
            self.region = region
            self.http_client = http_client
            self.mc = Minio(endpoint=self.endpoint,
                            access_key=self.access_key,
                            secret_key=self.secret_key,
                            session_token=session_token,
                            secure=self.secure,
                            region=region,
                            http_client=http_client)
        except ResponseError as err:
            raise err

    def put_file(self, f, name=None, size=None, content_type=None):
        if not isinstance(f, File):
            raise ValueError
        if name is None:
            name = f.name
        if size is None:
            size = f.size
        if content_type is None:
            content_type = f.content_type
        self.put_object(name, f, size, content_type)

    def put_chunk_object(self):
        """ Multipart (chunked) object upload. """
        raise NotImplementedError

    def put_object(self, object_name, data, length,
                   content_type='application/octet-stream'):
        try:
            self.mc.put_object(self.bucket_name, object_name, data, length,
                               content_type)
        except ResponseError as err:
            raise err

    def get_object(self, object_name):
        req = self.mc.get_object(self.bucket_name, object_name)
        return req

    def get_presigned_object(self, object_name, expires=timedelta(days=7)):
        return self.mc.presigned_get_object(self.bucket_name, object_name,
                                            expires)

    @deprecation.deprecated(details="This method is deprecated; use get_presigned_object instead")
    def presigned_get_object(self, object_name, expires=timedelta(days=7)):
        return self.get_presigned_object(object_name, expires)

    def _download(self, object_name, content_type='application/octet-stream',
                  out_file_name=None, is_download=True):
        re = self.get_object(object_name)
        if out_file_name is None:
            out_file_name = object_name
        response = HttpResponse(re.data)
        response['Content-Type'] = content_type
        if is_download:
            response['Content-Disposition'] = 'attachment;filename="%s"' % (
                urlquote(out_file_name))
        else:
            response['Content-Disposition'] = 'filename="%s"' % (
                urlquote(out_file_name))
        return response

    def download(self, object_name, out_file_name=None):
        content_type = 'application/octet-stream'
        return self._download(object_name, content_type, out_file_name)

    def open(self, object_name, out_file_name=None):
        if out_file_name is None:
            out_file_name = object_name
        _, _ext = os.path.splitext(out_file_name)
        content_type = _get_content_type(_ext.lower())
        return self._download(object_name, content_type, out_file_name, False)
# Negative test (in the original suite presumably guarded by a decorator
# such as @raises(TypeError)): whitespace name and negative length are invalid.
def test_length_is_not_empty_string(self):
    client = Minio('localhost:9000')
    client.put_object('hello', ' \t \n ', -1, iter([1, 2, 3]))
import math
import os
import time

from minio import Minio
# from minio.error import ResponseError

if __name__ == '__main__':
    client = Minio('10.10.1.6:9000',
                   access_key='emulab_cmsc352_access',
                   secret_key='emulab_cmsc352_secret',
                   secure=False)

    granularity = 60000
    dataset_size = 60000
    object_dir = f"../mnist_objects_{granularity}"
    object_num = int(math.ceil(dataset_size / granularity))

    i = 0
    while i < object_num:
        filename = "object" + str(i)
        with open(os.path.join(object_dir, filename), 'rb') as file:
            file_stat = os.stat(os.path.join(object_dir, filename))
            client.put_object(f"mnist-pickle-{granularity}", filename, file,
                              file_stat.st_size)
        i += 1
def main():
    """
    Functional testing of minio python library.
    """
    fake = Factory.create()
    # Note: this example targets a very old minio-py release, which accepted
    # a full URL as the endpoint and exposed bucket ACL calls.
    client = Minio('https://play.minio.io:9002',
                   'Q3AM3UQ867SPQQA43P2F',
                   'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG')

    # Get unique bucket_name, object_name.
    bucket_name = uuid.uuid4().__str__()
    object_name = uuid.uuid4().__str__()

    # Make a new bucket.
    print(client.make_bucket(bucket_name))

    # Check if bucket was created properly.
    print(client.bucket_exists(bucket_name))

    # Set the bucket ACL to private.
    print(client.set_bucket_acl(bucket_name, Acl.private()))

    # Print current bucket acl.
    print(client.get_bucket_acl(bucket_name))

    # List all buckets.
    buckets = client.list_buckets()
    for bucket in buckets:
        print(bucket.name, bucket.creation_date)

    with open('testfile', 'wb') as file_data:
        file_data.write(fake.text().encode('utf-8'))

    # Put a file
    file_stat = os.stat('testfile')
    with open('testfile', 'rb') as file_data:
        client.put_object(bucket_name, object_name, file_data,
                          file_stat.st_size)

    # Fetch stats on your object.
    print(client.stat_object(bucket_name, object_name))

    # Get a full object
    data = client.get_object(bucket_name, object_name)
    with open('newfile', 'wb') as file_data:
        for d in data:
            file_data.write(d)

    # List all object paths in bucket.
    objects = client.list_objects(bucket_name)
    for obj in objects:
        print(obj.bucket_name, obj.object_name, obj.last_modified,
              obj.etag, obj.size, obj.content_type)

    uploads = client.list_incomplete_uploads(bucket_name, prefix='',
                                             recursive=True)
    for obj in uploads:
        print(obj.bucket_name, obj.object_name, obj.upload_id)

    print(client.presigned_get_object(bucket_name, object_name))
    print(client.presigned_put_object(bucket_name, object_name))

    # Remove an object.
    print(client.remove_object(bucket_name, object_name))

    # Remove a bucket.
    # This operation will only work if your bucket is empty.
    print(client.remove_bucket(bucket_name))

    # Remove temporary files.
    os.remove('testfile')
    os.remove('newfile')
class MINIORepository(Repository):
    client = None

    def __init__(self, config):
        super().__init__()
        try:
            access_key = config['storage_access_key']
        except Exception:
            access_key = 'minio'
        try:
            secret_key = config['storage_secret_key']
        except Exception:
            secret_key = 'minio123'
        try:
            self.bucket = config['storage_bucket']
        except Exception:
            self.bucket = 'fedn-models'
        try:
            self.context_bucket = config['context_bucket']
        except Exception:
            self.context_bucket = 'fedn-context'
        try:
            self.secure_mode = bool(config['storage_secure_mode'])
        except Exception:
            self.secure_mode = False

        if not self.secure_mode:
            print("\n\n\nWARNING : S3/MINIO RUNNING IN **INSECURE** MODE! "
                  "THIS IS NOT FOR PRODUCTION!\n\n\n")

        if self.secure_mode:
            from urllib3.poolmanager import PoolManager
            manager = PoolManager(num_pools=100,
                                  cert_reqs='CERT_NONE',
                                  assert_hostname=False)
            self.client = Minio("{0}:{1}".format(config['storage_hostname'],
                                                 config['storage_port']),
                                access_key=access_key,
                                secret_key=secret_key,
                                secure=self.secure_mode,
                                http_client=manager)
        else:
            self.client = Minio("{0}:{1}".format(config['storage_hostname'],
                                                 config['storage_port']),
                                access_key=access_key,
                                secret_key=secret_key,
                                secure=self.secure_mode)

        # TODO: generalize
        self.context_bucket = 'fedn-context'
        self.create_bucket(self.context_bucket)
        self.create_bucket(self.bucket)

    def create_bucket(self, bucket_name):
        found = self.client.bucket_exists(bucket_name)
        if not found:
            try:
                self.client.make_bucket(bucket_name)
            except InvalidResponseError:
                raise

    def set_artifact(self, instance_name, instance, is_file=False, bucket=''):
        """ Instance must be a byte-like object. """
        if bucket == '':
            bucket = self.bucket
        if is_file:
            self.client.fput_object(bucket, instance_name, instance)
        else:
            try:
                self.client.put_object(bucket, instance_name,
                                       io.BytesIO(instance), len(instance))
            except Exception as e:
                raise Exception("Could not load data into bytes {}".format(e))
        return True

    def get_artifact(self, instance_name, bucket=''):
        if bucket == '':
            bucket = self.bucket
        try:
            data = self.client.get_object(bucket, instance_name)
            return data.read()
        except Exception as e:
            raise Exception("Could not fetch data from bucket, {}".format(e))

    def get_artifact_stream(self, instance_name):
        try:
            data = self.client.get_object(self.bucket, instance_name)
            return data
        except Exception as e:
            raise Exception("Could not fetch data from bucket, {}".format(e))

    def list_artifacts(self):
        objects_to_delete = []
        try:
            objs = self.client.list_objects(self.bucket)
            for obj in objs:
                print(obj.object_name)
                objects_to_delete.append(obj.object_name)
        except Exception:
            raise Exception("Could not list models in bucket {}".format(
                self.bucket))
        return objects_to_delete

    def delete_artifact(self, instance_name, bucket=''):
        if not bucket:
            bucket = self.bucket
        try:
            self.client.remove_object(bucket, instance_name)
        except ResponseError as err:
            print(err)
            print('Could not delete artifact: {}'.format(instance_name))

    def delete_objects(self):
        objects_to_delete = self.list_artifacts()
        try:
            # Force evaluation of the remove_objects() call by iterating over
            # the returned value.
            for del_err in self.client.remove_objects(self.bucket,
                                                      objects_to_delete):
                print("Deletion Error: {}".format(del_err))
        except ResponseError as err:
            print(err)
# -*- coding: utf-8 -*-
# Minio Python Library for Amazon S3 Compatible Cloud Storage, (C) 2015 Minio, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from minio import Minio

# The endpoint is a host name, not a URL; HTTPS is used by default.
client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEYID',
               secret_key='YOUR-SECRETACCESSKEY')

# Put a file
file_stat = os.stat('testfile')
with open('testfile', 'rb') as file_data:
    client.put_object('bucketName', 'objectName', file_data,
                      file_stat.st_size)
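# A minimal companion sketch (not part of the original example): uploading
# in-memory bytes instead of a file, assuming the same `client` and the
# dummy bucket name used above.
import io

payload = b'hello from memory'
client.put_object('bucketName', 'objectName-bytes', io.BytesIO(payload),
                  len(payload), content_type='text/plain')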
import os
from timeit import default_timer as timer

from minio import Minio
from minio.error import ResponseError

if __name__ == '__main__':
    client = Minio('10.10.1.2:9000',
                   access_key='BKIKJAA5BMMU2RHO6IBB',
                   secret_key='V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12',
                   secure=False)

    object_dir = "./mnist_objects"
    i = 0
    while i < 60000:
        filename = "img" + str(i)
        with open(os.path.join(object_dir, filename), 'rb') as file:
            file_stat = os.stat(os.path.join(object_dir, filename))
            client.put_object("mnist", filename, file, file_stat.st_size)
        i += 1
buckets = minioClient.list_buckets()
for bucket in buckets:
    print(bucket.name, bucket.creation_date)

try:
    minioClient.make_bucket("mybucket")
except ResponseError as err:
    print(err)

buckets = minioClient.list_buckets()
for bucket in buckets:
    print(bucket.name, bucket.creation_date)

# Put a file with default content-type; upon success this prints the etag
# identifier computed by the server.
try:
    with open('my-testfile', 'rb') as file_data:
        file_stat = os.stat('my-testfile')
        print(minioClient.put_object('mybucket', 'sub1/myobject',
                                     file_data, file_stat.st_size))
except ResponseError as err:
    print(err)

# Put a file with 'application/csv'.
try:
    with open('my-testfile.csv', 'rb') as file_data:
        file_stat = os.stat('my-testfile.csv')
        minioClient.put_object('mybucket', 'sub2/myobject.csv',
                               file_data, file_stat.st_size,
                               content_type='application/csv')
except ResponseError as err:
    print(err)
]

df_with_max["price_diff_per_day"] = (
    df_with_max["latest_price"] - df_with_max["oldest_price"]
) / ((df_with_max["download_date_max"] - df_with_max["download_date_min"]).dt.days)
df_with_max[["price_diff_per_day"]] = df_with_max[["price_diff_per_day"]].apply(
    pd.to_numeric
)

biggest_increase = df_with_max.nlargest(5, "price_diff_per_day")
biggest_decrease = df_with_max.nsmallest(5, "price_diff_per_day")

# We found the top 5, write back the results.
biggest_increase_json = biggest_increase.to_json(orient="records")
print(f"Biggest increases: {biggest_increase_json}")
biggest_increase_bytes = biggest_increase_json.encode("utf-8")
client.put_object(
    bucket_name="inside-airbnb",
    object_name="results/biggest_increase.json",
    data=io.BytesIO(biggest_increase_bytes),
    length=len(biggest_increase_bytes),
)

biggest_decrease_json = biggest_decrease.to_json(orient="records")
print(f"Biggest decreases: {biggest_decrease_json}")
biggest_decrease_bytes = biggest_decrease_json.encode("utf-8")
client.put_object(
    bucket_name="inside-airbnb",
    object_name="results/biggest_decrease.json",
    data=io.BytesIO(biggest_decrease_bytes),
    length=len(biggest_decrease_bytes),
)
def updateImage(self, image, imagePath, logger):
    logger.debug("Start sending to minio server")
    minioClient = Minio(
        '192.168.0.162:9000',
        access_key='FM9GO6CT17O8122165HB',
        secret_key='yLyai1DFC03hzN17srK0PvYTIZFvHDnDxRKYAjK4',
        secure=False
    )
    imageName = os.path.splitext(image.image_name)[0] + '.dzi'
    image_oid = image.image_oid
    bucketName = image_oid.bucket_name
    user = image.user
    is_private = image.is_private
    pub_date = image.pub_date
    processed = True
    dataDirPath = os.path.join(
        os.path.split(imagePath)[0],
        os.path.splitext(os.path.split(imagePath)[1])[0] + "_files"
    )
    try:
        with open(imagePath, 'rb') as file_data:
            file_stat = os.stat(imagePath)
            minioClient.put_object(
                bucketName,
                imageName,
                file_data,
                file_stat.st_size,
                content_type='application/dzi'
            )
        logger.info("Starting upload recursively to minio server, "
                    "starting from " + dataDirPath)
        self.uploadRecursively(
            minioClient,
            dataDirPath,
            logger,
            os.path.split(dataDirPath)[0],
            bucketName,
            os.path.split(dataDirPath)[1],
            os.path.splitext(imageName)[0]
        )
        logger.info("Successfully sent to minio server")
        logger.info("Copying files to frontend/public/")
        try:
            dir_util.copy_tree(
                os.path.split(dataDirPath)[0],
                '/code/frontend_app/deepzoom/' + bucketName)
            logger.info("[DeepZoomWrapper] Successfully copied files")
        except Exception as e:
            logger.error("Error occurred copying files: ")
            logger.error(str(e))
        logger.info("Deleting temporary files")
        shutil.rmtree(os.path.split(dataDirPath)[0])
        shutil.rmtree('/code/imageuploader/image_tmp/source/'
                      + os.path.splitext(imageName)[0])
        logger.info("Successfully deleted temporary files")
        logger.info("Start update db")
        try:
            if (oid.objects.all().filter(bucket_name=bucketName)
                    .filter(object_name=imageName).exists()):
                raise Exception
            else:
                m_oid = oid(
                    url='192.168.0.162:9000',
                    bucket_name=bucketName,
                    object_name=imageName
                )
                m_oid.save()
                m_user = Users.objects.all().filter(name=user).get()
                m_image = Image(
                    image_name=imageName,
                    image_oid=m_oid,
                    preview_url="",
                    user=m_user,
                    is_private=is_private,
                    pub_date=pub_date,
                    processed=processed
                )
                m_image.save()
                logger.debug("Deleting original image from db")
                m_image2del = Image.objects.all() \
                    .filter(image_oid__bucket_name=bucketName) \
                    .filter(image_name=image.image_name).get()
                m_image2del.delete()
                logger.debug("Deleting original image from minio")
                minioClient.remove_object(bucketName, image.image_name)
                logger.info("Successfully deleted unprocessed image")
        except Exception as e:
            logger.error("[Exception] at DeepZoomWrapper:183 " + str(e))
            logger.error("Object exists?")
        logger.info("Successfully updated db")
    except ResponseError as err:
        logger.error("[ResponseError] at DeepZoomWrapper:190 " + str(err))
    return
from io import BytesIO

from minio import Minio

minio_access_key = 'Q3AM3UQ867SPQQA43P2F'
minio_secret_key = 'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG'
STORAGE_ENDPOINT = 'play.minio.io:9000'
# Add your bucket name here
STORAGE_BUCKET = 'testbucket'

content = BytesIO(b'Hello again')

minio = Minio(STORAGE_ENDPOINT,
              access_key=minio_access_key,
              secret_key=minio_secret_key,
              secure=True)

# Make bucket. Skip this if the bucket already exists on the server.
minio.make_bucket(STORAGE_BUCKET)

# Put object with custom metadata
minio.put_object(STORAGE_BUCKET, 'test_obj', content,
                 content.getbuffer().nbytes,
                 metadata={'x-amz-meta-testdata': 'testdata'})

# Head object with metadata
obj = minio.stat_object(STORAGE_BUCKET, 'test_obj')
print(obj.metadata)
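# A minimal read-back sketch (not in the original snippet): fetching the
# object body again, assuming the same `minio` client and bucket as above.
response = minio.get_object(STORAGE_BUCKET, 'test_obj')
try:
    print(response.read().decode())  # -> 'Hello again'
finally:
    response.close()
    response.release_conn()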
class Video:
    def __init__(self, MINIO_ADDR, MINIO_ACCESS_KEY, MINIO_SECRET_KEY,
                 MINIO_1ST_BUCKET):
        self.minio_client = Minio(MINIO_ADDR,
                                  access_key=MINIO_ACCESS_KEY,
                                  secret_key=MINIO_SECRET_KEY,
                                  secure=False)
        self.minio_1st_bucket = MINIO_1ST_BUCKET
        self.max_num = MINIO_BUCKET_NUM
        self.check_before_use()

    def check_before_use(self):
        for i in range(self.max_num):
            current = str(hex(self.max_num - i))[2:]
            if not self.minio_client.bucket_exists("{}-{}".format(
                    self.minio_1st_bucket, current)):
                try:
                    self.minio_client.make_bucket("{}-{}".format(
                        self.minio_1st_bucket, current))
                except ResponseError as err:
                    return err
        if not self.minio_client.bucket_exists(self.minio_1st_bucket):
            try:
                self.minio_client.make_bucket(self.minio_1st_bucket)
            except ResponseError as err:
                return err

    def choose_bucket(self, id):
        for i in range(self.max_num):
            current = str(hex(self.max_num - i))[2:]
            if id.startswith(current):
                return "{}-{}".format(self.minio_1st_bucket, current)
        return self.minio_1st_bucket

    def get_object_data(self, object_name, bucket):
        try:
            data = self.minio_client.presigned_get_object(bucket, object_name)
            return data
        except Exception as err:
            return err

    def all_videos_with_time(self, reverse=False, count=5, page=0):
        res, c = [], -1
        buckets = self.minio_client.list_buckets()
        for bucket in buckets:
            objects = self.minio_client.list_objects_v2(bucket.name)
            for obj in objects:
                c += 1
                if c < count * (page + 1):
                    res.append({
                        "name": obj.object_name,
                        "bucket": bucket.name
                    })
                    if len(res) == count:
                        return self.object_datas(res)
        return self.object_datas(res)

    def object_datas(self, objects):
        for x in objects:
            x['data'] = self.get_object_data(x['name'], x['bucket'])
        return objects

    def upload_video(self, ufile, id, bucket=MINIO_1ST_BUCKET):
        with open(ufile, 'rb') as file_data:
            file_stat = os.stat(ufile)
            self.minio_client.put_object(bucket, id, file_data,
                                         file_stat.st_size)
        total_images_add()

    def delete_video(self, name):
        bucket = self.choose_bucket(name)
        res = []
        objects = self.minio_client.list_objects_v2(bucket, prefix=name)
        for i in objects:
            self.minio_client.remove_object(bucket, i.object_name)
            total_images_reduce()
            res.append(i.object_name)
        return res

    def videos_by_prefix(self, video_meta=None, bucket=MINIO_1ST_BUCKET):
        # Avoid a mutable default argument.
        if video_meta is None:
            video_meta = []
        res = []
        all_videos = []
        for i in video_meta:
            bucket = self.choose_bucket(i[0])
            objects = self.minio_client.list_objects_v2(bucket, prefix=i[0])
            distance = i[1]
            for obj in objects:
                if obj.object_name not in all_videos:
                    res.append({
                        "name": obj.object_name,
                        "last_modified": obj.last_modified.timestamp(),
                        "data": self.get_object_data(obj.object_name, bucket),
                        "bucket": bucket,
                        "distance": distance
                    })
                    all_videos.append(obj.object_name)
        return res
# Modified By: Jaseem Jas ([email protected])
# -----
# Copyright 2016 - 2019 Socialanimal.com
###

from io import BytesIO

from minio import Minio
from minio.error import ResponseError

host = "localhost:9000"
access_key = "jaseem"
secret_key = "iamminio"

minioClient = Minio(host,
                    access_key=access_key,
                    secret_key=secret_key,
                    secure=False)

text = "My minio content"
bucket = "text"
content = BytesIO(bytes(text, 'utf-8'))
key = 'sample.text'
size = content.getbuffer().nbytes

try:
    minioClient.put_object(bucket, key, content, size)
    print("Done!")
except ResponseError as err:
    print("error:", err)
df = pd.Series(sorted(words))
# df = np.array(sorted(words))

# f: file
f = '/tmp/normalized.pkl'
df.to_pickle(f)
# fd: file descriptor
fd = os.stat(f)

try:
    with open(f, 'rb') as data:
        minioClient.put_object('candig', 'normalized.pkl', data, fd.st_size,
                               content_type='application/pickle')
except ResponseError as err:
    print(err)

# step 4: create dictionary of words and frequencies
wordcount = Counter(df)

# quick statistic summary of your data
# print(wordcount.describe())

# step 5: output results
# print(wordcount)
# print(wordcount.index)
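# A hypothetical read-back sketch (not in the original script): fetching the
# pickle from the 'candig' bucket and rebuilding the Series, assuming the
# same `minioClient` and pandas import as above.
import io

response = minioClient.get_object('candig', 'normalized.pkl')
df_restored = pd.read_pickle(io.BytesIO(response.read()))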
class MinioAdapter(BaseStorageAdapter):
    def __init__(self, endpoint=None, access_key=None, secret_key=None,
                 secure=False, *args, **kwargs):
        # Initialize the Minio client with an endpoint and access/secret keys.
        super().__init__(*args, **kwargs)
        try:
            self._client = Minio(endpoint=endpoint,
                                 access_key=access_key,
                                 secret_key=secret_key,
                                 secure=secure)
        except KeyError as err:
            logger.error(err)
            raise Exception("Please enter proper HOSTNAME, AWS_ACCESS_KEY_ID, "
                            "AWS_SECRET_ACCESS_KEY in .env")

    def bucket_exists(self, bucket_name):
        if self._client.bucket_exists(bucket_name):
            return True
        return False

    def create_bucket(self, bucket_name):
        try:
            if not self._client.bucket_exists(bucket_name):
                self._client.make_bucket(bucket_name)
        except ResponseError:
            raise

    def remove_bucket(self, bucket_name):
        try:
            if self._client.bucket_exists(bucket_name):
                self._client.remove_bucket(bucket_name)
        except Exception as e:
            raise e

    def remove_file(self, bucket_name, file_name):
        try:
            # Check the bucket (not the client object) before removing;
            # stat_object() raises if the object is missing.
            if self._client.bucket_exists(bucket_name):
                self._client.stat_object(bucket_name, file_name)
                self._client.remove_object(bucket_name, file_name)
        except Exception as e:
            raise e

    def get_bucket_list(self):
        bucket_list = self._client.list_buckets()
        return bucket_list

    def get_all_files(self, bucket_name):
        pass

    def upload_file(self, bucket_name, file_name, file_path):
        if not self._client.bucket_exists(bucket_name):
            self._client.make_bucket(bucket_name=bucket_name)
        try:
            self._client.fput_object(bucket_name=bucket_name,
                                     object_name=file_name,
                                     file_path=file_path)
            self.logger.info(f"Uploaded file {file_name}")
        except ResponseError as e:
            self.logger.error(e)
            raise Exception(e)

    def upload_data_stream(self, bucket_name, file_name, data_stream, length):
        if not self._client.bucket_exists(bucket_name):
            self._client.make_bucket(bucket_name=bucket_name)
        try:
            self._client.put_object(bucket_name=bucket_name,
                                    object_name=file_name,
                                    data=data_stream,
                                    length=length)
        except ResponseError as err:
            self.logger.error(err)
            raise err

    def download_all_files(self, bucket_name, download_path):
        try:
            if self._client.bucket_exists(bucket_name):
                obj_list = self._client.list_objects(bucket_name)
                for obj in obj_list:
                    # list_objects() yields Object instances; pass the name
                    # and a per-object destination path.
                    self._client.fget_object(
                        bucket_name=bucket_name,
                        object_name=obj.object_name,
                        file_path=os.path.join(download_path,
                                               obj.object_name))
        except Exception as e:
            self.logger.error(e)
            raise e

    def download_n_files(self, bucket_name, download_path, num_of_files):
        try:
            count = 0
            for obj in self._client.list_objects(bucket_name):
                self._client.fget_object(
                    bucket_name=bucket_name,
                    object_name=obj.object_name,
                    file_path=os.path.join(download_path, obj.object_name))
                count = count + 1
                if count == num_of_files:
                    break
        except ResponseError as e:
            self.logger.error(e)
            raise e

    def count_files(self, bucket_name):
        # list_objects() returns a generator, so materialize it to count.
        return len(list(self._client.list_objects(bucket_name)))

    def get_policy(self, bucket_name):
        policy = self._client.get_bucket_policy(bucket_name)
        return policy

    def set_policy(self, bucket_name, policy):
        self._client.set_bucket_policy(bucket_name, policy)
import os
from timeit import default_timer as timer

from minio import Minio
from minio.error import ResponseError

if __name__ == '__main__':
    client = Minio('10.10.1.2:9000',
                   access_key='BKIKJAA5BMMU2RHO6IBB',
                   secret_key='V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12',
                   secure=False)

    object_dir = "./cifar10_objects"
    i = 0
    while i < 50000:
        filename = "img" + str(i)
        with open(os.path.join(object_dir, filename), 'rb') as file:
            file_stat = os.stat(os.path.join(object_dir, filename))
            client.put_object("cifar", filename, file, file_stat.st_size)
        i += 1
import os

from minio import Minio
from minio.error import ResponseError

client = Minio('localhost',
               access_key='minioadmin',
               secret_key='minioadmin',
               secure=True)

for root, dirs, files in os.walk("."):
    for file in files:
        print(root)
        try:
            with open(os.path.join(root, file), 'rb') as file_data:
                file_stat = os.stat(os.path.join(root, file))
                print(client.put_object('bucketname',
                                        os.path.join(root[2:], file),
                                        file_data, file_stat.st_size))
        except ResponseError as err:
            print(err)
class S3DataStore(DataStore):
    def __init__(self, params: S3DataStoreParameters):
        self.params = params
        access_key = None
        secret_key = None
        if params.creds_file:
            config = ConfigParser()
            config.read(params.creds_file)
            try:
                access_key = config.get('default', 'aws_access_key_id')
                secret_key = config.get('default', 'aws_secret_access_key')
            except Error as e:
                print("Error when reading S3 credentials file: %s" % e)
        else:
            access_key = os.environ.get('ACCESS_KEY_ID')
            secret_key = os.environ.get('SECRET_ACCESS_KEY')
        self.mc = Minio(self.params.end_point,
                        access_key=access_key,
                        secret_key=secret_key)

    def deploy(self) -> bool:
        return True

    def get_info(self):
        return "s3://{}".format(self.params.bucket_name)

    def undeploy(self) -> bool:
        return True

    def save_to_store(self):
        try:
            # Acquire a lock by (re)creating the lock file.
            self.mc.remove_object(self.params.bucket_name,
                                  SyncFiles.LOCKFILE.value)
            self.mc.put_object(self.params.bucket_name,
                               SyncFiles.LOCKFILE.value, io.BytesIO(b''), 0)

            checkpoint_file = None
            for root, dirs, files in os.walk(self.params.checkpoint_dir):
                for filename in files:
                    if filename == 'checkpoint':
                        checkpoint_file = (root, filename)
                        continue
                    abs_name = os.path.abspath(os.path.join(root, filename))
                    rel_name = os.path.relpath(abs_name,
                                               self.params.checkpoint_dir)
                    self.mc.fput_object(self.params.bucket_name, rel_name,
                                        abs_name)

            abs_name = os.path.abspath(
                os.path.join(checkpoint_file[0], checkpoint_file[1]))
            rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir)
            self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)

            # Release the lock.
            self.mc.remove_object(self.params.bucket_name,
                                  SyncFiles.LOCKFILE.value)
        except ResponseError as e:
            print("Got exception: %s while saving to S3" % e)

    def load_from_store(self):
        try:
            filename = os.path.abspath(
                os.path.join(self.params.checkpoint_dir, "checkpoint"))

            # Wait until the lock is removed.
            while True:
                objects = self.mc.list_objects_v2(self.params.bucket_name,
                                                  SyncFiles.LOCKFILE.value)
                if next(objects, None) is None:
                    try:
                        self.mc.fget_object(self.params.bucket_name,
                                            "checkpoint", filename)
                    except Exception:
                        continue
                    break
                time.sleep(10)

            # Check if there's a finished file.
            objects = self.mc.list_objects_v2(self.params.bucket_name,
                                              SyncFiles.FINISHED.value)
            if next(objects, None) is not None:
                try:
                    self.mc.fget_object(
                        self.params.bucket_name, SyncFiles.FINISHED.value,
                        os.path.abspath(
                            os.path.join(self.params.checkpoint_dir,
                                         SyncFiles.FINISHED.value)))
                except Exception:
                    pass

            ckpt = CheckpointState()
            if os.path.exists(filename):
                contents = open(filename, 'r').read()
                text_format.Merge(contents, ckpt)
                rel_path = os.path.relpath(ckpt.model_checkpoint_path,
                                           self.params.checkpoint_dir)
                objects = self.mc.list_objects_v2(self.params.bucket_name,
                                                  prefix=rel_path,
                                                  recursive=True)
                for obj in objects:
                    filename = os.path.abspath(
                        os.path.join(self.params.checkpoint_dir,
                                     obj.object_name))
                    self.mc.fget_object(obj.bucket_name, obj.object_name,
                                        filename)
        except ResponseError as e:
            print("Got exception: %s while loading from S3" % e)
def _crunch_numbers():
    s3_conn = BaseHook.get_connection("locals3")
    client = Minio(
        s3_conn.extra_dejson["host"].replace("http://", ""),
        access_key=s3_conn.login,
        secret_key=s3_conn.password,
        secure=False,
    )

    # Get list of all objects
    objects = [
        obj.object_name
        for obj in client.list_objects(bucket_name="inside-airbnb",
                                       prefix="listing")
    ]

    df = pd.DataFrame()
    for obj in objects:
        response = client.get_object(bucket_name="inside-airbnb",
                                     object_name=obj)
        temp_df = pd.read_csv(
            io.BytesIO(response.read()),
            usecols=["id", "price", "download_date"],
            parse_dates=["download_date"],
        )
        df = df.append(temp_df)

    # Per id, get the price increase/decrease.
    # There's probably a nicer way to do this.
    min_max_per_id = (
        df.groupby(["id"])
        .agg(
            download_date_min=("download_date", "min"),
            download_date_max=("download_date", "max"),
        )
        .reset_index()
    )

    df_with_min = (
        pd.merge(
            min_max_per_id,
            df,
            how="left",
            left_on=["id", "download_date_min"],
            right_on=["id", "download_date"],
        )
        .rename(columns={"price": "oldest_price"})
        .drop("download_date", axis=1)
    )

    df_with_max = (
        pd.merge(
            df_with_min,
            df,
            how="left",
            left_on=["id", "download_date_max"],
            right_on=["id", "download_date"],
        )
        .rename(columns={"price": "latest_price"})
        .drop("download_date", axis=1)
    )

    df_with_max = df_with_max[
        df_with_max["download_date_max"] != df_with_max["download_date_min"]
    ]

    df_with_max["price_diff_per_day"] = (
        df_with_max["latest_price"] - df_with_max["oldest_price"]
    ) / ((df_with_max["download_date_max"] - df_with_max["download_date_min"]).dt.days)
    df_with_max[["price_diff_per_day"]] = df_with_max[["price_diff_per_day"]].apply(
        pd.to_numeric
    )

    biggest_increase = df_with_max.nlargest(5, "price_diff_per_day")
    biggest_decrease = df_with_max.nsmallest(5, "price_diff_per_day")

    # We found the top 5, write back the results.
    biggest_increase_json = biggest_increase.to_json(orient="records")
    print(f"Biggest increases: {biggest_increase_json}")
    biggest_increase_bytes = biggest_increase_json.encode("utf-8")
    client.put_object(
        bucket_name="inside-airbnb",
        object_name="results/biggest_increase.json",
        data=io.BytesIO(biggest_increase_bytes),
        length=len(biggest_increase_bytes),
    )

    biggest_decrease_json = biggest_decrease.to_json(orient="records")
    print(f"Biggest decreases: {biggest_decrease_json}")
    biggest_decrease_bytes = biggest_decrease_json.encode("utf-8")
    client.put_object(
        bucket_name="inside-airbnb",
        object_name="results/biggest_decrease.json",
        data=io.BytesIO(biggest_decrease_bytes),
        length=len(biggest_decrease_bytes),
    )
def job(minio: Minio, bucket):
    job_id = bucket
    data = json.dumps({"content": "Lorem ipsum"}).encode()
    minio.put_object(job_id, INPUT_DIR + "/payload.json", BytesIO(data),
                     len(data))
    yield job_id
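# A hypothetical usage sketch: the generator above looks like a pytest
# fixture, so a test might consume it roughly as below. The fixture names
# and INPUT_DIR come from the snippet; the test itself is an assumption,
# not confirmed by the source.
def test_payload_uploaded(job, minio: Minio):
    response = minio.get_object(job, INPUT_DIR + "/payload.json")
    assert json.loads(response.read())["content"] == "Lorem ipsum"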
objname = 'f0'

# load tiff
d = tf.imread(fname)

# connect to minio
client = Minio('127.0.0.1:9000',
               access_key='minioadmin',
               secret_key='minioadmin',
               secure=False)

# convert object to byte stream
f = d[0, :, :]
b = f.tobytes()
bio = io.BytesIO(b)

# upload object
try:
    client.put_object(bucketname, objname, bio, len(b))
except ResponseError as err:
    print('Error during upload: {}'.format(err))

# download object
try:
    downloaded_data = client.get_object(bucketname, objname)
except ResponseError as err:
    print('Error during download: {}'.format(err))

# compare
res = downloaded_data.read()
new_f = np.frombuffer(res, dtype=f.dtype).reshape(f.shape)
if np.array_equal(f, new_f):
    print('Arrays are equal!')
class MinioDataStorage(DataStorage):
    def __init__(self, logger, host, port, access_key, secret_key,
                 secure=False):
        super(MinioDataStorage, self).__init__(logger)
        self.client = Minio('{}:{}'.format(host, port),
                            secret_key=secret_key,
                            access_key=access_key,
                            secure=secure)

    def save_request_data(self, request_id, data):
        bin_data = data.encode()
        raw = io.BytesIO(bin_data)
        raw.seek(0)
        if not self.client.bucket_exists('requests'):
            self.client.make_bucket('requests')
        try:
            self.client.put_object('requests',
                                   '{}_data.bin'.format(request_id),
                                   raw, len(bin_data))
        except Exception:
            return 1
        return 0

    def save_request_audit_data(self, request_id, instrument_id,
                                with_enrolment, with_request, data):
        raw = io.BytesIO(data)
        raw.seek(0)
        if not self.client.bucket_exists('requests'):
            self.client.make_bucket('requests')
        try:
            self.client.put_object('requests',
                                   '{}_audit.bin'.format(request_id),
                                   raw, len(data))
        except Exception:
            return 1
        return 0

    def save_learner_model(self, tesla_id, instrument_id, model):
        return 1

    def load_request_data(self, request_id):
        # get_object() returns an HTTP response; read the payload before
        # decoding it.
        response = self.client.get_object("requests",
                                          '{}_data.bin'.format(request_id))
        return response.read().decode()

    def load_request_audit_data(self, request_id, instrument_id):
        return self.db.requests.get_request_result_audit(
            request_id, instrument_id)

    def load_learner_model(self, tesla_id, instrument_id):
        return None
# limitations under the License.

# Note: YOUR-ACCESSKEYID, YOUR-SECRETACCESSKEY, my-testfile, my-bucketname and
# my-objectname are dummy values, please replace them with original values.

import os

from minio import Minio
from minio.error import ResponseError

client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEYID',
               secret_key='YOUR-SECRETACCESSKEY')

# Put a file with default content-type.
try:
    file_stat = os.stat('my-testfile')
    with open('my-testfile', 'rb') as file_data:
        client.put_object('my-bucketname', 'my-objectname',
                          file_data, file_stat.st_size)
except ResponseError as err:
    print(err)

# Put a file with 'application/csv'.
try:
    file_stat = os.stat('my-testfile.csv')
    with open('my-testfile.csv', 'rb') as file_data:
        client.put_object('my-bucketname', 'my-objectname',
                          file_data, file_stat.st_size,
                          content_type='application/csv')
except ResponseError as err:
    print(err)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Note: YOUR-ACCESSKEYID, YOUR-SECRETACCESSKEY, my-testfile, my-bucketname and
# my-objectname are dummy values, please replace them with original values.

import os

from minio import Minio

client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEYID',
               secret_key='YOUR-SECRETACCESSKEY')

# Put a file with default content-type.
file_stat = os.stat('my-testfile')
with open('my-testfile', 'rb') as file_data:
    client.put_object('my-bucketname', 'my-objectname',
                      file_data, file_stat.st_size)

# Put a file with 'application/csv'.
file_stat = os.stat('my-testfile.csv')
with open('my-testfile.csv', 'rb') as file_data:
    client.put_object('my-bucketname', 'my-objectname',
                      file_data, file_stat.st_size,
                      content_type='application/csv')