def main():
    """Run the full training pipeline: train a model, deploy it, clean up."""
    print('Fetching training configuration')
    train_config = fetch_json(TRAIN_CONFIG)

    # Each trainer returns the artifacts needed for deployment plus the
    # scratch paths that must be removed afterwards.
    if train_config['task_type'] == 'classification':
        print('Starting classification')
        (accuracy, classes, model_path,
         acc_plot_path, remove_paths) = train_classification(train_config)
        metadata = {'classes': classes}
    else:
        print('Starting sentiment analysis')
        (accuracy, model_path, metadata_path,
         acc_plot_path, remove_paths) = train_sa(train_config)
        metadata = {'metadata_filename': metadata_path}

    print('Deploying model')
    setup_inference(
        train_config['token'],
        train_config['task_type'],
        accuracy,
        model_path,
        acc_plot_path,
        metadata,
    )

    # Remove local scratch directories left behind by training.
    for scratch_dir in remove_paths:
        shutil.rmtree(scratch_dir)

    # Deleting the training config from S3 also shuts down the instance.
    delete_object(TRAIN_CONFIG)
def main(username):
    """Server-side training entry point.

    Downloads the user's config and dataset from S3, trains the requested
    model, publishes the results to the shared inference config, and then
    removes both the S3 inputs and the local working directory.
    """
    print(" In server training ")
    os.makedirs(os.path.join(DATA_PATH, 'checkpoints'))
    print("Created /data/checkpoints folders")

    # Pull the user's training configuration from S3.
    userdata_filename = os.path.join(DATA_PATH, f'{username}.json')
    download_file(
        os.path.join(TRAINING_CONFIG, f'{username}.json'),
        userdata_filename,
    )
    (task, username, model_name, ratio, is_reducelrscheduler, patience,
     factor, min_lr, optimizer, batch_size, learning_rate, epochs,
     dataset_filename) = get_config_data(userdata_filename)

    # Pull the dataset named by the configuration.
    download_file(
        os.path.join(TRAINING_CONFIG, dataset_filename),
        os.path.join(DATA_PATH, dataset_filename),
    )
    print(" Completed fetching data from s3 ")

    # Both trainers take the same argument list; dispatch on task type.
    trainers = {
        'image': train_image_classification,
        'text': train_sentiment_analysis,
    }
    inference_data = {}
    trainer = trainers.get(task)
    if trainer is not None:
        inference_data = trainer(
            username, model_name, ratio, is_reducelrscheduler, patience,
            factor, min_lr, optimizer, batch_size, learning_rate, epochs,
            dataset_filename)

    # Upload trained model artifacts to S3.
    upload_model_data(task, username)
    print('Uploaded inference data to s3')

    # Record this user's freshly trained model in the shared inference json.
    inference_config = fetch_json(INFERENCE_CONFIG)
    inference_config[username] = inference_data
    inference_config[username]['created'] = datetime.now().strftime(
        '%d-%m-%y %H:%M')
    put_object(INFERENCE_CONFIG, inference_config)
    print("Added user information to inference.json and uploaded to s3")

    # Remove the S3 inputs now that the artifacts are uploaded.
    delete_object(os.path.join(TRAINING_CONFIG, dataset_filename))
    delete_object(os.path.join(TRAINING_CONFIG, f'{username}.json'))
    print("Deleted user data from training folder in s3")

    # Remove the local working directory.
    shutil.rmtree(DATA_PATH)
    print("Deleted data folder")
def check_can_delete_manifest(s3_client, package_name, registry_name, push_dest):
    """Try to delete a package's manifest pointer and raw manifest directly.

    Creates a fresh package version, reads the ``latest`` pointer to obtain
    the raw manifest hash, then deletes both objects using *s3_client*.

    Returns True only if BOTH the manifest pointer and the raw manifest were
    deleted successfully.
    """
    print(
        "Attempting to delete package manifest pointer and raw manifest directly"
    )
    generate_new_package_version(package_name, registry_name, push_dest)
    manifest_pointer_s3_key = f".quilt/named_packages/{package_name}/latest"
    # BUG FIX: str.lstrip("s3://") strips *characters* ('s', '3', ':', '/'),
    # not a prefix, so bucket names beginning with any of those characters
    # were mangled. Strip the scheme prefix explicitly instead.
    manifest_pointer_s3_bucket = registry_name
    if manifest_pointer_s3_bucket.startswith("s3://"):
        manifest_pointer_s3_bucket = manifest_pointer_s3_bucket[len("s3://"):]
    manifest_pointer_s3_bucket = manifest_pointer_s3_bucket.rstrip("/")

    raw_manifest_hash = None
    fn_info = FunctionReporter(
        f"Retrieving hash from manifest point s3://{manifest_pointer_s3_bucket}/{manifest_pointer_s3_key}"
    )
    try:
        raw_manifest_hash = s3.get_object_as_string(
            s3_client, manifest_pointer_s3_bucket, manifest_pointer_s3_key)
        fn_info.succeeded(raw_manifest_hash)
    except Exception as ex:
        fn_info.failed(ex)

    fn_info = FunctionReporter(
        f"Deleting manifest pointer file s3://{manifest_pointer_s3_bucket}/{manifest_pointer_s3_key}"
    )
    try:
        delete_response = s3.delete_object(s3_client,
                                           manifest_pointer_s3_bucket,
                                           manifest_pointer_s3_key)
        fn_info.succeeded(delete_response)
        deleted_manifest_pointer = True
    except Exception as ex:
        fn_info.failed(ex)
        deleted_manifest_pointer = False

    if raw_manifest_hash is None:
        # BUG FIX: without the hash the raw manifest cannot be located; the
        # old code would have tried to delete ".quilt/packages/None".
        print("Skipping raw manifest deletion: manifest pointer could not be read")
        deleted_raw_manifest = False
    else:
        raw_manifest_s3_key = f".quilt/packages/{raw_manifest_hash}"
        fn_info = FunctionReporter(
            f"Deleting raw manifest file s3://{manifest_pointer_s3_bucket}/{raw_manifest_s3_key}"
        )
        try:
            delete_response = s3.delete_object(s3_client,
                                               manifest_pointer_s3_bucket,
                                               raw_manifest_s3_key)
            fn_info.succeeded(delete_response)
            deleted_raw_manifest = True
        except Exception as ex:
            fn_info.failed(ex)
            deleted_raw_manifest = False

    return deleted_manifest_pointer and deleted_raw_manifest
def is_recent(created, now):
    """Helper for clean(): True when an object created at *created*
    (formatted '%d-%m-%y %H:%M') is less than two hours old at *now*."""
    creation_time = datetime.strptime(created, '%d-%m-%y %H:%M')
    # BUG FIX: the old code used timedelta.seconds, which is only the
    # sub-day remainder (0-86399), so an object more than a day old could
    # look "recent" and never be cleaned. total_seconds() gives the true age.
    return (now - creation_time).total_seconds() < 7200  # 2 hours


def clean(event, context):
    """Lambda handler: delete model artifacts older than two hours.

    Skips whitelisted tokens, keeps recent entries, deletes the S3 objects
    of stale entries, then rewrites the inference config with the survivors.
    Returns a create_response() payload; internal errors yield HTTP 500.
    """
    try:
        # Abort unless the cleanup switch is active.
        clean_status_config = fetch_object('cleanup.json')
        if clean_status_config['status'] != 'active':
            print('Status inactive')
            return create_response({
                'result': 'error',
                'message': 'cleanup status is inactive.'
            })

        # Fetch inference data
        infer_config = fetch_object(INFERENCE_CONFIG)

        # Keep whitelisted and recent entries; delete everything else.
        print('Checking configs')
        safe_objects = {}
        current_time = datetime.now()
        for token, infer_vals in infer_config.items():
            if token in WHITELIST_TOKENS or is_recent(infer_vals['created'],
                                                      current_time):
                safe_objects[token] = infer_vals
            else:
                # Delete this stale entry's artifacts from S3.
                delete_object(infer_vals['model_filename'])
                # Sentiment-analysis models carry an extra metadata file.
                if infer_vals['task_type'] == 'sentimentanalysis':
                    delete_object(infer_vals['metadata_filename'])
                print('Deleted:', token)

        # Persist only the surviving entries.
        update_object(INFERENCE_CONFIG, safe_objects)
        return create_response({
            'result': 'success',
            'message': 'Old objects deleted'
        })
    except Exception as e:
        print(repr(e))
        return create_response({
            'result': 'internal_error',
            'message': repr(e),
        }, status_code=500)
def delete_all_segments(bucket, prefix):
    """Delete every numbered segment object '<prefix>.NNN' in *bucket*.

    Probes segments 000, 001, ... with HEAD requests and stops at the first
    segment that does not exist (non-200 status).
    """
    segment = 0
    while True:
        # Renamed from `object` so the builtin is no longer shadowed.
        obj_key = '%s.%03d' % (prefix, segment)
        # See if the segment object exists.
        r = s3.data_to_url('HEAD', '/%s/%s' % (bucket, obj_key), None, None)
        if r.status != 200:
            break
        # BUG FIX: this was a Python 2 print statement in an otherwise
        # Python 3 file (the rest uses f-strings); converted to a call.
        print('Deleting', bucket, obj_key)
        s3.delete_object(bucket, obj_key)
        segment += 1
def check_normal_creds_can_create_and_delete_random_file(bucket, key):
    """Probe whether default-chain boto3 credentials can write and then
    delete a throwaway object at s3://<bucket>/<key>.

    Returns True when the delete succeeds; a failed put is reported but does
    not short-circuit the delete attempt.
    """
    client = s3.get_s3_client(use_quilt3_botocore_session=False)

    reporter = FunctionReporter(f"Putting test file in s3://{bucket}/{key}")
    try:
        reporter.succeeded(s3.put_object(client, bucket, key))
    except Exception as ex:
        reporter.failed(ex)

    reporter = FunctionReporter(
        f"Deleting test file in s3://{bucket}/{key}")
    try:
        reporter.succeeded(s3.delete_object(client, bucket, key))
        return True
    except Exception as ex:
        reporter.failed(ex)
        return False
def main(package_name, bucket):
    """
    Debugging process:
    1. Using normal boto3 credentials, see if there are any restrictions on the bucket/keys in the bucket (legal hold, object lock, etc)
    2. Using normal boto3 credentials, confirm that we can delete some object from the bucket
    3. Record information about the normal boto3 credentials
    4. Reproduce the bug using quilt3 library directly
    5. Using normal boto3 credentials, confirm that we can delete the manifest objects (is problem key-specific?).
    6. Using s3_client from data_transfer.get_s3_client(), see if we can delete the manifest objects (is problem a bug in delete logic or pure permissions)
    7. Using session.get_botocore_session, see if we can delete the manifest objects (is the problem a bug in the data_transfer.get_s3_client logic?)
    8. We expect that the above problem will fail indicating that the problem originates in the permissions retrieved from session.get_botocore_session. Record information about the permissions.
    """
    registry_name = f"s3://{bucket}"
    push_dest_key_prefix = "quilt-tmp"
    push_dest = f"{registry_name}/{push_dest_key_prefix}"
    # BUG FIX: the key previously embedded the full registry URL and
    # misspelled the prefix (f"{registry_name}./quilt/named_packages/...");
    # S3 keys are bucket-relative and the quilt layout uses ".quilt/", so
    # get_info_about_key() below was probing a nonexistent key. Use the same
    # key shape as check_can_delete_manifest().
    manifest_pointer_s3_key = f".quilt/named_packages/{package_name}/latest"
    # NOTE(review): this test key lives under "quilt-debug-tmp/" while the
    # cleanup step empties "quilt-tmp/" — presumably intentional, but the
    # test file is then never cleaned up; confirm.
    test_file_s3_key = "quilt-debug-tmp/tmpfile"

    header("Get bucket info (policy, object lock, etc)")
    get_info_about_bucket(bucket)

    header(
        "Get bucket + key info (legal hold, object retention). "
        "(key = 'latest' manifest pointer) "
    )
    generate_new_package_version(package_name, registry_name, push_dest)
    get_info_about_key(bucket, manifest_pointer_s3_key)

    header(
        "Checking if boto3 with default cred provider chain can create and delete a file on s3"
    )
    normal_creds_can_delete = check_normal_creds_can_create_and_delete_random_file(
        bucket=bucket, key=test_file_s3_key)
    print("Normal creds can delete test file?", normal_creds_can_delete)

    header("Getting info about role/user given by default cred provider chain")
    get_info_about_normal_creds()

    header("Trying to reproduce failure during delete package")
    repro(package_name, registry_name, push_dest)

    header(
        "Checking if boto3 with default cred provider chain can delete a manifest"
    )
    normal_creds_can_delete = check_can_delete_manifest(
        s3.get_s3_client(use_quilt3_botocore_session=False), package_name,
        registry_name, push_dest)
    print("Normal creds can delete raw manifest and manifest pointer?",
          normal_creds_can_delete)

    header(
        "Checking if s3_client from quilt3.data_transfer.create_s3_client() can delete a manifest"
    )
    quilt3_create_s3_client_can_delete = check_can_delete_manifest(
        quilt3.data_transfer.create_s3_client(), package_name, registry_name,
        push_dest)
    print(
        "S3 client from quilt3.data_transfer.create_s3_client() can delete raw manifest and manifest pointer?",
        quilt3_create_s3_client_can_delete)

    header(
        "Checking if s3_client from quilt3.session.create_botocore_session() can delete a manifest"
    )
    quilt3_botocore_session_can_delete = check_can_delete_manifest(
        s3.get_s3_client(use_quilt3_botocore_session=True), package_name,
        registry_name, push_dest)
    print(
        "S3 client from quilt3.session.create_botocore_session() can delete raw manifest and manifest pointer?",
        quilt3_botocore_session_can_delete)

    header(
        "Getting IAM info about credentials provided via quilt3.session.create_botocore_session()"
    )
    get_info_about_quilt3_creds()

    header("Trying to clean up any leftover files")
    try:
        s3_client = s3.get_s3_client(use_quilt3_botocore_session=False)

        # Best-effort: remove everything we pushed under the temp prefix.
        fn_info = FunctionReporter(
            f"Deleting all keys in s3://{bucket}/{push_dest_key_prefix}")
        try:
            s3.empty_keyspace(s3_client, bucket, push_dest_key_prefix)
            fn_info.succeeded(None)
        except Exception as ex:
            fn_info.failed(ex)

        # For each manifest pointer left behind, delete the pointer and —
        # when its contents could be read — the raw manifest it names.
        manifest_pointer_prefix = f".quilt/named_packages/{package_name}/"
        for manifest_pointer_object in s3.list_keyspace(
                s3_client, bucket, prefix=manifest_pointer_prefix):
            manifest_pointer_key = manifest_pointer_object["Key"]
            raw_manifest_hash = None
            fn_info = FunctionReporter(
                f"Trying to get contents of manifest pointer s3://{bucket}/{manifest_pointer_key}"
            )
            try:
                raw_manifest_hash = s3.get_object_as_string(
                    s3_client, bucket, manifest_pointer_key)
                fn_info.succeeded(raw_manifest_hash)
            except Exception as ex:
                fn_info.failed(ex)

            fn_info = FunctionReporter(
                f"Trying to delete manifest_pointer: s3://{bucket}/{manifest_pointer_key}"
            )
            try:
                delete_response = s3.delete_object(s3_client, bucket,
                                                   manifest_pointer_key)
                fn_info.succeeded(delete_response)
            except Exception as ex:
                fn_info.failed(ex)

            if raw_manifest_hash is not None:
                raw_manifest_key = f".quilt/packages/{raw_manifest_hash}"
                fn_info = FunctionReporter(
                    f"Trying to delete raw_manifest: s3://{bucket}/{raw_manifest_key}"
                )
                try:
                    delete_response = s3.delete_object(s3_client, bucket,
                                                       raw_manifest_key)
                    fn_info.succeeded(delete_response)
                except Exception as ex:
                    fn_info.failed(ex)
    except Exception as ex:
        print("Cleanup failed with exception:", ex)