def crds(authenticated_user):
    """Upload the test CRDS datafile and ask the hugs service to
    process it, emptying the local bucket first.
    """
    get_local_bucket(empty=True)

    storage_creds = StorageCreds(user=authenticated_user, service_url="storage")
    test_drive = Drive(creds=storage_creds, name="test_drive")

    datafile = os.path.join(
        os.path.dirname(__file__),
        "../../../tests/data/proc_test_data/CRDS/bsd.picarro.1minute.248m.dat",
    )
    meta = test_drive.upload(datafile)

    par = PAR(location=meta.location(), user=authenticated_user)
    hugs = Service(service_url="hugs")
    encrypted_secret = hugs.encrypt_data(par.secret())
    authorisation = Authorisation(resource="process", user=authenticated_user)

    call_args = {
        "authorisation": authorisation.to_data(),
        "par": {"data": par.to_data()},
        "par_secret": {"data": encrypted_secret},
        "data_type": "CRDS",
        "source_name": "bsd.picarro.1minute.248m",
    }
    hugs.call_function(function="process", args=call_args)
def __init__(self, user=None, name=None, service=None,
             account_uid=None, account_name=None,
             accounting_service=None, accounting_url=None):
    """Construct the Account with the passed account_name, which is
       owned by the passed user. The account must already exist on
       the service, or else an exception will be raised.

       If 'user' is None then a null, empty Account is constructed.
       'name' is an alias for 'account_name'; 'account_uid' takes
       precedence over both. The accounting service is resolved from
       'service', then 'accounting_service', then 'accounting_url',
       falling back to the user's own accounting service.
    """
    if user is not None:
        self._account_uid = None
        self._account_name = None

        # the UID wins over any passed name; 'name' is just an
        # alias for 'account_name'
        if account_uid is not None:
            self._account_uid = str(account_uid)
        elif account_name is not None:
            self._account_name = str(account_name)
        elif name is not None:
            self._account_name = str(name)

        self._user = user

        # resolve the accounting service: an explicit 'service' is
        # resolved first, then the passed 'accounting_service', then
        # 'accounting_url', finally the user's default service
        if service is not None:
            from Acquire.Service import Service as _Service
            s = _Service.resolve(service, fetch=True)
            accounting_service = s["service"]
        elif accounting_service is None:
            if accounting_url is None:
                accounting_service = user.accounting_service()
            else:
                accounting_service = _get_accounting_service(
                                        accounting_url)

        from Acquire.Accounting import AccountingService \
            as _AccountingService

        if not isinstance(accounting_service, _AccountingService):
            raise TypeError(
                "You can only create an account on a valid "
                "AccountingService object")

        if not accounting_service.is_accounting_service():
            raise TypeError(
                "The passed service - %s - is not an accounting "
                "service." % str(accounting_service))

        self._accounting_service = accounting_service

        # if only a name (or nothing) was supplied, look the UID up
        # on the accounting service - "main" is the default name
        if self._account_uid is None:
            if self._account_name is None:
                self._account_name = "main"

            self._account_uid = _get_account_uid(user,
                                                 self._account_name,
                                                 accounting_service)
    else:
        # null Account - holds no state
        self._account_uid = None

    # lazily-populated state, filled in on demand
    self._last_update = None
    self._description = None
def __init__(self, user=None, service=None, storage_service=None,
             service_url=None, par=None, secret=None):
    """Create these credentials either from a logged-in user and
       associated storage service (or URL), or from a valid PAR with
       associated secret
    """
    self._user = None
    self._storage_service = None
    self._par = None
    self._secret = None

    if user is not None:
        # user-based credentials: the user must be a logged-in User
        from Acquire.Client import User as _User
        if not isinstance(user, _User):
            raise TypeError("The user must be type User")

        if not user.is_logged_in():
            raise PermissionError("The user must be logged in!")

        self._user = user

        # resolve the storage service: 'service' wins, then the
        # passed 'storage_service', then 'service_url', finally
        # the user's default storage service
        if service is not None:
            from Acquire.Service import Service as _Service
            storage_service = _Service.resolve(service,
                                               fetch=True)["service"]

        if storage_service is None:
            if service_url is None:
                storage_service = user.storage_service()
            else:
                storage_service = _get_storage_service(service_url)

        self._storage_service = storage_service
        assert (storage_service is not None)
    elif par is not None:
        # PAR-based credentials: a valid (unexpired) PAR plus the
        # secret needed to use it
        from Acquire.Client import PAR as _PAR
        if not isinstance(par, _PAR):
            raise TypeError("par must be type PAR")

        if par.expired():
            raise PermissionError(
                "The passed PAR is either invalid or expired!")

        self._par = par
        self._secret = secret
        self._storage_service = par.service()
        assert (self._storage_service is not None)

    # whichever route was taken, the resolved service must really
    # be a StorageService
    if self._storage_service is not None:
        from Acquire.Storage import StorageService as _StorageService
        if not isinstance(self._storage_service, _StorageService):
            raise TypeError("storage service must be type StorageService")
def test_process_CRDS(authenticated_user, tempdir):
    """End-to-end check that an uploaded CRDS datafile is processed
    into one result key per species (ch4, co, co2).
    """
    creds = StorageCreds(user=authenticated_user, service_url="storage")
    drive = Drive(creds=creds, name="test_drive")

    filepath = os.path.join(
        os.path.dirname(__file__),
        "../../../tests/data/proc_test_data/CRDS/bsd.picarro.1minute.248m.dat",
    )
    filemeta = drive.upload(filepath)

    # clear any leftover copy from a previous run
    Path("/tmp/bsd.picarro.1minute.248m.dat").unlink(missing_ok=True)

    par = PAR(location=filemeta.location(), user=authenticated_user)
    hugs = Service(service_url="hugs")
    par_secret = hugs.encrypt_data(par.secret())
    auth = Authorisation(resource="process", user=authenticated_user)

    args = {
        "authorisation": auth.to_data(),
        "par": {"data": par.to_data()},
        "par_secret": {"data": par_secret},
        "data_type": "CRDS",
        "source_name": "bsd.picarro.1minute.248m",
    }
    response = hugs.call_function(function="process", args=args)

    expected_keys = [
        "bsd.picarro.1minute.248m_ch4",
        "bsd.picarro.1minute.248m_co",
        "bsd.picarro.1minute.248m_co2",
    ]
    results = response["results"]["bsd.picarro.1minute.248m.dat"]

    # BUG FIX: a stray "return False" before this assertion made it
    # unreachable, so the test always passed vacuously
    assert sorted(results.keys()) == expected_keys
def add_service(self, service):
    """Resolve the passed service and cache it in this wallet's
    on-disk store."""
    from Acquire.Service import Service as _Service
    from Acquire.ObjectStore import string_to_safestring \
        as _string_to_safestring

    resolved = _Service.resolve(service, fetch=True)["service"]

    cache_dir = Wallet._get_service_dir(resolved.uid())
    safe_url = _string_to_safestring(resolved.canonical_url())
    cache_file = "%s/service_%s.json" % (cache_dir, safe_url)

    _write_service(service=resolved, filename=cache_file)
def test_process_GC(authenticated_user, tempdir):
    """Upload a GC data/precision file pair and check that processing
    produces the expected per-species result keys.
    """
    storage_creds = StorageCreds(user=authenticated_user, service_url="storage")
    test_drive = Drive(creds=storage_creds, name="test_drive")

    base_dir = os.path.dirname(__file__)
    data_filepath = os.path.join(
        base_dir,
        "../../../tests/data/proc_test_data/GC/capegrim-medusa.18.C",
    )
    precision_filepath = os.path.join(
        base_dir,
        "../../../tests/data/proc_test_data/GC/capegrim-medusa.18.precisions.C",
    )

    # clear any leftovers from a previous run
    for stale in ("/tmp/capegrim-medusa.18.C",
                  "/tmp/capegrim-medusa.18.precisions.C"):
        Path(stale).unlink(missing_ok=True)

    data_meta = test_drive.upload(data_filepath)
    precision_meta = test_drive.upload(precision_filepath)

    data_par = PAR(location=data_meta.location(), user=authenticated_user)
    precision_par = PAR(location=precision_meta.location(),
                        user=authenticated_user)

    hugs = Service(service_url="hugs")
    data_secret = hugs.encrypt_data(data_par.secret())
    precision_secret = hugs.encrypt_data(precision_par.secret())
    auth = Authorisation(resource="process", user=authenticated_user)

    call_args = {
        "authorisation": auth.to_data(),
        "par": {
            "data": data_par.to_data(),
            "precision": precision_par.to_data(),
        },
        "par_secret": {
            "data": data_secret,
            "precision": precision_secret,
        },
        "data_type": "GCWERKS",
        "source_name": "capegrim-medusa",
        "site": "CGO",
        "instrument": "medusa",
    }
    response = hugs.call_function(function="process", args=call_args)

    result_keys = sorted(response["results"]["capegrim-medusa.18.C"].keys())[:8]
    expected_keys = [
        "capegrim-medusa.18_C4F10",
        "capegrim-medusa.18_C6F14",
        "capegrim-medusa.18_CCl4",
        "capegrim-medusa.18_CF4",
        "capegrim-medusa.18_CFC-11",
        "capegrim-medusa.18_CFC-112",
        "capegrim-medusa.18_CFC-113",
        "capegrim-medusa.18_CFC-114",
    ]
    assert result_keys == expected_keys
def create_job(
    self,
    auth_user,
    requirements,
    key_password,
    data_files,
    hugs_url=None,
    storage_url=None,
):
    """ Create a job

        Args:
            auth_user (Acquire.User): Authenticated Acquire user

            The following keys are required:
                "hostname", "username", "name", "run_command",
                "partition", "n_nodes", "n_tasks_per_node",
                "n_cpus_per_task", "memory_req", "job_duration"
            where partition must be one of:
                "cpu_test", "dcv", "gpu", "gpu_veryshort", "hmem",
                "serial", "test", "veryshort"
            Example:
                requirements = {"hostname": hostname, "username": username,
                                "name": "test_job", "n_nodes": 2,
                                "n_tasks_per_node": 2, "n_cpus_per_task": 2,
                                "memory": "128G", ...}

            requirements (dict): Dictionary containing job details and
            requested resources
            key_password (str): Password for private key used to access
            the HPC
            data_files (dict): Data file(s) to be uploaded to the cloud
            drive to run the simulation. Simulation code files should be
            given in the "app" key and data files in the "data" key

            TODO - having to pass in a password and get it through to
            Paramiko seems long winded, is there a better way to do this?

            hugs_url (str): URL of HUGS service
            storage_url (str): URL of storage service
        Returns:
            dict: Dictionary containing information regarding job running
            on resource. This will contain the PAR for access for data
            upload and download.
    """
    from Acquire.Client import (
        Drive,
        Service,
        PAR,
        Authorisation,
        StorageCreds,
        Location,
        ACLRule,
    )
    from Acquire.ObjectStore import create_uuid
    import datetime
    import os

    if self._service is None:
        raise PermissionError("Cannot use a null service")

    # default to sibling services hosted alongside this one
    if storage_url is None:
        storage_url = self._service_url + "/storage"

    if hugs_url is None:
        hugs_url = self._service_url + "/hugs"

    # normalise the file arguments to lists; the "data" key is optional
    if not isinstance(data_files["app"], list):
        data_files["app"] = [data_files["app"]]

    try:
        if not isinstance(data_files["data"], list):
            data_files["data"] = [data_files["data"]]
    except KeyError:
        pass

    # Get an authorisaton to pass to the service
    hugs = Service(service_url=hugs_url)
    # Credentials to create the cloud storage drive
    creds = StorageCreds(user=auth_user, service_url=storage_url)

    # Append a shortened UUID to the job name to ensure we don't get
    # multiple drives with the same name
    short_uuid = create_uuid(short_uid=True)
    job_name = requirements["name"]
    job_name = f"{job_name.lower()}_{short_uuid}"

    # Create a cloud drive for the input and output data to be written to
    drive = Drive(creds=creds, name=job_name)

    # Files at or above this size are sent with the chunked uploader
    chunk_limit = 50 * 1024 * 1024

    # Store the metadata for the uploaded files, keyed by local path
    uploaded_files = {"app": {}, "data": {}}

    # These probably won't be very big so don't check their size
    for f in data_files["app"]:
        file_meta = drive.upload(f, dir="app")
        uploaded_files["app"][f] = file_meta

    # We might not have any data files to upload
    try:
        for f in data_files["data"]:
            filesize = os.path.getsize(f)

            if filesize < chunk_limit:
                file_meta = drive.upload(f, dir="data")
            else:
                file_meta = drive.chunk_upload(f, dir="data")

            uploaded_files["data"][f] = file_meta
    except KeyError:
        pass

    auth = Authorisation(resource="job_runner", user=auth_user)

    # Create a PAR with a long lifetime here and return a version to the
    # user and another to the server to allow writing of result data
    drive_guid = drive.metadata().guid()
    location = Location(drive_guid=drive_guid)

    # Read the duration from the requirements dictionary
    # TODO - add in some reading of the duration
    # try:
    #     duration = requirements["duration"]
    #     par_expiry = datetime.datetime
    par_lifetime = datetime.datetime.now() + datetime.timedelta(days=1)

    # Create an ACL rule for this PAR so we can read and write to it
    aclrule = ACLRule.owner()
    par = PAR(
        location=location,
        user=auth_user,
        aclrule=aclrule,
        expires_datetime=par_lifetime,
    )

    par_secret = par.secret()
    encryped_par_secret = hugs.encrypt_data(par_secret)

    # Encrypt the password we use to decrypt the private key used to
    # access the HPC cluster
    # TODO - is this a sensible way of doing this?
    encrypted_password = hugs.encrypt_data(key_password)

    par_data = par.to_data()

    args = {}
    args["authorisation"] = auth.to_data()
    args["par"] = par_data
    args["par_secret"] = encryped_par_secret
    args["requirements"] = requirements
    args["key_password"] = encrypted_password

    function_response = self._service.call_function(function="job_runner",
                                                    args=args)

    # hand the PAR and its (unencrypted) secret back to the caller so
    # they can read from / write to the job's drive
    response = {}
    response["function_response"] = function_response
    response["par"] = par_data
    response["par_secret"] = par_secret
    response["upload_data"] = uploaded_files

    return response
def process_files(
    self,
    user,
    files,
    data_type,
    source_name=None,
    overwrite=False,
    hugs_url=None,
    storage_url=None,
    datasource=None,
    site=None,
    instrument=None,
):
    """ Process the passed file(s)

        Args:
            user (User): Authenticated Acquire User
            files (str, list): Path of files to be processed
            data_type (str): Type of data to be processed (CRDS, GC etc)
            source_name (str, default=None): Name of the source; derived
            from the first filename if not given
            overwrite (bool, default=False): Overwrite previously stored data
            hugs_url (str): URL of HUGS service. Currently used for testing
            datasource (str): Datasource name or UUID
            This may be removed in the future.
            storage_url (str): URL of storage service. Currently used for testing
            This may be removed in the future.
            site (str, default=None): Name of site, three letter code or long name
            instrument (str, default=None): If no instrument name is passed we will attempt
            to find it from the filename.
        Returns:
            dict: UUIDs of Datasources storing data of processed files
            keyed by filename
        Raises:
            PermissionError: if this object holds no service
            TypeError: if data_type is GC and 'files' is not a list of
            (data, precision) tuples
    """
    data_type = data_type.upper()

    if self._service is None:
        raise PermissionError("Cannot use a null service")

    if not isinstance(files, list):
        files = [files]

    if data_type == "GC":
        if not all(isinstance(item, tuple) for item in files):
            # BUG FIX: the original *returned* this TypeError instead of
            # raising it, so the bad input was silently accepted
            raise TypeError(
                "If data type is GC, a list of tuples for data and precision filenames must be passed"
            )
        files = [(Path(f), Path(p)) for f, p in files]
    else:
        files = [Path(f) for f in files]

    # default to sibling services hosted alongside this one
    if storage_url is None:
        storage_url = self._service_url + "/storage"

    if hugs_url is None:
        hugs_url = self._service_url + "/hugs"

    hugs = Service(service_url=hugs_url)
    creds = StorageCreds(user=user, service_url=storage_url)
    drive = Drive(creds=creds, name="test_drive")
    auth = Authorisation(resource="process", user=user)

    # GC requires both the data file and the precision file, which must
    # be uploaded and passed to the service together
    results = {}
    for file in files:
        if data_type == "GC":
            # NOTE: source_name/site persist across iterations once set
            # from the first file - confirm this is the intended behaviour
            if source_name is None:
                source_name = file[0].stem

            if site is None:
                site = source_name.split(".")[0]

            # strip an instrument suffix, e.g. "capegrim-medusa" -> "capegrim"
            if "-" in site:
                site = site.split("-")[0]

            filemeta = drive.upload(file[0])
            par = PAR(location=filemeta.location(), user=user)
            par_secret = hugs.encrypt_data(par.secret())

            prec_meta = drive.upload(file[1])
            prec_par = PAR(location=prec_meta.location(), user=user)
            prec_par_secret = hugs.encrypt_data(prec_par.secret())

            args = {
                "authorisation": auth.to_data(),
                "par": {"data": par.to_data(), "precision": prec_par.to_data()},
                "par_secret": {"data": par_secret, "precision": prec_par_secret},
                "data_type": data_type,
                "datasource": datasource,
                "source_name": source_name,
                "overwrite": overwrite,
                "site": site,
                "instrument": instrument,
            }
        else:
            filemeta = drive.upload(file)
            par = PAR(location=filemeta.location(), user=user)
            par_secret = hugs.encrypt_data(par.secret())

            args = {
                "authorisation": auth.to_data(),
                "par": {"data": par.to_data()},
                "par_secret": {"data": par_secret},
                "data_type": data_type,
                "datasource": datasource,
                "source_name": source_name,
                "overwrite": overwrite,
            }

        # If we try to upload many files we don't want it to fail if a
        # single file contains overlapping data
        try:
            response = self._service.call_function(function="process", args=args)
            results.update(response["results"])
        except ValueError as err:
            # BUG FIX: for GC 'file' is a (data, precision) tuple which
            # has no .name attribute - key on the data file instead
            key = file[0].name if data_type == "GC" else file.name
            results[key] = err

    return results
def _read_service(filename):
    """Load and return the Service stored as JSON in 'filename'."""
    from Acquire.Client import Service as _Service

    service_data = _read_json(filename)
    return _Service.from_data(service_data)
def get_service(self, service=None, service_url=None, service_uid=None,
                service_type=None, autofetch=True):
    """Return the service at either 'service_url', or that has
       UID 'service_uid'. This will return the cached service
       if it exists, or will add a new service if we are able
       to validate it from a trusted registry
    """
    from Acquire.ObjectStore import string_to_safestring \
        as _string_to_safestring
    from Acquire.Service import Service as _Service

    if service is not None:
        # resolve without fetching - if that already yields a full
        # Service object then cache it and return immediately
        s = _Service.resolve(service, fetch=False)

        if s["service"] is not None:
            self.add_service(s["service"])
            return s["service"]

        # otherwise fall through to a lookup by UID / URL
        service_uid = s["service_uid"]
        service_url = s["service_url"]
        service = None

    import glob as _glob

    if service_url is None:
        if service_uid is None:
            raise PermissionError(
                "You need to specify one of service_uid or service_url")

        # we need to look up the name...
        service_dir = Wallet._get_service_dir(service_uid)

        # scan the cached files in this service's directory for a
        # matching UID
        service_files = _glob.glob("%s/service_*.json" % service_dir)

        for service_file in service_files:
            s = _read_service(service_file)
            if s.uid() == service_uid:
                service = s
                break
    else:
        from Acquire.Service import Service as _Service
        service_url = _Service.get_canonical_url(service_url,
                                                 service_type=service_type)

        # scan every cached service file for one with a matching
        # canonical URL
        service_files = _glob.glob("%s/*/service_%s.json" % (
            self._wallet_dir,
            _string_to_safestring(service_url)))

        for service_file in service_files:
            s = _read_service(service_file)
            if s.canonical_url() == service_url:
                service = s
                break

    must_write = False

    if service is None:
        if not autofetch:
            from Acquire.Service import ServiceError
            raise ServiceError("No service at %s:%s" %
                               (service_url, service_uid))

        # we need to look this service up from the registry
        service = self._get_service_from_registry(service_url=service_url,
                                                  service_uid=service_uid)
        must_write = True

    # check if the keys need rotating - if they do, load up
    # the new keys and save them to the service file...
    elif service.should_refresh_keys():
        try:
            service.refresh_keys()
            must_write = True
        except:
            # something went wrong refreshing keys - go back to the
            # registry...
            _output("Something went wrong refreshing keys...")
            _output("Refreshing service from the registry.")
            service = self._get_service_from_registry(
                                    service_url=service_url,
                                    service_uid=service_uid)
            must_write = True

    # sanity check: a UID mismatch means the cache or registry returned
    # the wrong service
    if service_uid is not None:
        if service.uid() != service_uid:
            raise PermissionError(
                "Disagreement over the service UID for '%s' (%s)" %
                (service, service_uid))

    if must_write:
        self.add_service(service)

    return service
def get_service(self, service_url=None, service_uid=None,
                service_type=None, autofetch=True):
    """Return the service at either 'service_url', or that has
       UID 'service_uid'. This will return the cached service
       if it exists, or will add a new service if we are able
       to validate it from a trusted registry
    """
    from Acquire.ObjectStore import string_to_safestring \
        as _string_to_safestring

    service = None

    if service_url is None:
        if service_uid is None:
            raise PermissionError(
                "You need to specify one of service_uid or service_url")

        # we need to look up the name...
        import glob as _glob

        # scan every cached service file for one with a matching UID
        service_files = _glob.glob("%s/service_*" % self._wallet_dir)

        for service_file in service_files:
            s = _read_service(service_file)
            if s.uid() == service_uid:
                service = s
                break
    else:
        from Acquire.Service import Service as _Service
        service_url = _Service.get_canonical_url(service_url,
                                                 service_type=service_type)

        # the cache filename is derived from the canonical URL so we
        # can read it directly - a failed read means "not cached"
        service_file = "%s/service_%s" % (
            self._wallet_dir,
            _string_to_safestring(service_url))

        try:
            service = _read_service(service_file)
        except:
            pass

    must_write = False

    if service is None:
        if not autofetch:
            from Acquire.Service import ServiceError
            raise ServiceError("No service at %s:%s" %
                               (service_url, service_uid))

        # we need to look this service up from the registry
        from Acquire.Registry import get_trusted_registry_service \
            as _get_trusted_registry_service

        _output("Connecting to registry...")
        _flush_output()

        registry = _get_trusted_registry_service(service_uid=service_uid,
                                                 service_url=service_url)

        _output("...connected to registry %s" % registry)
        _flush_output()

        # ensure we cache this registry...
        registry_file = "%s/service_%s" % (
            self._wallet_dir,
            _string_to_safestring(registry.canonical_url()))

        _write_service(service=registry, filename=registry_file)

        if service_url is not None:
            _output("Securely fetching keys for %s..." % service_url)
            _flush_output()
        else:
            _output("Securely fetching keys for UID %s..."
                    % service_uid)
            _flush_output()

        service = registry.get_service(service_url=service_url,
                                       service_uid=service_uid)

        _output("...success.\nFetched %s" % service)
        _flush_output()

        must_write = True

    # check if the keys need rotating - if they do, load up
    # the new keys and save them to the service file...
    if service.should_refresh_keys():
        service.refresh_keys()
        must_write = True

    # sanity check: a UID mismatch means the cache or registry returned
    # the wrong service
    if service_uid is not None:
        if service.uid() != service_uid:
            raise PermissionError(
                "Disagreement over the service UID for '%s' (%s)" %
                (service, service_uid))

    if must_write:
        # write the (possibly key-refreshed) service back to the cache
        service_file = "%s/service_%s" % (
            self._wallet_dir,
            _string_to_safestring(service.canonical_url()))

        _write_service(service=service, filename=service_file)

    return service
def test_run_calc(aaai_services, authenticated_user):
    """Submit a calculation request through the access service and push
    the resulting pending jobs onto the compute cluster.
    """
    # create and register the cluster on which this job will take place
    cluster = Cluster.create(service_url="compute",
                             user=aaai_services["compute"]["user"])

    user = authenticated_user
    assert user.is_logged_in()

    # make sure the paying account holds enough funds for the job
    deposit(user, 100.0, "Adding money to the account",
            accounting_url="accounting")

    account = Account(user=user, account_name="deposits",
                      accounting_url="accounting")
    assert account.balance() >= 100.0

    # upload the directory that contains all of the simulation input
    creds = StorageCreds(user=user, service_url="storage")
    drive = Drive(name="sim", creds=creds, autocreate=True)
    uploaded = drive.upload(_testdata())
    location = uploaded.location()

    print(drive.list_files(dir="example_sim/input"))
    print(location)

    # the request names the container image to run and the input
    # location; its authorisation may come from a different user than
    # the one paying for the job
    request = RunRequest(image="docker://test_image:latest", input=location)

    # the cheque authorises the access service to charge this single
    # transaction (matched by the request fingerprint) to the account
    cheque = Cheque.write(account=account,
                          recipient_url="access",
                          resource=request.fingerprint(),
                          max_spend=50.0)

    call_args = {
        "request": request.to_data(),
        "authorisation": Authorisation(
            user=user, resource=request.fingerprint()).to_data(),
        "cheque": cheque.to_data(),
    }

    access_service = Service("access")
    result = access_service.call_function("run_calculation", call_args)
    print(result)

    pending_uids = cluster.get_pending_job_uids()
    print(pending_uids)

    for uid in pending_uids:
        print(cluster.submit_job(uid))

    pending_uids = cluster.get_pending_job_uids()
    print(pending_uids)
def aaai_services(tmpdir_factory):
    """This function creates mocked versions of all of the main services
       of the system, returning the json describing each service as
       a dictionary (which is passed to the test functions as the fixture)
    """
    from Acquire.Identity import Authorisation
    from Acquire.Crypto import PrivateKey, OTP
    from Acquire.Service import call_function, Service

    # one temporary directory per service acts as its backing store
    _services = {}
    for name in ("registry", "identity", "accounting", "access",
                 "storage", "userdata", "compute"):
        _services[name] = tmpdir_factory.mktemp(name)

    wallet_dir = tmpdir_factory.mktemp("wallet")
    wallet_password = PrivateKey.random_passphrase()

    _set_services(_services, wallet_dir, wallet_password)

    password = PrivateKey.random_passphrase()
    args = {"password": password}
    responses = {}

    # NOTE(review): this value looks redacted in the source - confirm the
    # intended service password before relying on this fixture
    os.environ["SERVICE_PASSWORD"] = "******"
    os.environ["STORAGE_COMPARTMENT"] = str(_services["userdata"])

    def _setup_service(canonical_url, service_type):
        """Run admin/setup on one service and log in its admin user,
        returning the {service, user, response} record for the fixture.
        Mutates the shared 'args' dict, as the original inline code did.
        """
        args["canonical_url"] = canonical_url
        args["service_type"] = service_type
        response = call_function(canonical_url, function="admin/setup",
                                 args=args)
        service = Service.from_data(response["service"])
        otp = OTP(OTP.extract_secret(response["provisioning_uri"]))
        admin_user = _login_admin(canonical_url, "admin", password, otp)
        return {"service": service, "user": admin_user,
                "response": response}

    # the registry must come up first - all other services register
    # with it
    args["registry_uid"] = "Z9-Z9"  # UID of testing registry
    responses["registry"] = _setup_service("registry", "registry")
    registry_service = responses["registry"]["service"]
    assert (registry_service.registry_uid() == registry_service.uid())

    service_uids = [registry_service.uid()]

    # bring up the remaining services in the original order
    # (BUG FIX: removed the dead 'responses[name] = response' stores that
    # the access and compute sections immediately overwrote)
    for name in ("identity", "accounting", "access", "compute", "storage"):
        responses[name] = _setup_service(name, name)
        service = responses[name]["service"]

        # every service must have registered with our registry and have
        # been issued its own unique UID
        assert (service.registry_uid() == registry_service.uid())
        assert (service.uid() not in service_uids)
        service_uids.append(service.uid())

    accounting_service = responses["accounting"]["service"]

    # tell the access and compute services to trust the accounting
    # service so that they can charge for work
    for name in ("access", "compute"):
        resource = "trust_accounting_service %s" % accounting_service.uid()
        trust_args = {
            "service_url": accounting_service.canonical_url(),
            "authorisation": Authorisation(user=responses[name]["user"],
                                           resource=resource).to_data()
        }
        responses[name]["service"].call_function(
            function="admin/trust_accounting_service", args=trust_args)

    responses["_services"] = _services

    return responses