def test_drive_par(authenticated_user, tempdir):
    """Exercise pre-authenticated requests (PARs) at both drive and file
    granularity, checking that reader PARs allow download only and writer
    PARs also allow upload.
    """
    creds = StorageCreds(user=authenticated_user, service_url="storage")
    drive = Drive(name="test å∫ç∂ pars", creds=creds)

    drive.upload(filename=__file__, uploaded_name="tmp_test.py")
    local_copy = drive.download(filename="tmp_test.py", dir=tempdir)
    assert _same_file(__file__, local_copy)

    # A read-only PAR onto the whole drive resolves to a reader-ACL drive
    location = Location(drive_guid=drive.metadata().guid())
    read_par = PAR(location=location, user=authenticated_user,
                   aclrule=ACLRule.reader())
    par_drive = read_par.resolve()
    assert par_drive.metadata().acl() == ACLRule.reader()
    assert par_drive.metadata().uid() == drive.metadata().uid()

    listing = par_drive.list_files()
    assert len(listing) == 1
    assert listing[0].filename() == "tmp_test.py"
    local_copy = listing[0].open().download(dir=tempdir, force_par=True)
    assert _same_file(__file__, local_copy)

    # A writeable PAR onto the whole drive resolves to a writer-ACL drive
    write_par = PAR(location=location, user=authenticated_user,
                    aclrule=ACLRule.writer())
    par_drive = write_par.resolve()
    assert par_drive.metadata().acl() == ACLRule.writer()
    assert par_drive.metadata().uid() == drive.metadata().uid()

    listing = par_drive.list_files()
    assert len(listing) == 1
    assert listing[0].filename() == "tmp_test.py"

    # ...and the writer PAR lets us upload a second file
    par_drive.upload(filename=__file__, uploaded_name="tmp_test2.py")
    listing = par_drive.list_files()
    assert len(listing) == 2

    by_name = {meta.filename(): meta for meta in listing}
    assert "tmp_test.py" in by_name
    assert "tmp_test2.py" in by_name

    local_copy = by_name["tmp_test2.py"].open().download(dir=tempdir)
    assert _same_file(__file__, local_copy)

    # A reader PAR onto a single file can download but must not upload
    file_par = PAR(location=by_name["tmp_test.py"].location(),
                   user=authenticated_user, aclrule=ACLRule.reader())
    par_file = file_par.resolve()
    assert par_file.metadata().acl() == ACLRule.reader()
    local_copy = par_file.download(dir=tempdir)
    assert _same_file(__file__, local_copy)
    with pytest.raises(PermissionError):
        par_file.upload(__file__)

    # ...whereas a writer PAR onto the same file may upload
    file_par = PAR(location=by_name["tmp_test.py"].location(),
                   user=authenticated_user, aclrule=ACLRule.writer())
    par_file = file_par.resolve()
    assert par_file.metadata().acl() == ACLRule.writer()
    par_file.upload(__file__)
def create_job(
    self,
    auth_user,
    requirements,
    key_password,
    data_files,
    hugs_url=None,
    storage_url=None,
):
    """Create a job: upload the job's input files to a new cloud drive and
    ask the HUGS service to start the job on the HPC resource.

    Args:
        auth_user (Acquire.User): Authenticated Acquire user
        requirements (dict): Dictionary containing job details and requested
            resources. The following keys are required:
            "hostname", "username", "name", "run_command", "partition",
            "n_nodes", "n_tasks_per_node", "n_cpus_per_task", "memory_req",
            "job_duration"
            where partition must be one of:
            "cpu_test", "dcv", "gpu", "gpu_veryshort", "hmem", "serial",
            "test", "veryshort"
            Example:
                requirements = {"hostname": hostname, "username": username,
                                "name": "test_job", "n_nodes": 2,
                                "n_tasks_per_node": 2, "n_cpus_per_task": 2,
                                "memory": "128G", ...}
        key_password (str): Password for private key used to access the HPC

            TODO - having to pass in a password and get it through to
            Paramiko seems long winded, is there a better way to do this?
        data_files (dict): Data file(s) to be uploaded to the cloud drive to
            run the simulation. Simulation code files should be given in the
            "app" key and data files in the "data" key
        hugs_url (str): URL of HUGS service
        storage_url (str): URL of storage service
    Returns:
        dict: Dictionary containing information regarding job running on
        resource. This will contain the PAR for access for data upload and
        download.
    """
    from Acquire.Client import (
        Drive,
        Service,
        PAR,
        Authorisation,
        StorageCreds,
        Location,
        ACLRule,
    )
    from Acquire.ObjectStore import create_uuid
    import datetime
    import os

    if self._service is None:
        raise PermissionError("Cannot use a null service")

    if storage_url is None:
        storage_url = self._service_url + "/storage"

    if hugs_url is None:
        hugs_url = self._service_url + "/hugs"

    # Normalise the file arguments to lists; "app" is required but
    # "data" is optional
    if not isinstance(data_files["app"], list):
        data_files["app"] = [data_files["app"]]

    try:
        if not isinstance(data_files["data"], list):
            data_files["data"] = [data_files["data"]]
    except KeyError:
        pass

    # Get an authorisation to pass to the service
    hugs = Service(service_url=hugs_url)
    # Credentials to create the cloud storage drive
    creds = StorageCreds(user=auth_user, service_url=storage_url)

    # Append a shortened UUID to the job name to ensure we don't create
    # multiple drives with the same name
    short_uuid = create_uuid(short_uid=True)
    job_name = requirements["name"]
    job_name = f"{job_name.lower()}_{short_uuid}"

    # Create a cloud drive for the input and output data to be written to
    drive = Drive(creds=creds, name=job_name)

    # Files at or above this size are uploaded with the chunk uploader
    chunk_limit = 50 * 1024 * 1024

    # Store the metadata for the uploaded files
    uploaded_files = {"app": {}, "data": {}}

    # These probably won't be very big so don't check their size
    for f in data_files["app"]:
        file_meta = drive.upload(f, dir="app")
        uploaded_files["app"][f] = file_meta

    # We might not have any data files to upload
    try:
        for f in data_files["data"]:
            filesize = os.path.getsize(f)

            if filesize < chunk_limit:
                file_meta = drive.upload(f, dir="data")
            else:
                file_meta = drive.chunk_upload(f, dir="data")

            uploaded_files["data"][f] = file_meta
    except KeyError:
        pass

    auth = Authorisation(resource="job_runner", user=auth_user)

    # Create a PAR with a long lifetime here and return a version to the
    # user and another to the server to allow writing of result data
    drive_guid = drive.metadata().guid()
    location = Location(drive_guid=drive_guid)

    # Read the duration from the requirements dictionary
    # TODO - add in some reading of the duration
    # try:
    #     duration = requirements["duration"]
    #     par_expiry = datetime.datetime
    # NOTE(review): naive local time — confirm whether the PAR expiry
    # should be timezone-aware / UTC
    par_lifetime = datetime.datetime.now() + datetime.timedelta(days=1)

    # Create an ACL rule for this PAR so we can read and write to it
    aclrule = ACLRule.owner()
    par = PAR(
        location=location,
        user=auth_user,
        aclrule=aclrule,
        expires_datetime=par_lifetime,
    )

    par_secret = par.secret()
    encrypted_par_secret = hugs.encrypt_data(par_secret)

    # Encrypt the password we use to decrypt the private key used to
    # access the HPC cluster
    # TODO - is this a sensible way of doing this?
    encrypted_password = hugs.encrypt_data(key_password)

    par_data = par.to_data()

    args = {}
    args["authorisation"] = auth.to_data()
    args["par"] = par_data
    args["par_secret"] = encrypted_par_secret
    args["requirements"] = requirements
    args["key_password"] = encrypted_password

    function_response = self._service.call_function(function="job_runner", args=args)

    response = {}
    response["function_response"] = function_response
    response["par"] = par_data
    response["par_secret"] = par_secret
    response["upload_data"] = uploaded_files

    return response
def test_drives(authenticated_user, tempdir):
    """Create drives, upload and download a file, and check that listing
    metadata, versioning and drive ACL rules all behave as expected.
    """
    creds = StorageCreds(user=authenticated_user, service_url="storage")
    num_existing = len(Drive.list_toplevel_drives(creds=creds))

    drive_name = "test å∫ç∂ something"
    drive = Drive(name=drive_name, creds=creds, autocreate=True)
    assert drive.metadata().name() == drive_name
    assert drive.metadata().acl().is_owner()

    drive2 = Drive(name="test/this/is/a/../../dir", creds=creds)

    # two new top-level drives were created, neither has sub-drives
    assert len(Drive.list_toplevel_drives(creds=creds)) == num_existing + 2
    assert len(drive2.list_drives()) == 0
    assert len(drive.list_drives()) == 0

    assert len(drive.list_files()) == 0

    filemeta = drive.upload(filename=__file__)
    assert filemeta.is_complete()
    assert filemeta.acl().is_owner()
    assert filemeta.acl().is_readable()
    assert filemeta.acl().is_writeable()
    assert filemeta.uploaded_by() == authenticated_user.guid()
    assert filemeta.uploaded_when() is not None
    upload_datetime = filemeta.uploaded_when()

    # the stored name is the basename of the uploaded file
    basename = os.path.split(__file__)[1]
    assert filemeta.filename() == basename

    # a plain listing is incomplete (no metadata attached)
    files = drive.list_files()
    assert len(files) == 1
    assert files[0].filename() == filemeta.filename()
    assert not files[0].is_complete()

    # listing with metadata mirrors the upload metadata exactly
    files = drive.list_files(include_metadata=True)
    assert len(files) == 1
    assert files[0].filename() == filemeta.filename()
    assert files[0].is_complete()
    for attr in ("uid", "filesize", "checksum", "compression_type",
                 "uploaded_by", "uploaded_when"):
        assert getattr(files[0], attr)() == getattr(filemeta, attr)()
    assert files[0].acl().is_owner()
    assert files[0].uploaded_by() == authenticated_user.guid()
    assert files[0].uploaded_when() == upload_datetime

    f = files[0].open()
    downloaded = f.download(dir=tempdir)

    # make sure that the two files are identical
    with open(downloaded, "rb") as FILE:
        data1 = FILE.read()
    # remove this tmp file
    os.unlink(downloaded)
    with open(__file__, "rb") as FILE:
        data2 = FILE.read()
    assert data1 == data2

    # metadata is unchanged by the download
    for attr in ("uid", "filesize", "checksum", "compression_type",
                 "uploaded_by", "uploaded_when"):
        assert getattr(files[0], attr)() == getattr(filemeta, attr)()
    assert files[0].acl().is_owner()
    assert files[0].uploaded_by() == authenticated_user.guid()
    assert files[0].uploaded_when() == upload_datetime

    versions = f.list_versions()
    assert len(versions) == 1
    assert versions[0].filename() == filemeta.filename()
    assert versions[0].uploaded_when() == filemeta.uploaded_when()

    # re-uploading the same file creates a second version
    new_filemeta = drive.upload(filename=__file__, force_par=True)
    versions = f.list_versions()
    assert len(versions) == 2

    downloaded = new_filemeta.open().download(dir=tempdir)

    # make sure that the two files are identical
    with open(downloaded, "rb") as FILE:
        data1 = FILE.read()
    # remove this tmp file
    os.unlink(downloaded)
    with open(__file__, "rb") as FILE:
        data2 = FILE.read()
    assert data1 == data2

    # should be in upload order
    assert versions[0].uid() == filemeta.uid()
    assert versions[1].uid() == new_filemeta.uid()

    downloaded = new_filemeta.open().download(dir=tempdir, force_par=True)

    # make sure that the two files are identical
    with open(downloaded, "rb") as FILE:
        data1 = FILE.read()
    # remove this tmp file
    os.unlink(downloaded)
    assert data1 == data2

    # try to upload a file with path to the drive
    filemeta = drive.upload(filename=__file__,
                            uploaded_name="/test/one/../two/test.py")
    assert filemeta.filename() == "test/two/test.py"

    # cannot create a new Drive with non-owner ACLs
    with pytest.raises(PermissionError):
        drive = Drive(name="broken_acl", creds=creds,
                      aclrules=ACLRules.owner("12345@z0-z0"))

    drive = Drive(name="working_acl", creds=creds,
                  aclrules=ACLRules.owner(authenticated_user.guid()))