def test_delete_init(self):
    """
    Verifies DeleteData field initialization
    """
    # default init
    default_ddata = globus_sdk.DeleteData(self.tc, GO_EP1_ID)
    self.assertEqual(default_ddata["DATA_TYPE"], "delete")
    self.assertEqual(default_ddata["endpoint"], GO_EP1_ID)
    self.assertIn("submission_id", default_ddata)
    self.assertIn("DATA", default_ddata)
    self.assertEqual(len(default_ddata["DATA"]), 0)

    # init with params
    label = "label"
    params = {"param1": "value1", "param2": "value2"}
    param_ddata = globus_sdk.DeleteData(self.tc, GO_EP1_ID, label=label,
                                        recursive="True", **params)
    self.assertEqual(param_ddata["label"], label)
    self.assertEqual(param_ddata["recursive"], "True")
    for par in params:
        self.assertEqual(param_ddata[par], params[par])
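# Added example (not from the original test suite): a minimal standalone
# sketch of the dict-like DeleteData document that test_delete_init asserts
# against. The token and endpoint UUID are placeholders; note that in the SDK
# version used here, constructing a DeleteData fetches a submission_id from
# the Transfer API, so this only runs with valid credentials.
import globus_sdk

tc = globus_sdk.TransferClient(
    authorizer=globus_sdk.AccessTokenAuthorizer("PLACEHOLDER_TOKEN"))
ddata = globus_sdk.DeleteData(tc, "ddb59aef-6d04-11e5-ba46-22000b92c6ec",
                              label="example delete", recursive=True)
ddata.add_item("/~/old_dir/")            # appended under ddata["DATA"]
assert ddata["DATA_TYPE"] == "delete"    # the same fields the test checks
assert len(ddata["DATA"]) == 1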
def delete_file(self, path):
    # Delete the file or directory at the given path
    try:
        # if possible, delete the local file/directory
        super().delete_file(path)
    except Exception:
        pass
    try:
        # get the id of the endpoint that the file/directory exists on
        # endpoint_id = input("Enter the Endpoint ID")
        # get the transfer client
        auth = self.nc.get_authorizers()['transfer.api.globus.org']
        transfer_client = globus_sdk.TransferClient(authorizer=auth)
        ddata = globus_sdk.DeleteData(transfer_client,
                                      self.globus_remote_endpoint,
                                      recursive=True)
        # Recursively delete path contents (because of the recursive flag set above)
        ddata.add_item(path)
        # Make sure that the endpoint is activated
        transfer_client.endpoint_autoactivate(self.globus_remote_endpoint)
        submit_result = transfer_client.submit_delete(ddata)
        print("Task ID:", submit_result["task_id"])
    except Exception:
        pass
    return
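# Added sketch (not part of the original class): delete_file above returns as
# soon as the delete task is submitted. If a caller needs to block until the
# remote delete finishes, TransferClient.task_wait can poll the task; the
# names `transfer_client` and `submit_result` mirror those used in delete_file.
def wait_for_delete(transfer_client, submit_result, timeout=60):
    # task_wait returns True once the task completes, False if it times out
    done = transfer_client.task_wait(submit_result["task_id"],
                                     timeout=timeout, polling_interval=5)
    if not done:
        print("Delete task still running after {} seconds".format(timeout))
    return done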
def __init__(self, one=None, globus_client_id=None, local_endpoint=None, label='ibllib patch'):
    assert globus_client_id
    assert one
    self.local_endpoint = local_endpoint or globus.get_local_endpoint()
    self.label = label
    self.transfer_client = globus.login_auto(
        globus_client_id=globus_client_id, str_app='globus/admin')
    # transfers/delete from the current computer to the flatiron: mandatory and executed first
    self.globus_transfer = globus_sdk.TransferData(
        self.transfer_client, self.local_endpoint, FLAT_IRON_GLOBUS_ID,
        verify_checksum=True, sync_level='checksum', label=label)
    self.globus_delete = globus_sdk.DeleteData(
        self.transfer_client, FLAT_IRON_GLOBUS_ID,
        verify_checksum=True, sync_level='checksum', label=label)
    # get a dictionary of data repositories from Alyx (with globus ids)
    self.repos = {r['name']: r for r in one.alyx.rest('data-repository', 'list')}
    # transfers/delete from flatiron to optional third parties to synchronize / delete
    self.globus_transfers_locals = {}
    self.globus_deletes_locals = {}
    super().__init__(one=one)
def test_endpoint_manager_get_task(self):
    """
    Has sdktester2b submit a no-op task on the managed endpoint.
    Confirms sdktester1a can view the task as an admin.
    Confirms 403 when a non-manager attempts to use this resource.
    """
    # sdktester2b submits a no-op delete task
    ddata = globus_sdk.DeleteData(self.tc2, self.managed_ep_id,
                                  notify_on_fail=False)
    ddata.add_item("no-op.txt")
    task_id = self.tc2.submit_delete(ddata)["task_id"]

    # sdktester1a gets the task as admin
    task_doc = self.tc.endpoint_manager_get_task(task_id)
    self.assertEqual(task_doc["task_id"], task_id)
    self.assertEqual(task_doc["owner_id"],
                     get_user_data()["sdktester2b"]["id"])
    self.assertEqual(task_doc["type"], "DELETE")
    self.assertIn("status", task_doc)

    # 403 for non-managers, even if they submitted the task
    with self.assertRaises(TransferAPIError) as apiErr:
        self.tc2.endpoint_manager_get_task(task_id)
    self.assertEqual(apiErr.exception.http_status, 403)
    self.assertEqual(apiErr.exception.code, "PermissionDenied")
def test_endpoint_manager_task_event_list(self):
    """
    Has sdktester2b submit a no-op task on the managed endpoint. Waits for
    the task to fail, and confirms sdktester1a can see the failure event
    as an admin. Confirms 403 when a non-manager attempts to use this
    resource.
    """
    # sdktester2b submits a no-op delete task and waits for completion
    ddata = globus_sdk.DeleteData(self.tc2, self.managed_ep_id,
                                  notify_on_fail=False)
    ddata.add_item("no-op.txt")
    task_id = self.tc2.submit_delete(ddata)["task_id"]
    self.assertTrue(
        self.tc2.task_wait(
            task_id,
            timeout=DEFAULT_TASK_WAIT_TIMEOUT,
            polling_interval=DEFAULT_TASK_WAIT_POLLING_INTERVAL))

    # sdktester1a gets the task event list as admin
    events_doc = self.tc.endpoint_manager_task_event_list(task_id)
    self.assertIsInstance(events_doc, PaginatedResource)
    failure_event = events_doc[0]  # most recent event is first
    self.assertEqual(failure_event["DATA_TYPE"], "event")
    self.assertEqual(failure_event["code"], "FILE_NOT_FOUND")
    self.assertEqual(failure_event["description"], "file not found")

    # 403 for non-managers, even if they submitted the task
    with self.assertRaises(TransferAPIError) as apiErr:
        self.tc2.endpoint_manager_task_event_list(task_id)
    self.assertEqual(apiErr.exception.http_status, 403)
    self.assertEqual(apiErr.exception.code, "PermissionDenied")
def setUp(self):
    """
    Creates a TransferData object and a DeleteData object for testing
    """
    super(DataTests, self).setUp()
    self.tdata = globus_sdk.TransferData(self.tc, GO_EP1_ID, GO_EP2_ID)
    self.ddata = globus_sdk.DeleteData(self.tc, GO_EP1_ID)
def test_task_pause_info(self):
    """
    Creates a pause rule on the shared endpoint, then submits a task
    against it. Gets pause info for the task, validates results, and
    confirms the task is paused (or about to be).
    """
    # create pause rule
    rule_id = self.test_endpoint_manager_create_pause_rule()

    # submit a no-op delete task
    ddata = globus_sdk.DeleteData(self.tc, self.test_share_ep_id,
                                  notify_on_fail=False)
    ddata.add_item("no-op.txt")
    task_id = self.tc.submit_delete(ddata)["task_id"]

    # get pause info and validate
    pause_doc = self.tc.task_pause_info(task_id)

    # validate top level results
    self.assertEqual(pause_doc["DATA_TYPE"], "pause_info_limited")
    self.assertIsNone(pause_doc["source_pause_message"])
    self.assertIsNone(pause_doc["destination_pause_message"])

    # validate the rule results
    rule = pause_doc["pause_rules"][0]  # should be the only rule
    self.assertEqual(rule["DATA_TYPE"], "pause_rule_limited")
    self.assertEqual(rule["id"], rule_id)
    self.assertEqual(rule["message"], "SDK Test Pause Rule")
    self.assertNotIn("modified_by", rule)
    self.assertNotIn("modified_by_id", rule)
def clean_sharing():
    """
    Cleans out any files in ~/.globus/sharing/ on go#ep1 older than an hour
    at the start of each testsuite run
    """
    with patch_config():
        tc = get_transfer_client()
        path = "~/.globus/sharing/"
        hour_ago = datetime.utcnow() - timedelta(hours=1)
        filter_string = ("last_modified:," +
                         hour_ago.strftime("%Y-%m-%d %H:%M:%S"))
        try:
            old_files = tc.operation_ls(
                GO_EP1_ID, path=path, filter=filter_string, num_results=None)
        except globus_sdk.TransferAPIError:
            return
        kwargs = {"notify_on_succeeded": False, "notify_on_fail": False}
        ddata = globus_sdk.DeleteData(tc, GO_EP1_ID, **kwargs)
        for item in old_files:
            ddata.add_item(path + item["name"])
        if len(ddata["DATA"]):
            tc.submit_delete(ddata)
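# Added note (a sketch, not from the original suite): the "last_modified"
# filter passed to operation_ls above takes a "start,end" range, so leaving
# the start empty, as clean_sharing does, matches everything modified on or
# before the given time, i.e. files older than the cutoff.
from datetime import datetime, timedelta

cutoff = datetime.utcnow() - timedelta(hours=1)
filter_string = "last_modified:," + cutoff.strftime("%Y-%m-%d %H:%M:%S")
# e.g. "last_modified:,2021-01-03 16:52:34"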
def launch_transfers(self, globus_transfer_client, wait=True):
    gtc = globus_transfer_client

    def _wait_for_task(resp):
        if wait:
            status = gtc.task_wait(task_id=resp['task_id'], timeout=1)
            while gtc.get_task(task_id=resp['task_id'])['nice_status'] == 'OK':
                status = gtc.task_wait(task_id=resp['task_id'], timeout=30)
            if status is False:
                tinfo = gtc.get_task(task_id=resp['task_id'])['nice_status']
                raise ConnectionError(f"Could not connect to Globus {tinfo}")

    # handles the transfers first
    if len(self.globus_transfer['DATA']) > 0:
        # launch the transfer
        _wait_for_task(gtc.submit_transfer(self.globus_transfer))
        # re-initialize the globus_transfer property
        self.globus_transfer = globus_sdk.TransferData(
            gtc,
            self.globus_transfer['source_endpoint'],
            self.globus_transfer['destination_endpoint'],
            label=self.globus_transfer['label'],
            verify_checksum=True, sync_level='checksum')

    # do the same for deletes
    if len(self.globus_delete['DATA']) > 0:
        _wait_for_task(gtc.submit_delete(self.globus_delete))
        self.globus_delete = globus_sdk.DeleteData(
            gtc,
            endpoint=self.globus_delete['endpoint'],
            label=self.globus_delete['label'],
            verify_checksum=True, sync_level='checksum')
def submit_delete(self, skip_list=[128, 192, 256, 320, 384], for_real=False):
    '''
    Delete snapdirs and sub_id files for one snapnum in specified range of
    runs, except for those in skip_list (full simulations).
    '''
    skip_flocs = []
    for run_num in skip_list:
        skipx, skipy, skipz = get_xyz(run_num)
        skip_flocs.append(self.fd[run_num % 36] +
                          '{}_{}_{}/'.format(skipx, skipy, skipz))
    # Recursive is set at the upper level of DeleteData (unlike TransferData),
    # so submit TWO delete tasks to Globus.
    snapdata = globus_sdk.DeleteData(
        self.tc, self.jhu_endpoint,
        label='Indra_delete_snap_{}_dirs'.format(self.snapstr),
        recursive=True)
    filedata = globus_sdk.DeleteData(
        self.tc, self.jhu_endpoint,
        label='Indra_delete_snap_{}_files'.format(self.snapstr))
    for i, run_num in enumerate(range(self.runfirst, self.runfirst + self.nruns)):
        p = self.flocs[i]
        if p in skip_flocs:
            print('Keeping snapdir_{} in {}'.format(self.snapstr, p))
        else:
            snapdata.add_item('{}snapdir_{}/'.format(p, self.snapstr))
            NTask = get_NTask(run_num, self.snapnum)
            for file in range(NTask):
                filedata.add_item('{0}postproc_{1}/sub_ids_{1}.{2}'.format(
                    p, self.snapstr, file))
    if for_real:
        print('Deleting!')
        delete_result = self.tc.submit_delete(snapdata)
        delete_result = self.tc.submit_delete(filedata)
    else:
        print('Not deleting; returning snapdata, filedata')
        print('To delete, call NAME.tc.submit_delete(snapdata) and '
              '(filedata) or set for_real=True')
    return snapdata, filedata
def delete_file(self):
    tc = self.get_transfer_client()
    ddata = globus_sdk.DeleteData(tc, self.GLOBUS_DTN_ENDPOINT, recursive=True)
    ddata.add_item("/~/project/covid-vpr/snr_20091409000102.csv")
    delete_result = tc.submit_delete(ddata)
    print(str(delete_result))
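# Added note (sketch, not from the original module): recursive=True on
# DeleteData is what allows directory items to be deleted with their contents;
# for a single file like the .csv above it is not required. A file-only
# variant, with `tc`, `endpoint_id` and `path` supplied by the caller:
def delete_single_file(tc, endpoint_id, path):
    ddata = globus_sdk.DeleteData(tc, endpoint_id)  # recursive defaults to False
    ddata.add_item(path)
    return tc.submit_delete(ddata)["task_id"]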
def test_endpoint_manager_task_list(self):
    """
    Has sdktester2b submit transfer and delete tasks to the managed
    endpoint, then has sdktester1a get its endpoint manager task list.
    Confirms tasks submitted by sdktester2b on the managed endpoint are
    visible, and some expected fields are present.
    """
    # sdktester2b submits tasks
    # new dir with randomized name to prevent collision
    dest_dir = "transfer_dest_dir-" + str(getrandbits(128))
    dest_path = "/" + dest_dir + "/"
    self.tc2.operation_mkdir(self.managed_ep_id, dest_path)

    # transfer a file to the new dir
    tdata = globus_sdk.TransferData(self.tc2, GO_EP1_ID, self.managed_ep_id,
                                    notify_on_succeeded=False)
    source_path = "/share/godata/"
    file_name = "file1.txt"
    tdata.add_item(source_path + file_name, dest_path + file_name)
    transfer_id = self.tc2.submit_transfer(tdata)["task_id"]

    # delete the new dir
    ddata = globus_sdk.DeleteData(self.tc2, self.managed_ep_id,
                                  recursive=True, notify_on_succeeded=False)
    ddata.add_item(dest_path)
    delete_id = self.tc2.submit_delete(ddata)["task_id"]

    # sdktester1a gets endpoint manager task list
    tasks_doc = self.tc.endpoint_manager_task_list(
        filter_endpoint=GO_EP2_ID,
        filter_user_id=get_user_data()["sdktester2b"]["id"])

    # confirm submitted tasks can be found
    # and tasks have some expected fields
    expected_fields = ["username", "deadline", "type", "source_endpoint_id"]
    delete_found = False
    transfer_found = False
    self.assertIsInstance(tasks_doc, PaginatedResource)
    for task in tasks_doc:
        for field in expected_fields:
            self.assertIn(field, task)
        if task["task_id"] == transfer_id:
            transfer_found = True
        if task["task_id"] == delete_id:
            delete_found = True
        if transfer_found and delete_found:
            break
    # fail if both are not found
    self.assertTrue(delete_found and transfer_found)
def delete_file(tc, local_path, file_name):
    print("deleting file {}".format(local_path + file_name))
    ddata = globus_sdk.DeleteData(tc, LOCAL_ID)
    ddata.add_item(local_path + file_name)
    # # Ensure endpoint is activated
    # tc.endpoint_autoactivate(endpoint_id)
    result = tc.submit_delete(ddata)
def launch_transfers(self, local_servers=False):
    """
    patcher.launch_transfers()
    Launches the globus transfer and delete from the local patch computer to the flatiron
    :param local_servers: (False) if True, sync the local servers after the main transfer
    :return: None
    """
    gtc = self.transfer_client

    def _wait_for_task(resp):
        # patcher.transfer_client.get_task(task_id='364fbdd2-4deb-11eb-8ffb-0a34088e79f9')
        # on a good status:
        # Out[22]: TransferResponse({'bytes_checksummed': 377736912, 'bytes_transferred': 3011090432, 'canceled_by_admin': None, 'canceled_by_admin_message': None, 'command': 'API 0.10', 'completion_time': None, 'deadline': '2021-01-06T18:10:05+00:00', 'delete_destination_extra': False, 'destination_endpoint': 'simonsfoundation#ibl', 'destination_endpoint_display_name': 'IBL Flatiron SDSC Data', 'destination_endpoint_id': 'ab2d064c-413d-11eb-b188-0ee0d5d9299f', 'directories': 0, 'effective_bytes_per_second': 873268, 'encrypt_data': False, 'fatal_error': None, 'faults': 6, 'files': 186, 'files_skipped': 12, 'files_transferred': 76, 'history_deleted': False, 'is_ok': True, 'is_paused': False, 'key': 'active,2021-01-03T17:52:34.427087', 'label': '3B analog sync patch', 'nice_status': 'OK', 'nice_status_details': None, 'nice_status_expires_in': -1, 'nice_status_short_description': 'OK', 'owner_id': 'e633663a-8561-4a5d-ac92-f198d43b14dc', 'preserve_timestamp': False, 'recursive_symlinks': 'ignore', 'request_time': '2021-01-03T17:52:34+00:00', 'source_endpoint': 'internationalbrainlab#916c2766-bd2a-11ea-8f22-0a21f750d19b', 'source_endpoint_display_name': 'olivier_laptop', 'source_endpoint_id': '916c2766-bd2a-11ea-8f22-0a21f750d19b', 'status': 'ACTIVE', 'subtasks_canceled': 0, 'subtasks_expired': 0, 'subtasks_failed': 0, 'subtasks_pending': 98, 'subtasks_retrying': 0, 'subtasks_succeeded': 274, 'subtasks_total': 372, 'symlinks': 0, 'sync_level': 3, 'task_id': '364fbdd2-4deb-11eb-8ffb-0a34088e79f9', 'type': 'TRANSFER', 'username': '******', 'verify_checksum': True})  # noqa
        # on a checksum error
        # Out[26]: TransferResponse({'bytes_checksummed': 377736912, 'bytes_transferred': 3715901232, 'canceled_by_admin': None, 'canceled_by_admin_message': None, 'command': 'API 0.10', 'completion_time': None, 'deadline': '2021-01-06T18:10:05+00:00', 'delete_destination_extra': False, 'destination_endpoint': 'simonsfoundation#ibl', 'destination_endpoint_display_name': 'IBL Flatiron SDSC Data', 'destination_endpoint_id': 'ab2d064c-413d-11eb-b188-0ee0d5d9299f', 'directories': 0, 'effective_bytes_per_second': 912410, 'encrypt_data': False, 'fatal_error': None, 'faults': 7, 'files': 186, 'files_skipped': 12, 'files_transferred': 102, 'history_deleted': False, 'is_ok': False, 'is_paused': False, 'key': 'active,2021-01-03T17:52:34.427087', 'label': '3B analog sync patch', 'nice_status': 'VERIFY_CHECKSUM', 'nice_status_details': None, 'nice_status_expires_in': -1, 'nice_status_short_description': 'checksum verification failed', 'owner_id': 'e633663a-8561-4a5d-ac92-f198d43b14dc', 'preserve_timestamp': False, 'recursive_symlinks': 'ignore', 'request_time': '2021-01-03T17:52:34+00:00', 'source_endpoint': 'internationalbrainlab#916c2766-bd2a-11ea-8f22-0a21f750d19b', 'source_endpoint_display_name': 'olivier_laptop', 'source_endpoint_id': '916c2766-bd2a-11ea-8f22-0a21f750d19b', 'status': 'ACTIVE', 'subtasks_canceled': 0, 'subtasks_expired': 0, 'subtasks_failed': 0, 'subtasks_pending': 72, 'subtasks_retrying': 0, 'subtasks_succeeded': 300, 'subtasks_total': 372, 'symlinks': 0, 'sync_level': 3, 'task_id': '364fbdd2-4deb-11eb-8ffb-0a34088e79f9', 'type': 'TRANSFER', 'username': '******', 'verify_checksum': True})  # noqa
        # on a finished task
        # Out[4]: TransferResponse({'bytes_checksummed': 377736912, 'bytes_transferred': 4998806664, 'canceled_by_admin': None, 'canceled_by_admin_message': None, 'command': 'API 0.10', 'completion_time': '2021-01-03T20:04:50+00:00', 'deadline': '2021-01-06T19:11:00+00:00', 'delete_destination_extra': False, 'destination_endpoint': 'simonsfoundation#ibl', 'destination_endpoint_display_name': 'IBL Flatiron SDSC Data', 'destination_endpoint_id': 'ab2d064c-413d-11eb-b188-0ee0d5d9299f', 'directories': 0, 'effective_bytes_per_second': 629960, 'encrypt_data': False, 'fatal_error': None, 'faults': 15, 'files': 186, 'files_skipped': 12, 'files_transferred': 174, 'history_deleted': False, 'is_ok': None, 'is_paused': False, 'key': 'complete,2021-01-03T20:04:49.540956', 'label': '3B analog sync patch', 'nice_status': None, 'nice_status_details': None, 'nice_status_expires_in': None, 'nice_status_short_description': None, 'owner_id': 'e633663a-8561-4a5d-ac92-f198d43b14dc', 'preserve_timestamp': False, 'recursive_symlinks': 'ignore', 'request_time': '2021-01-03T17:52:34+00:00', 'source_endpoint': 'internationalbrainlab#916c2766-bd2a-11ea-8f22-0a21f750d19b', 'source_endpoint_display_name': 'olivier_laptop', 'source_endpoint_id': '916c2766-bd2a-11ea-8f22-0a21f750d19b', 'status': 'SUCCEEDED', 'subtasks_canceled': 0, 'subtasks_expired': 0, 'subtasks_failed': 0, 'subtasks_pending': 0, 'subtasks_retrying': 0, 'subtasks_succeeded': 372, 'subtasks_total': 372, 'symlinks': 0, 'sync_level': 3, 'task_id': '364fbdd2-4deb-11eb-8ffb-0a34088e79f9', 'type': 'TRANSFER', 'username': '******', 'verify_checksum': True})  # noqa
        # on an errored task
        # Out[10]: TransferResponse({'bytes_checksummed': 0, 'bytes_transferred': 0, 'canceled_by_admin': None, 'canceled_by_admin_message': None, 'command': 'API 0.10', 'completion_time': '2021-01-03T17:39:00+00:00', 'deadline': '2021-01-04T17:37:34+00:00', 'delete_destination_extra': False, 'destination_endpoint': 'simonsfoundation#ibl', 'destination_endpoint_display_name': 'IBL Flatiron SDSC Data', 'destination_endpoint_id': 'ab2d064c-413d-11eb-b188-0ee0d5d9299f', 'directories': 0, 'effective_bytes_per_second': 0, 'encrypt_data': False, 'fatal_error': {'code': 'CANCELED', 'description': 'canceled'}, 'faults': 2, 'files': 6, 'files_skipped': 0, 'files_transferred': 0, 'history_deleted': False, 'is_ok': None, 'is_paused': False, 'key': 'complete,2021-01-03T17:38:59.697413', 'label': 'test 3B analog sync patch', 'nice_status': None, 'nice_status_details': None, 'nice_status_expires_in': None, 'nice_status_short_description': None, 'owner_id': 'e633663a-8561-4a5d-ac92-f198d43b14dc', 'preserve_timestamp': False, 'recursive_symlinks': 'ignore', 'request_time': '2021-01-03T17:37:34+00:00', 'source_endpoint': 'internationalbrainlab#916c2766-bd2a-11ea-8f22-0a21f750d19b', 'source_endpoint_display_name': 'olivier_laptop', 'source_endpoint_id': '916c2766-bd2a-11ea-8f22-0a21f750d19b', 'status': 'FAILED', 'subtasks_canceled': 6, 'subtasks_expired': 0, 'subtasks_failed': 0, 'subtasks_pending': 0, 'subtasks_retrying': 0, 'subtasks_succeeded': 6, 'subtasks_total': 12, 'symlinks': 0, 'sync_level': 3, 'task_id': '5706dd2c-4dea-11eb-8ffb-0a34088e79f9', 'type': 'TRANSFER', 'username': '******', 'verify_checksum': True})  # noqa
        while True:
            tinfo = gtc.get_task(task_id=resp['task_id'])
            if tinfo['completion_time'] is not None:
                break
            _ = gtc.task_wait(task_id=resp['task_id'], timeout=30)
        if tinfo['fatal_error'] is not None:
            raise ConnectionError(f"Globus transfer failed \n {tinfo}")

    # handles the transfers first
    if len(self.globus_transfer['DATA']) > 0:
        # launch the transfer
        _wait_for_task(gtc.submit_transfer(self.globus_transfer))
        # re-initialize the globus_transfer property
        self.globus_transfer = globus_sdk.TransferData(
            gtc,
            self.globus_transfer['source_endpoint'],
            self.globus_transfer['destination_endpoint'],
            label=self.globus_transfer['label'],
            verify_checksum=True, sync_level='checksum')

    # do the same for deletes
    if len(self.globus_delete['DATA']) > 0:
        _wait_for_task(gtc.submit_delete(self.globus_delete))
        self.globus_delete = globus_sdk.DeleteData(
            gtc,
            endpoint=self.globus_delete['endpoint'],
            label=self.globus_delete['label'],
            verify_checksum=True, sync_level='checksum')

    # launch the local transfers and local deletes
    if local_servers:
        self.launch_transfers_secondary()
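# Added note on the re-initialization pattern above (a sketch, not from the
# original module): a TransferData/DeleteData document carries a submission_id
# that the Transfer API accepts only once; resubmitting the same document
# yields a "Duplicate" result pointing at the original task (see
# test_submit_delete at the end of this section). Rebuilding the document
# after every submit_* call, as launch_transfers does, avoids that:
def fresh_delete(gtc, submitted_delete):
    # `gtc` is a TransferClient; only the endpoint and label carry over
    return globus_sdk.DeleteData(gtc,
                                 endpoint=submitted_delete['endpoint'],
                                 label=submitted_delete['label'])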
def deleteHelper(self, ep_id, path):
    """
    Helper function for cleanup. Deletes by path and endpoint.
    """
    kwargs = {"notify_on_succeeded": False}  # prevent email spam
    ddata = globus_sdk.DeleteData(self.tc, ep_id, label="deleteHelper",
                                  recursive=True, **kwargs)
    ddata.add_item(path)
    self.tc.submit_delete(ddata)
def remove(request):
    """
    removes files on a remote Globus Online endpoint - API is not complete,
    so transfer 0 byte files instead of actually deleting anything
    """
    # global so that we can use it in signal handlers
    global client
    global transfer_client
    global task_id

    # connect to the service
    client, transfer_client = acquire_clients(request)
    activate_endpoint(transfer_client, request["endpoint"])

    label = None
    if "PEGASUS_WF_UUID" in os.environ and "PEGASUS_DAG_JOB_ID" in os.environ:
        label = (os.environ["PEGASUS_WF_UUID"] + " - " +
                 os.environ["PEGASUS_DAG_JOB_ID"])

    # set up a new delete transfer
    deadline = datetime.utcnow() + timedelta(hours=24)
    del_data = globus_sdk.DeleteData(
        transfer_client,
        request["endpoint"],
        label=label,
        recursive=request["recursive"],
        deadline=deadline,
        notify_on_succeeded=False,
        notify_on_failed=False,
        notify_on_inactive=False,
    )
    for f in request["files"]:
        del_data.add_item(f)

    # finalize and submit the transfer
    delete_result = transfer_client.submit_delete(del_data)
    task_id = delete_result["task_id"]

    # how many faults will we accept before giving up?
    acceptable_faults = min(100, len(request["files"]) * 3)

    # wait for the task to complete, and see the tasks and endpoint ls change
    try:
        wait_for_task(transfer_client, task_id, acceptable_faults)
    except Exception as err:
        logger.error(err)
        cancel_task(transfer_client, task_id)
        sys.exit(1)
    logger.info("Delete complete")
def my_delete(tclient, endpoint, mylabel, path, isrecursive):
    """ DeleteData call wrapper """
    now = datetime.datetime.utcnow()
    mydeadline = now + datetime.timedelta(minutes=DEADLINE)
    ddata = globus_sdk.DeleteData(tclient, endpoint, label=mylabel,
                                  recursive=isrecursive,
                                  deadline=str(mydeadline))
    ddata.add_item(path)
    delete_result = tclient.submit_delete(ddata)
    print("ddata task_id = ", delete_result["task_id"])
    my_task_wait(tclient, delete_result)
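# Added usage sketch for the wrapper above (the endpoint UUID, client, and
# path are hypothetical, not values from the original script):
# my_delete(tclient, 'aaaabbbb-cccc-dddd-eeee-ffff00001111',
#           'cleanup scratch', '/scratch/old_results/', True)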
def dir_operations(self, input_name, expected_name=None):
    """
    Given an input directory name: makes, renames, and ls's that directory,
    transfers files into it, and deletes it. If an expected_name is given,
    confirms output matches that name rather than the input_name.
    """
    # mkdir
    # name randomized to prevent collision
    rand = str(getrandbits(128))
    path = "/~/" + input_name + rand
    mkdir_doc = self.tc.operation_mkdir(GO_EP1_ID, path)
    self.assertEqual(mkdir_doc["code"], "DirectoryCreated")

    # confirm ls sees dir
    ls_doc = self.tc.operation_ls(GO_EP1_ID, path="/~/")
    expected = (expected_name or input_name) + rand
    self.assertIn(expected, [x["name"] for x in ls_doc])

    # rename
    new_rand = str(getrandbits(128))
    new_path = "/~/" + input_name + new_rand
    rename_doc = self.tc.operation_rename(GO_EP1_ID, path, new_path)
    self.assertEqual(rename_doc["code"], "FileRenamed")

    # confirm ls sees new dir name
    expected = (expected_name or input_name) + new_rand
    ls_doc = self.tc.operation_ls(GO_EP1_ID, path="/~/")
    self.assertIn(expected, [x["name"] for x in ls_doc])

    # transfer
    source_path = "/share/godata/"
    tdata = globus_sdk.TransferData(self.tc, GO_EP1_ID, GO_EP1_ID)
    tdata.add_item(source_path, new_path, recursive=True)
    transfer_id = self.tc.submit_transfer(tdata)["task_id"]
    self.assertTrue(
        self.tc.task_wait(
            transfer_id,
            timeout=DEFAULT_TASK_WAIT_TIMEOUT,
            polling_interval=DEFAULT_TASK_WAIT_POLLING_INTERVAL))

    # confirm ls sees files inside the directory
    ls_doc = self.tc.operation_ls(GO_EP1_ID, path=new_path)
    expected = ["file1.txt", "file2.txt", "file3.txt"]
    self.assertEqual(expected, [x["name"] for x in ls_doc])

    # delete
    ddata = globus_sdk.DeleteData(self.tc, GO_EP1_ID, recursive=True)
    ddata.add_item(new_path)
    delete_doc = self.tc.submit_delete(ddata)
    self.assertEqual(delete_doc["code"], "Accepted")
def test_endpoint_manager_task_pause_info(self):
    """
    Creates a pause rule on the shared endpoint, then has sdktester2b
    submit a no-op task on the shared endpoint.
    Confirms sdktester1a can see the task is paused (or about to be).
    Confirms 403 when a non-manager attempts to use this resource.
    """
    # sdktester1a creates pause rule and gives sdktester2b an ACL
    rule_id = self.test_endpoint_manager_create_pause_rule()
    acl_data = {
        "path": "/",
        "permissions": "rw",
        "principal_type": "identity",
        "principal": get_user_data()["sdktester2b"]["id"]
    }
    self.tc.add_endpoint_acl_rule(self.test_share_ep_id, acl_data)

    # sdktester2b submits a no-op delete task
    ddata = globus_sdk.DeleteData(self.tc2, self.test_share_ep_id,
                                  notify_on_fail=False)
    ddata.add_item("no-op.txt")
    task_id = self.tc2.submit_delete(ddata)["task_id"]

    # sdktester1a gets the task pause info as admin
    pause_doc = self.tc.endpoint_manager_task_pause_info(task_id)

    # validate top level results
    self.assertEqual(pause_doc["DATA_TYPE"], "pause_info_limited")
    self.assertIsNone(pause_doc["source_pause_message"])
    self.assertIsNone(pause_doc["destination_pause_message"])

    # validate the rule results
    rule = pause_doc["pause_rules"][0]  # should be the only rule
    self.assertEqual(rule["DATA_TYPE"], "pause_rule_limited")
    self.assertEqual(rule["id"], rule_id)
    self.assertEqual(rule["message"], "SDK Test Pause Rule")
    # self.assertEqual(rule["modified_by_id"],
    #                  get_user_data()["sdktester1a"]["id"])

    # 403 for non-managers, even if they submitted the task
    with self.assertRaises(TransferAPIError) as apiErr:
        self.tc2.endpoint_manager_task_pause_info(task_id)
    self.assertEqual(apiErr.exception.http_status, 403)
    self.assertEqual(apiErr.exception.code, "PermissionDenied")
def setUp(self):
    self.one = _ONE
    remote_repo = '15f76c0c-10ee-11e8-a7ed-0a448319c2f8'  # flatiron
    self.par = params.read('globus')
    label = 'test_patcher'
    authorizer = globus_sdk.AccessTokenAuthorizer(self.par.TRANSFER_TOKEN)
    self.gtc = globus_sdk.TransferClient(authorizer=authorizer)
    globus_transfer = globus_sdk.TransferData(
        self.gtc, self.par.LOCAL_REPO, remote_repo,
        verify_checksum=True, sync_level='checksum', label=label)
    globus_delete = globus_sdk.DeleteData(
        self.gtc, remote_repo,
        verify_checksum=True, sync_level='checksum', label=label)
    self.patcher = GlobusPatcher(one=self.one,
                                 globus_delete=globus_delete,
                                 globus_transfer=globus_transfer)
def cleanup(self) -> None:
    """Cleanup directories used by ProxyStore in the Globus endpoints.

    Warning:
        Will delete the directory at `local_path` on each endpoint.

    Warning:
        This method should only be called at the end of the program when
        the store will no longer be used, for example once all proxies
        have been resolved.
    """
    for endpoint in self.endpoints:
        delete_task = globus_sdk.DeleteData(
            self._transfer_client,
            endpoint=endpoint.uuid,
            recursive=True,
        )
        delete_task["notify_on_succeeded"] = False
        delete_task["notify_on_failed"] = False
        delete_task["notify_on_inactive"] = False
        delete_task.add_item(endpoint.endpoint_path)
        tdata = self._transfer_client.submit_delete(delete_task)
        self._wait_on_tasks(tdata["task_id"])
def cleanSharing(tc):
    """
    Cleans out any files in ~/.globus/sharing/ on go#ep1 older than an hour
    TODO: remove this once deleting shared directories does full cleanup
    """
    path = "~/.globus/sharing/"
    hour_ago = datetime.utcnow() - timedelta(hours=1)
    filter_string = ("last_modified:," +
                     hour_ago.strftime("%Y-%m-%d %H:%M:%S"))
    try:
        old_files = tc.operation_ls(GO_EP1_ID, path=path,
                                    filter=filter_string)
    except TransferAPIError:  # no .globus dir exists
        return
    ddata = globus_sdk.DeleteData(tc, GO_EP1_ID, notify_on_fail=False,
                                  notify_on_succeeded=False)
    for item in old_files:
        ddata.add_item(path + item["name"])
    if len(ddata["DATA"]):
        tc.submit_delete(ddata)
def submit_delete(self, for_real=False):
    '''
    Delete ALL contents of N_Y_Z including upper-level folders in
    cosmo/indra/ on FileDB. Assuming you have delete permissions!
    '''
    ddata = globus_sdk.DeleteData(
        self.tc, self.jhu_endpoint,
        label='Indra_delete_{}_series'.format(self.series_num),
        recursive=True)
    for i in range(self.nruns):
        ddata.add_item(self.flocs[i][:-1])
    if for_real:
        print('Deleting!')
        delete_result = self.tc.submit_delete(ddata)
    else:
        print('Not deleting; returning data')
        print('To delete, call NAME.tc.submit_delete(data) or set for_real=True')
    return ddata
def _delete_data(**kwargs):
    return globus_sdk.DeleteData(client, GO_EP1_ID, **kwargs)
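# Added usage sketch for the fixture factory above: named arguments pass
# through into the delete document, matching the behavior that
# test_delete_init exercises.
ddata = _delete_data(label="example", recursive=True)
assert ddata["label"] == "example"
assert ddata["recursive"] is True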
def submit_delete(self, skip_example=129, for_real=False):
    '''
    Delete main dir files, snapdirs, and sub_id files in non-priority snaps,
    which are determined by checking the existing snapdirs in the run given
    by skip_example (defaults to 2_0_1). This means that skip_example only
    contains priority snapshots and is not complete on FileDB.
    '''
    skipx, skipy, skipz = get_xyz(skip_example)
    skip_floc = self.fd[skip_example % 36] + '{}_{}_{}/'.format(skipx, skipy, skipz)
    # list snaps in skip_example: keep these
    kept_snapdirs = [
        entry["name"] for entry in
        self.tc.operation_ls(self.jhu_tc['display_name'], path=skip_floc)
        if entry['type'] == 'dir'
    ]
    # Recursive is set at the upper level of DeleteData (unlike TransferData),
    # so submit TWO delete tasks to Globus.
    snapdata = globus_sdk.DeleteData(
        self.tc, self.jhu_endpoint,
        label='Indra_delete_{}_{}_{}_dirs'.format(self.x, self.y, self.z),
        recursive=True)
    filedata = globus_sdk.DeleteData(
        self.tc, self.jhu_endpoint,
        label='Indra_delete_{}_{}_{}_files'.format(self.x, self.y, self.z))
    for snapnum in range(64):
        if 'snapdir_{:03d}'.format(snapnum) in kept_snapdirs:
            print('Keeping snapdir_{:03d} in {}'.format(snapnum, self.floc))
        else:
            snapdata.add_item('{}snapdir_{:03d}/'.format(self.floc, snapnum))  # recursive = True
            NTask = get_NTask(self.run_num, snapnum)
            for file in range(NTask):
                filedata.add_item('{0}postproc_{1:03d}/sub_ids_{1:03d}.{2}'.format(
                    self.floc, snapnum, file))
    main_dir_files = [
        entry['name'] for entry in
        self.tc.operation_ls(self.jhu_tc['display_name'], path=self.floc)
        if entry['type'] == 'file'
    ]
    for item in main_dir_files:
        filedata.add_item(self.floc + item)
    if for_real:
        print('Deleting!')
        delete_result = self.tc.submit_delete(snapdata)
        delete_result = self.tc.submit_delete(filedata)
    else:
        print('Not deleting; returning snapdata, filedata')
        print('To delete, call NAME.tc.submit_delete(snapdata) and '
              '(filedata) or set for_real=True')
    return snapdata, filedata
def share_data(args):
    user_source_endpoint = args.source_endpoint or source_endpoint
    user_shared_endpoint = args.shared_endpoint or shared_endpoint
    user_source_path = args.source_path or source_path
    user_destination_path = args.destination_path or destination_path
    if not user_source_path.startswith('/'):
        eprint('Source path must be absolute')
        sys.exit(1)
    if not user_destination_path.startswith('/'):
        eprint('Destination path must be absolute')
        sys.exit(1)

    if args.auth == 'native':
        # get an authorizer if it is a Native App
        authorizer = get_native_app_authorizer(client_id=CLIENT_ID)
    elif args.auth == 'client-credentials':
        secret = args.client_secret or CLIENT_SECRET
        if not secret:
            eprint('--auth client-credentials chosen, but no secret provided!'
                   ' Set "--client-secret <your secret>"')
            sys.exit(1)
        # get an authorizer if it is a Confidential App
        authorizer = get_confidential_app_authorizer(client_id=CLIENT_ID,
                                                     client_secret=secret)
    else:
        raise ValueError('Invalid Authenticator, this script only understands '
                         'Native and Client Credential')

    # create a TransferClient object
    tc = globus_sdk.TransferClient(authorizer=authorizer)

    # check if a destination directory exists at all
    try:
        tc.operation_ls(user_shared_endpoint, path=user_destination_path)
    except TransferAPIError as e:
        eprint(e)
        sys.exit(1)

    dirname, leaf = os.path.split(user_source_path)
    if leaf == '':
        _, leaf = os.path.split(dirname)
    destination_directory = os.path.join(user_destination_path, leaf) + '/'

    # check if a directory with the same name was already transferred to the
    # destination path; if it was and the --delete option is specified,
    # delete the directory
    try:
        tc.operation_ls(user_shared_endpoint, path=destination_directory)
        if not args.delete:
            eprint('Destination directory exists. Delete the directory or '
                   'use --delete option')
            sys.exit(1)
        print('Destination directory, {}, exists and will be deleted'
              .format(destination_directory))
        ddata = globus_sdk.DeleteData(
            tc, user_shared_endpoint,
            label='Share Data Example',
            recursive=True)
        ddata.add_item(destination_directory)
        print('Submitting a delete task')
        task = tc.submit_delete(ddata)
        print('\ttask_id: {}'.format(task['task_id']))
        tc.task_wait(task['task_id'])
    except TransferAPIError as e:
        if e.code != u'ClientError.NotFound':
            eprint(e)
            sys.exit(1)

    # create a destination directory
    try:
        print('Creating destination directory {}'.format(destination_directory))
        tc.operation_mkdir(user_shared_endpoint, destination_directory)
    except TransferAPIError as e:
        eprint(e)
        sys.exit(1)

    # grant group/user read access to the destination directory
    if args.user_uuid:
        rule_data = {
            "DATA_TYPE": "access",
            "principal_type": "identity",
            "principal": args.user_uuid,
            "path": destination_directory,
            "permissions": "r",
        }
        try:
            print('Granting user, {}, read access to the destination directory'
                  .format(args.user_uuid))
            tc.add_endpoint_acl_rule(user_shared_endpoint, rule_data)
        except TransferAPIError as e:
            if e.code != u'Exists':
                eprint(e)
                sys.exit(1)

    if args.group_uuid:
        rule_data = {
            "DATA_TYPE": "access",
            "principal_type": "group",
            "principal": args.group_uuid,
            "path": destination_directory,
            "permissions": "r",
        }
        try:
            print('Granting group, {}, read access to the destination directory'
                  .format(args.group_uuid))
            tc.add_endpoint_acl_rule(user_shared_endpoint, rule_data)
        except TransferAPIError as e:
            if e.code != u'Exists':
                eprint(e)
                sys.exit(1)

    # transfer data - source directory recursively
    tdata = globus_sdk.TransferData(tc, user_source_endpoint,
                                    user_shared_endpoint,
                                    label='Share Data Example')
    tdata.add_item(user_source_path, destination_directory, recursive=True)
    try:
        print('Submitting a transfer task')
        task = tc.submit_transfer(tdata)
    except TransferAPIError as e:
        eprint(e)
        sys.exit(1)
    print('\ttask_id: {}'.format(task['task_id']))
    print('You can monitor the transfer task programmatically using Globus SDK'
          ', or go to the Web UI, https://www.globus.org/app/activity/{}.'
          .format(task['task_id']))
def globus_delete_datasets(datasets, dry=True, local_only=False):
    """
    For each dataset in the queryset, delete the dataset record in the
    database and attempt a Globus delete for all physical file records
    associated. Admin territory.
    :param datasets:
    :param dry: default True
    :param local_only: only delete from non-FlatIron locations: in this case
        only file records will be removed from the database, not the dataset
        records.
    :return:
    """
    # first get the list of Globus endpoints concerned
    file_records = FileRecord.objects.filter(dataset__in=datasets)
    if local_only:
        file_records = file_records.filter(
            data_repository__globus_is_personal=True)
        file_records = file_records.exclude(
            data_repository__name__icontains='flatiron')
    globus_endpoints = file_records.values_list(
        'data_repository__globus_endpoint_id', flat=True).distinct()
    # create a globus delete_client for each globus endpoint
    gtc = globus_transfer_client()
    if not dry:
        delete_clients = []
        for ge in globus_endpoints:
            delete_clients.append(globus_sdk.DeleteData(gtc, ge, label=''))
    # append each file for deletion
    current_path = None
    for i, ge in enumerate(globus_endpoints):
        # get endpoint status before continuing
        endpoint_info = gtc.get_endpoint(ge)
        # if the endpoint is not globus_connect (i.e. not personal) this returns None
        endpoint_connected = endpoint_info.data['gcp_connected'] is not False
        # if the endpoint is offline, skip it
        if not endpoint_connected:
            logger.warning(endpoint_info.data['display_name'] +
                           ' is offline. SKIPPING.')
            continue
        frs = FileRecord.objects.filter(
            dataset__in=datasets,
            data_repository__globus_endpoint_id=ge).order_by('relative_path')
        for fr in frs:
            add_uuid = not fr.data_repository.globus_is_personal
            file2del = _filename_from_file_record(fr, add_uuid=add_uuid)
            if dry:
                logger.info(file2del)
            else:
                if current_path != Path(file2del).parent:
                    current_path = Path(file2del).parent
                    try:
                        ls_current_path = [
                            f['name'] for f in
                            gtc.operation_ls(ge, path=current_path)]
                    except globus_sdk.exc.TransferAPIError as err:
                        if 'ClientError.NotFound' in str(err):
                            ls_current_path = []
                        else:
                            raise err
                if Path(file2del).name in ls_current_path:
                    logger.info('DELETE: ' + file2del)
                    delete_clients[i].add_item(file2del)
    # launch the deletion jobs and remove records from the database
    if dry:
        return
    for dc in delete_clients:
        # submitting a deletion without data would create an error
        if dc['DATA'] == []:
            continue
        gtc.submit_delete(dc)
    file_records.delete()
    if not local_only:
        for ds in datasets:
            ds.delete()
def globus_delete_local_datasets(datasets, dry=True):
    """
    For each dataset in the queryset, delete the file records belonging to a
    Globus personal repo, but only if a server file exists and matches the
    size.
    :param datasets:
    :param dry: default True
    :return:
    """
    # first get the list of Globus endpoints concerned
    file_records = FileRecord.objects.filter(dataset__in=datasets)
    globus_endpoints = file_records.values_list(
        'data_repository__globus_endpoint_id', flat=True).distinct()
    # create a globus delete_client for each globus endpoint
    gtc = globus_transfer_client()
    delete_clients = []
    for ge in globus_endpoints:
        delete_clients.append(globus_sdk.DeleteData(gtc, ge, label=''))

    def _ls_globus(file_record, dry=dry, add_uuid=False):
        try:
            path = Path(_filename_from_file_record(file_record, add_uuid=add_uuid))
            ls_obj = gtc.operation_ls(
                file_record.data_repository.globus_endpoint_id,
                path=path.parent)
        except globus_sdk.exc.TransferAPIError as err:
            if 'ClientError.NotFound' in str(err):
                return
            else:
                raise err
        return [ls for ls in ls_obj['DATA'] if ls['name'] == path.name]

    # append each file for deletion
    fr2delete = []
    for ds in datasets:
        # check the existence of the server file
        fr_server = ds.file_records.filter(
            exists=True, data_repository__globus_is_personal=False).first()
        if fr_server is None:
            logger.warning(str(ds.session) + '/' + ds.collection + '/' +
                           ds.name + " doesn't exist on server - skipping")
            continue
        ls_server = _ls_globus(fr_server, add_uuid=True)
        # if the file is not found on the remote server, do nothing
        if ls_server == [] or ls_server is None:
            logger.warning(fr_server.relative_path +
                           " not found on server - skipping")
            continue
        fr_local = ds.file_records.filter(
            data_repository__globus_is_personal=True)
        for frloc in fr_local:
            ls_local = _ls_globus(frloc)
            # if the data is not found on the local server, remove the file
            # record from the database
            if ls_local == [] or ls_local is None:
                logger.info('NO FILE ON LOCAL, SKIP: ' +
                            _filename_from_file_record(frloc))
                fr2delete.append(frloc.id)
                continue
            # if the file sizes don't match, throw a warning and continue
            if not ls_local[0]['size'] == ls_server[0]['size']:
                logger.warning(frloc.relative_path +
                               " sizes don't check out, skipping")
                continue
            # the file exists both locally and remotely
            fr2delete.append(frloc.id)
            file2del = _filename_from_file_record(frloc)
            del_client = [dc for dc in delete_clients if dc['endpoint'] ==
                          str(frloc.data_repository.globus_endpoint_id)][0]
            del_client.add_item(file2del)
            logger.info('DELETE: ' + _filename_from_file_record(frloc))
    # launch the deletion jobs and remove records from the database
    if dry:
        return
    for dc in delete_clients:
        # submitting a deletion without data would create an error
        if dc['DATA'] == []:
            continue
        gtc.submit_delete(dc)
    # remove file records
    frecs = FileRecord.objects.filter(id__in=fr2delete).exclude(
        data_repository__globus_is_personal=False)
    frecs.delete()
def clean():
    # constants
    SDK_USER_ID = "84942ca8-17c4-4080-9036-2f58e0093869"
    GO_EP1_ID = "ddb59aef-6d04-11e5-ba46-22000b92c6ec"
    GO_EP2_ID = "ddb59af0-6d04-11e5-ba46-22000b92c6ec"
    # TODO: remove EP3 when EP1 and EP2 support symlinks
    GO_EP3_ID = "4be6107f-634d-11e7-a979-22000bf2d287"
    CLIENT_ID = 'd0f1d9b0-bd81-4108-be74-ea981664453a'
    SCOPES = 'urn:globus:auth:scope:transfer.api.globus.org:all'
    get_input = getattr(__builtins__, 'raw_input', input)

    # create an authorized transfer client
    client = globus_sdk.NativeAppAuthClient(client_id=CLIENT_ID)
    client.oauth2_start_flow(requested_scopes=SCOPES)
    url = client.oauth2_get_authorize_url()
    print("Login with SDK Tester: \n{}".format(url))
    auth_code = get_input("Enter auth code: ").strip()

    # get tokens and make a transfer client
    tokens = client.oauth2_exchange_code_for_tokens(auth_code).by_resource_server
    globus_transfer_data = tokens['transfer.api.globus.org']
    transfer_rt = globus_transfer_data['refresh_token']
    transfer_at = globus_transfer_data['access_token']
    expires_at_s = globus_transfer_data['expires_at_seconds']
    authorizer = globus_sdk.RefreshTokenAuthorizer(
        transfer_rt, client, access_token=transfer_at, expires_at=expires_at_s)
    tc = globus_sdk.TransferClient(authorizer=authorizer)

    # prevent accidental cleaning of a personal account
    auth_client = globus_sdk.AuthClient(authorizer=authorizer)
    res = auth_client.get('/p/whoami')
    if res['identities'][0]["id"] != SDK_USER_ID:  # assume the primary ID
        print("The primary ID was not the SDK Tester, stopping clean")
        return

    # now clean test assets
    # clean SDK Tester's home /~/ on go#ep1, go#ep2 and go#ep3
    ep_ids = [GO_EP1_ID, GO_EP2_ID, GO_EP3_ID]
    task_ids = []
    file_deletions = 0
    for ep_id in ep_ids:
        kwargs = {"notify_on_succeeded": False}  # prevent email spam
        ddata = globus_sdk.DeleteData(tc, ep_id, recursive=True, **kwargs)
        r = tc.operation_ls(ep_id)
        for item in r:
            ddata.add_item("/~/" + item["name"])
            print("deleting {}: {}".format(item["type"], item["name"]))
            file_deletions += 1
        if len(ddata["DATA"]):
            r = tc.submit_delete(ddata)
            task_ids.append(r["task_id"])

    # clean SDK Tester's bookmarks
    bookmark_deletions = 0
    r = tc.bookmark_list()
    for bookmark in r:
        tc.delete_bookmark(bookmark["id"])
        print("deleting bookmark: {}".format(bookmark["name"]))
        bookmark_deletions += 1

    # clean endpoints owned by SDK Tester
    endpoint_deletions = 0
    cleaning = True
    while cleaning:
        cleaning = False
        r = tc.endpoint_search(filter_scope="my-endpoints", num_results=None)
        for ep in r:
            tc.delete_endpoint(ep["id"])
            print("deleting endpoint: {}".format(ep["display_name"]))
            endpoint_deletions += 1
            cleaning = True

    # wait for deletes to complete
    for task_id in task_ids:
        tc.task_wait(task_id, polling_interval=1)

    print("{} files or folders cleaned".format(file_deletions))
    print("{} endpoints cleaned".format(endpoint_deletions))
    print("{} bookmarks cleaned".format(bookmark_deletions))
def test_submit_delete(self):
    """
    Transfers a file and makes a dir in go#ep1, then deletes them.
    Validates results and that the items are no longer visible by ls.
    Confirms resubmission using the same data returns a Duplicate response.
    """
    # dir for testing deletes in, name randomized to prevent collision
    dest_dir = "delete_dest_dir-" + str(getrandbits(128))
    dest_path = "/~/" + dest_dir + "/"
    self.tc.operation_mkdir(GO_EP1_ID, dest_path)
    # track asset for cleanup
    self.asset_cleanup.append({"function": self.deleteHelper,
                               "args": [GO_EP1_ID, dest_path]})

    # transfer file into go#ep1/~/dir_name
    source_path = "/share/godata/"
    kwargs = {"notify_on_succeeded": False}  # prevent email spam
    tdata = globus_sdk.TransferData(self.tc, GO_EP2_ID, GO_EP1_ID, **kwargs)
    file_name = "file1.txt"
    tdata.add_item(source_path + file_name, dest_path + file_name)
    transfer_doc = self.tc.submit_transfer(tdata)

    # make a dir to delete
    dir_name = "test_dir"
    path = dest_path + dir_name
    self.tc.operation_mkdir(GO_EP1_ID, path)

    # wait for transfer to complete
    self.assertTrue(
        self.tc.task_wait(
            transfer_doc["task_id"],
            timeout=DEFAULT_TASK_WAIT_TIMEOUT,
            polling_interval=DEFAULT_TASK_WAIT_POLLING_INTERVAL))

    # delete the items
    ddata = globus_sdk.DeleteData(self.tc, GO_EP1_ID, recursive=True, **kwargs)
    ddata.add_item(dest_path + file_name)
    ddata.add_item(dest_path + dir_name)
    delete_doc = self.tc.submit_delete(ddata)

    # validate results
    self.assertEqual(delete_doc["DATA_TYPE"], "delete_result")
    self.assertEqual(delete_doc["code"], "Accepted")
    self.assertIn("task_id", delete_doc)
    self.assertIn("submission_id", delete_doc)
    task_id = delete_doc["task_id"]
    sub_id = delete_doc["submission_id"]

    # confirm the task completed and the files were deleted
    # wait for the delete to complete
    self.assertTrue(
        self.tc.task_wait(
            task_id,
            timeout=DEFAULT_TASK_WAIT_TIMEOUT,
            polling_interval=DEFAULT_TASK_WAIT_POLLING_INTERVAL))
    # confirm file and dir are no longer visible by ls
    filter_string = "name:" + file_name + "," + dir_name
    params = {"path": dest_path, "filter": filter_string}
    ls_doc = self.tc.operation_ls(GO_EP1_ID, **params)
    self.assertEqual(ls_doc["DATA"], [])

    # confirm re-submission of ddata returns a Duplicate response
    resub_delete_doc = self.tc.submit_delete(ddata)
    self.assertEqual(resub_delete_doc["DATA_TYPE"], "delete_result")
    self.assertEqual(resub_delete_doc["code"], "Duplicate")
    self.assertEqual(resub_delete_doc["submission_id"], sub_id)
    self.assertEqual(resub_delete_doc["task_id"], task_id)