def transfer(otherEndpoint, filename):
    """Submit a transfer of one file from the fixed source endpoint.

    The file is copied to ``/~/<filename>`` on the destination endpoint
    using the module-level transfer client ``tc``.

    Args:
        otherEndpoint: Destination endpoint ID; converted with ``str``.
        filename: Path of the file on the source endpoint; converted with
            ``str``.

    Returns:
        The JSON-encoded string ``"Task Submitted"``.
    """
    source_endpoint_id = "636505f6-d784-11e7-96f1-22000a8cbd7d"
    source_path = str(filename)
    dest_endpoint_id = str(otherEndpoint)
    dest_path = "/~/" + source_path

    tdata = globus_sdk.TransferData(
        tc, source_endpoint_id, dest_endpoint_id, label="tutorial transfer"
    )
    tdata.add_item(source_path, dest_path)

    # Auto-activate both endpoints before the task is submitted.
    tc.endpoint_autoactivate(source_endpoint_id)
    tc.endpoint_autoactivate(dest_endpoint_id)

    submit_result = tc.submit_transfer(tdata)
    print("Task ID:", submit_result["task_id"])

    # Nothing else is looked up here: the task is already submitted and the
    # return value does not depend on any further API call.
    return json.dumps("Task Submitted")
def test_tranfer_init(self):
    """
    Builds TransferData with only the required arguments and with extra
    parameters, and checks that the document fields are initialised
    """
    client = self.tc

    # construction with only the required arguments
    plain = globus_sdk.TransferData(client, GO_EP1_ID, GO_EP2_ID)
    self.assertEqual(plain["DATA_TYPE"], "transfer")
    self.assertEqual(plain["source_endpoint"], GO_EP1_ID)
    self.assertEqual(plain["destination_endpoint"], GO_EP2_ID)
    for key in ("submission_id", "DATA"):
        self.assertIn(key, plain)
    self.assertEqual(len(plain["DATA"]), 0)

    # construction with a label, a sync level and arbitrary extra fields
    label = "label"
    extras = {"param1": "value1", "param2": "value2"}
    custom = globus_sdk.TransferData(
        client, GO_EP1_ID, GO_EP2_ID,
        label=label, sync_level="exists", **extras)
    self.assertEqual(custom["label"], label)
    # sync_level of "exists" should be converted to 0
    self.assertEqual(custom["sync_level"], 0)
    for name, value in extras.items():
        self.assertEqual(custom[name], value)
def transfer_catalog(transfer_manifest, dest_endpoint, dest_prefix,
                     transfer_token,
                     sync_level=settings.GLOBUS_DEFAULT_SYNC_LEVEL):
    """Submit one Globus transfer per source endpoint in the manifest.

    Args:
        transfer_manifest: Mapping of source endpoint -> iterable of paths.
            Each path is used as the source path and, joined to
            ``dest_prefix`` with ``/``, as the destination path.
        dest_endpoint: Endpoint receiving every transfer.
        dest_prefix: Prefix prepended to each destination path.
        transfer_token: Access token used to authorize the transfer client.
        sync_level: Sync level passed to every ``TransferData``.

    Returns:
        List of submitted task ids, in manifest iteration order.

    Raises:
        ValidationError: If ``transfer_manifest`` is empty (code
            ``no_data``).
    """
    authorizer = globus_sdk.AccessTokenAuthorizer(transfer_token)
    tc = globus_sdk.TransferClient(authorizer=authorizer)
    tc.endpoint_autoactivate(dest_endpoint)
    if not transfer_manifest:
        raise ValidationError('No valid data to transfer', code='no_data')

    def _submit(source_endpoint, paths):
        # Activate the source, bundle all of its paths, submit the bundle.
        log.debug(
            'Starting transfer from {} to {}:{} containing {} files'.format(
                source_endpoint, dest_endpoint, dest_prefix, len(paths)))
        tc.endpoint_autoactivate(source_endpoint)
        bundle = globus_sdk.TransferData(tc, source_endpoint, dest_endpoint,
                                         label=settings.SERVICE_NAME,
                                         sync_level=sync_level)
        for path in paths:
            bundle.add_item(path, '/'.join((dest_prefix, path)))
        return tc.submit_transfer(bundle)['task_id']

    submitted = []
    for source_endpoint, paths in transfer_manifest.items():
        submitted.append(_submit(source_endpoint, paths))
    return submitted
def do_transfer(client, source_endpoint, dest_endpoint, images):
    '''transfer every (source, dest) pair in images from source_endpoint
       to dest_endpoint as ONE transfer task, using client

       Parameters
       ==========
       client: the globus sdk client with transfer client
       source_endpoint: transfer FROM this endpoint
       dest_endpoint: transfer TO this endpoint
       images: a list of (source, dest) to send in the single transaction

       Returns
       =======
       the response of submit_transfer for the single submitted task

       Raises
       ======
       ValueError: if images is empty (there is nothing to submit)
    '''
    if not images:
        raise ValueError("images must contain at least one (source, dest)")

    # Build the task document once so that every image is part of the same
    # submission; re-creating it per image would drop all but the last one.
    tdata = globus_sdk.TransferData(client.transfer_client,
                                    source_endpoint,
                                    dest_endpoint,
                                    label="SRegistry Transfer with Tunel",
                                    sync_level="checksum")

    for image_set in images:
        tdata.add_item(image_set[0], image_set[1])

    # Send back a single link to show
    return client.transfer_client.submit_transfer(tdata)
def launch_transfers(self, globus_transfer_client, wait=True):
    """Submit the pending transfer and delete documents, then reset them.

    A document is submitted only when its ``DATA`` list is non-empty. After
    submission it is replaced by a fresh document carrying the same
    endpoint(s) and label.

    :param globus_transfer_client: client used to submit and poll tasks
    :param wait: if True, block on each submitted task before continuing
    :raises ConnectionError: if the last ``task_wait`` result is ``False``
        once the task's ``nice_status`` is no longer ``'OK'``
    """
    gtc = globus_transfer_client

    def _wait_for_task(resp):
        if not wait:
            return
        task_id = resp['task_id']
        status = gtc.task_wait(task_id=task_id, timeout=1)
        # keep waiting for as long as the task reports a healthy status
        while gtc.get_task(task_id=task_id)['nice_status'] == 'OK':
            status = gtc.task_wait(task_id=task_id, timeout=30)
        if status is False:
            tinfo = gtc.get_task(task_id=task_id)['nice_status']
            raise ConnectionError(f"Could not connect to Globus {tinfo}")

    # transfers are handled first
    pending_transfer = self.globus_transfer
    if len(pending_transfer['DATA']) > 0:
        _wait_for_task(gtc.submit_transfer(pending_transfer))
        # start over with an empty transfer document
        self.globus_transfer = globus_sdk.TransferData(
            gtc,
            pending_transfer['source_endpoint'],
            pending_transfer['destination_endpoint'],
            label=pending_transfer['label'],
            verify_checksum=True,
            sync_level='checksum')

    # then the deletes, in the same way
    pending_delete = self.globus_delete
    if len(pending_delete['DATA']) > 0:
        _wait_for_task(gtc.submit_delete(pending_delete))
        self.globus_delete = globus_sdk.DeleteData(
            gtc,
            endpoint=pending_delete['endpoint'],
            label=pending_delete['label'],
            verify_checksum=True,
            sync_level='checksum')
def add_item(self, source_path, label="PY"):
    """Queue *source_path* on the current bundle, creating the bundle if needed.

    The destination is ``self.path_dest`` followed by the path of
    *source_path* relative to ``self.path_source``.
    """
    if not self.TransferData:
        # first item of a bundle: build the TransferData that will hold it
        logger.debug("No prior TransferData object found creating")
        # labels can only be letters, numbers, spaces, dashes, and underscores
        safe_label = label.replace(".", "-")
        self.TransferData = globus_sdk.TransferData(
            self.tc,
            self.ep_source,
            self.ep_dest,
            verify_checksum=True,
            label=f"Watchdog {safe_label}",
            **self.notify,
        )

    logger.debug(f"Source Path: {source_path}")

    # The incoming path is absolute; only the part below path_source is
    # appended to the destination root, e.g.
    #   path_source  /home/brockp
    #   source_path  /home/brockp/dir1/data.txt
    #   relative     dir1/data.txt
    #   destination  <path_dest>dir1/data.txt
    relative = os.path.relpath(source_path, self.path_source)
    destination = f"{self.path_dest}{relative}"
    logger.debug(f"Dest Path: {destination}")

    self.TransferData.add_item(source_path, destination)
def transfer_catalog(user, transfer_manifest, dest_endpoint, dest_prefix,
                     label=None,
                     sync_level=settings.GLOBUS_DEFAULT_SYNC_LEVEL):
    """Submit one Globus transfer per source endpoint on behalf of *user*.

    Args:
        user: User whose transfer client is loaded with
            ``load_transfer_client``.
        transfer_manifest: Mapping of source endpoint -> iterable of
            ``(source, destination)`` pairs.
        dest_endpoint: Endpoint receiving every transfer.
        dest_prefix: Directory joined (``os.path.join``) in front of each
            destination.
        label: Task label; falls back to ``settings.SERVICE_NAME``.
        sync_level: Sync level passed to every ``TransferData``.

    Returns:
        List of submitted task ids, in manifest iteration order.

    Raises:
        ConciergeException: If ``transfer_manifest`` is empty (code
            ``no_data``).
    """
    tc = load_transfer_client(user)
    tc.endpoint_autoactivate(dest_endpoint)
    if not transfer_manifest:
        raise ConciergeException('No valid data to transfer', code='no_data')
    if not label:
        label = settings.SERVICE_NAME

    submitted = []
    for source_endpoint, pairs in transfer_manifest.items():
        log.debug('{} starting transfer from {} to {}:{} containing {} files'
                  .format(user, source_endpoint, dest_endpoint, dest_prefix,
                          len(pairs)))
        tc.endpoint_autoactivate(source_endpoint)
        bundle = globus_sdk.TransferData(tc, source_endpoint, dest_endpoint,
                                         label=label, sync_level=sync_level)
        for src, dst in pairs:
            bundle.add_item(src, os.path.join(dest_prefix, dst))
        response = tc.submit_transfer(bundle)
        submitted.append(response['task_id'])
    return submitted
def test_submit_transfer_ignore_recursive_symlinks(self):
    """
    Submits a recursive transfer of /share/symlinks/good/ on GO_EP3_ID
    with recursive_symlinks set to "ignore"
    Confirms the destination directory listing is empty afterwards.
    """
    # destination dir, name randomized to prevent collision
    ignore_path = "/~/ignore_symlink_dest_dir-" + str(getrandbits(128)) + "/"
    self.tc.operation_mkdir(GO_EP3_ID, ignore_path)
    # track asset for cleanup
    cleanup = {"function": self.deleteHelper,
               "args": [GO_EP3_ID, ignore_path]}
    self.asset_cleanup.append(cleanup)

    # transfer from /share/symlinks/good/ to the new directory
    tdata = globus_sdk.TransferData(self.tc, GO_EP3_ID, GO_EP3_ID,
                                    recursive_symlinks="ignore")
    tdata.add_item("/share/symlinks/good/", ignore_path, recursive=True)
    submission = self.tc.submit_transfer(tdata)
    task_id = submission["task_id"]

    # the task must finish within the wait window
    finished = self.tc.task_wait(
        task_id,
        timeout=DEFAULT_TASK_WAIT_TIMEOUT,
        polling_interval=DEFAULT_TASK_WAIT_POLLING_INTERVAL)
    self.assertTrue(finished)

    # with symlinks ignored, nothing is expected at the destination
    listing = self.tc.operation_ls(GO_EP3_ID, path=ignore_path)
    self.assertEqual(len(listing["DATA"]), 0)
def test_task_wait(self):
    """
    Waits on a completed task and on a never-completing task,
    confirms task_wait returns True and False respectively
    """
    # complete
    complete_id = self.test_get_task()
    self.assertTrue(self.tc.task_wait(complete_id, timeout=1))

    # never completing
    source_path = "/share/godata/"
    dest_path = "/share/godata/"
    kwargs = {
        "notify_on_succeeded": False,
        "notify_on_fail": False,
        "notify_on_inactive": False
    }  # prevent email spam
    tdata = globus_sdk.TransferData(self.tc, GO_EP2_ID, GO_EP1_ID, **kwargs)
    file_name = "file1.txt"
    tdata.add_item(source_path + file_name, dest_path + file_name)
    transfer_doc = self.tc.submit_transfer(tdata)
    never_id = transfer_doc["task_id"]

    # track asset for cleanup BEFORE asserting: if the assertion below
    # fails, the submitted task is still registered for cancellation
    self.asset_cleanup.append({
        "function": self.tc.cancel_task,
        "args": [never_id]
    })

    self.assertFalse(self.tc.task_wait(never_id, timeout=1))
def test_submit_transfer_copy_recursive_symlinks(self):
    """
    Submits a recursive transfer of /share/symlinks/good/ on GO_EP3_ID
    with recursive_symlinks set to "copy"
    Confirms the destination holds 4 entries and that none of them
    reports a link_target.
    """
    # destination dir, name randomized to prevent collision
    copy_path = "/~/copy_symlink_dest_dir-" + str(getrandbits(128)) + "/"
    self.tc.operation_mkdir(GO_EP3_ID, copy_path)
    # track asset for cleanup
    cleanup = {"function": self.deleteHelper,
               "args": [GO_EP3_ID, copy_path]}
    self.asset_cleanup.append(cleanup)

    # transfer from /share/symlinks/good/ to the new directory
    tdata = globus_sdk.TransferData(self.tc, GO_EP3_ID, GO_EP3_ID,
                                    recursive_symlinks="copy")
    tdata.add_item("/share/symlinks/good/", copy_path, recursive=True)
    submission = self.tc.submit_transfer(tdata)
    task_id = submission["task_id"]

    # the task must finish within the wait window
    finished = self.tc.task_wait(
        task_id,
        timeout=DEFAULT_TASK_WAIT_TIMEOUT,
        polling_interval=DEFAULT_TASK_WAIT_POLLING_INTERVAL)
    self.assertTrue(finished)

    # every entry arrived, and none of them is listed as a link
    ls_doc = self.tc.operation_ls(GO_EP3_ID, path=copy_path)
    self.assertEqual(len(ls_doc["DATA"]), 4)
    for entry in ls_doc:
        self.assertIsNone(entry["link_target"])
def __init__(self, one=None, globus_client_id=None, local_endpoint=None,
             label='ibllib patch'):
    """Log in to Globus and prepare empty transfer / delete documents.

    :param one: ONE instance; must be truthy, also passed to the parent
        constructor and used to list the data repositories
    :param globus_client_id: Globus client id used to log in; must be truthy
    :param local_endpoint: local endpoint id; when falsy,
        ``globus.get_local_endpoint()`` is used
    :param label: label set on the transfer and delete documents
    """
    assert globus_client_id
    assert one
    self.local_endpoint = local_endpoint or globus.get_local_endpoint()
    self.label = label
    self.transfer_client = globus.login_auto(
        globus_client_id=globus_client_id, str_app='globus/admin')

    # options shared by the transfer and the delete documents
    shared = dict(verify_checksum=True, sync_level='checksum', label=label)
    # transfers/delete from the current computer to the flatiron:
    # mandatory and executed first
    self.globus_transfer = globus_sdk.TransferData(
        self.transfer_client, self.local_endpoint, FLAT_IRON_GLOBUS_ID,
        **shared)
    self.globus_delete = globus_sdk.DeleteData(
        self.transfer_client, FLAT_IRON_GLOBUS_ID, **shared)

    # data repositories listed by Alyx, indexed by name
    repositories = {}
    for repo in one.alyx.rest('data-repository', 'list'):
        repositories[repo['name']] = repo
    self.repos = repositories

    # transfers/delete from flatiron to optional third parties to
    # synchronize / delete
    self.globus_transfers_locals = {}
    self.globus_deletes_locals = {}
    super().__init__(one=one)
def submit_transfer(self, for_real=False):
    ''' Build the transfer document for all task pairs and optionally
    submit it to the transfer client.

    Entries of ``self.snaptasks`` are added recursively (whole folders);
    entries of ``self.subtasks`` are added as plain items. Both are
    ``(source, destination)`` pairs on ``self.jhu_endpoint``.

    Parameters
    ----------
    for_real : bool
        When True the document is submitted; otherwise it is only built.

    Returns
    -------
    The TransferData document that was built, whether or not it was
    submitted.
    '''
    tlabel = 'Indra_snapdir_{}'.format(self.snapstr)
    tdata = globus_sdk.TransferData(self.tc,
                                    self.jhu_endpoint,
                                    self.jhu_endpoint,
                                    label=tlabel,
                                    sync_level="checksum")
    for task in self.snaptasks:
        # recursive=True transfers the entire contents of the folder
        tdata.add_item(task[0], task[1], recursive=True)
    for task in self.subtasks:
        tdata.add_item(task[0], task[1])

    if for_real:
        # the submission response is not used further here
        self.tc.submit_transfer(tdata)
        print("Transfer {} submitted!".format(tlabel))
    else:
        print(
            "Transfer {} not submitted (set for_real=True to submit). Returning tdata."
            .format(tlabel))
    return tdata
def setUp(self):
    """
    Builds the TransferData and DeleteData fixtures used by the tests
    """
    super(DataTests, self).setUp()
    client = self.tc
    # transfer document from GO_EP1_ID to GO_EP2_ID
    self.tdata = globus_sdk.TransferData(client, GO_EP1_ID, GO_EP2_ID)
    # delete document on GO_EP1_ID
    self.ddata = globus_sdk.DeleteData(client, GO_EP1_ID)
def get_file(url, output_path, auth_config, token=None, dest_endpoint=None):
    """Submit a Globus transfer of the file at *url* to *output_path*.

    The URL host is used as the source endpoint and the URL path as the
    source path. The function only submits the task; it does not wait for
    the transfer to complete.

    Args:
        url: URL whose hostname is the source endpoint and whose path is
            the file to fetch.
        output_path: Local path the file is transferred to.
        auth_config: Passed to ``authenticate`` when no token is given.
        token: Globus access token; when falsy, ``authenticate`` supplies
            both the token and the destination endpoint.
        dest_endpoint: Destination endpoint id.

    Returns:
        True if the transfer was submitted; False if the token or the
        destination endpoint is missing, or if any exception was raised
        (the exception is logged, not propagated).
    """
    try:
        parts = urlsplit(url)
        src_endpoint = parts.hostname
        src_path = parts.path
        if platform.system() == "Windows":
            # e.g. C:\dir\file -> /C/dir/file
            dest_path = ''.join(
                ('/', output_path.replace('\\', '/').replace(':', '')))
        else:
            dest_path = os.path.abspath(output_path)

        if not token:
            token, dest_endpoint = authenticate(url, auth_config)
        if token is None:
            logger.warning(
                "A valid Globus access token is required to create transfers. "
                "Check keychain.cfg for valid parameters.")
            return False
        if dest_endpoint is None:
            logger.warning(
                "A valid Globus destination endpoint must be specified. "
                "Check keychain.cfg for valid parameters.")
            return False

        # initialize transfer client
        client = globus_sdk.TransferClient(token=token)

        # Activate source endpoint
        logger.debug("Activating source endpoint: %s", src_endpoint)
        client.endpoint_autoactivate(src_endpoint, if_expires_in=600)

        # Activate destination endpoint
        logger.debug("Activating destination endpoint: %s", dest_endpoint)
        client.endpoint_autoactivate(dest_endpoint, if_expires_in=600)

        filename = src_path.rsplit('/', 1)[-1]
        label = "".join(("BDBag Fetch -- ", filename.replace('.', '_')))

        # creating the TransferData also obtains a unique ID for this transfer
        tdata = globus_sdk.TransferData(client,
                                        src_endpoint,
                                        dest_endpoint,
                                        label=label)
        tdata.add_item(src_path, dest_path, recursive=False)

        # start the transfer
        task_id = client.submit_transfer(tdata)["task_id"]
        logger.info("Globus transfer started with ID %s", task_id)
        logger.debug("Transferring file %s to %s", url, output_path)
        return True
    except Exception as e:
        # Boundary handler: callers get a boolean, the cause goes to the log.
        logger.error('Globus transfer request exception: %s',
                     bdbag.get_named_exception(e))
        return False
def uploadGlobusData(self, upload_files=None, upload_folders=None):
    """
    Submit the upload request from the local endpoint to the data endpoint.

    When ``upload_files`` is given, only the files are uploaded and
    ``upload_folders`` is ignored; otherwise the folders are uploaded
    recursively. Each item lands at its base name on the destination.
    The submission response is stored in ``self.transfer_result``.

    Parameters
    ----------
    upload_folders: list
        List of individual folder names to upload
    upload_files: list
        List of individual file names to upload

    Raises
    ------
    TypeError
        If neither ``upload_files`` nor ``upload_folders`` is given.
    """
    # Fail before any network call when there is nothing to upload.
    if upload_files is None and upload_folders is None:
        raise TypeError(
            "uploadGlobusData requires upload_files or upload_folders")

    # NOTE(review): the destination passed to TransferData is the search
    # hit's 'name', not its 'id' -- confirm this is what the API expects.
    destination = self.transfer_client.endpoint_search(
        DATA_ENDPOINT_NAME)[0]['name']
    tdata = globus_sdk.TransferData(self.transfer_client,
                                    self.local_ep_id,
                                    destination)

    if upload_files is not None:
        paths, recursive = upload_files, False
    else:
        paths, recursive = upload_folders, True

    for path in paths:
        # normpath drops a trailing separator so "folder/" still yields
        # "folder" rather than an empty destination name
        tdata.add_item(os.path.abspath(path),
                       os.path.basename(os.path.normpath(path)),
                       recursive=recursive)

    self.transfer_result = self.transfer_client.submit_transfer(tdata)
def my_transfer(tclient, srcpoint, destpoint, mylabel, srcpath, destpath,
                isrecursive):
    """
    TransferData call wrapper

    Submits a single-item, checksum-synced transfer with a deadline of
    DEADLINE minutes from now, waits on it with my_task_wait, then prints
    every task event. Error events whose description is not
    "file not found" are also appended to the file 'error_file'.

    Args:
        tclient: Transfer client used to submit and inspect the task.
        srcpoint: Source endpoint.
        destpoint: Destination endpoint.
        mylabel: Task label.
        srcpath: Path on the source endpoint.
        destpath: Path on the destination endpoint.
        isrecursive: Passed as ``recursive`` to ``add_item``.
    """
    now = datetime.datetime.utcnow()
    mydeadline = now + datetime.timedelta(minutes=DEADLINE)
    tdata = globus_sdk.TransferData(tclient,
                                    srcpoint,
                                    destpoint,
                                    label=mylabel,
                                    deadline=str(mydeadline),
                                    sync_level="checksum")
    tdata.add_item(srcpath, destpath, recursive=isrecursive)
    transfer_result = tclient.submit_transfer(tdata)
    task_id = transfer_result["task_id"]
    print("tdata task_id = ", task_id)
    if DEBUG == 1:
        print(tdata)
    my_task_wait(tclient, transfer_result)

    # One append-mode handle for the whole event scan; the context manager
    # closes it even if printing or writing an event raises.
    with open('error_file', 'a') as error_file:
        for event in tclient.task_event_list(task_id):
            print("Event on Task({}) at {}:\n{}".format(
                task_id, event["time"], event["description"]))
            # may want to log these also to a file
            if event["is_error"] and event["description"] != "file not found":
                print(" is_error:{}".format(event["details"]))
                error_file.write("%s: %s\n" %
                                 (event["time"], event["details"]))
def transfer_file(cls, src_ep, dst_ep, src_path, dst_path):
    """Transfer one file between two endpoints and block until it ends.

    While the task is still running, the most recent error event (if it is
    new) is logged. Once the task has ended, any status other than
    ``SUCCEEDED`` raises.

    Args:
        src_ep: Source endpoint.
        dst_ep: Destination endpoint.
        src_path: Path on the source endpoint.
        dst_path: Path on the destination endpoint.

    Raises:
        Exception: If the final task status is not ``SUCCEEDED``.
    """
    tc = globus_sdk.TransferClient(authorizer=cls.authorizer)
    request = globus_sdk.TransferData(tc, src_ep, dst_ep)
    request.add_item(src_path, dst_path)
    task = tc.submit_transfer(request)
    task_id = task['task_id']

    seen_event_time = None
    # each wait lasts up to 600 s, polling the task every 20 s
    while not tc.task_wait(task_id, timeout=600, polling_interval=20):
        task = tc.get_task(task_id)
        error_events = tc.task_event_list(task_id,
                                          num_results=1,
                                          filter='is_error:1')
        for event in error_events:
            if event['time'] == seen_event_time:
                # already reported on a previous pass
                break
            seen_event_time = event['time']
            logger.info(
                'Non-critical Globus Transfer error event: {} at {}'.
                format(event['description'], event['time']))
            logger.debug('{}'.format(event['details']))

    task = tc.get_task(task_id)
    if task['status'] == 'SUCCEEDED':
        return
    logger.error(task)
    raise Exception(
        'Transfer {}, from {}{} to {}{} to failed due to error: {}'.
        format(task['task_id'], src_ep, src_path, dst_ep, dst_path,
               task['nice_status_short_description']))
def test_endpoint_manager_task_list(self):
    """
    Submits a transfer task and a delete task to the managed endpoint
    through self.tc2, then fetches the endpoint manager task list through
    self.tc (filtered by GO_EP2_ID and the sdktester2b user id).
    Confirms both submitted tasks are listed and that every listed task
    seen on the way carries the expected fields.
    """
    # new dir with randomized name to prevent collision
    dest_path = "/transfer_dest_dir-" + str(getrandbits(128)) + "/"
    self.tc2.operation_mkdir(self.managed_ep_id, dest_path)

    # transfer a file to the new dir
    tdata = globus_sdk.TransferData(self.tc2, GO_EP1_ID,
                                    self.managed_ep_id,
                                    notify_on_succeeded=False)
    file_name = "file1.txt"
    tdata.add_item("/share/godata/" + file_name, dest_path + file_name)
    transfer_id = self.tc2.submit_transfer(tdata)["task_id"]

    # delete the new dir
    ddata = globus_sdk.DeleteData(self.tc2, self.managed_ep_id,
                                  recursive=True,
                                  notify_on_succeeded=False)
    ddata.add_item(dest_path)
    delete_id = self.tc2.submit_delete(ddata)["task_id"]

    # fetch the endpoint manager task list
    tasks_doc = self.tc.endpoint_manager_task_list(
        filter_endpoint=GO_EP2_ID,
        filter_user_id=get_user_data()["sdktester2b"]["id"])

    expected_fields = [
        "username", "deadline", "type", "source_endpoint_id"
    ]
    self.assertIsInstance(tasks_doc, PaginatedResource)

    # ids still to be found; stop scanning as soon as both were seen
    missing = {transfer_id, delete_id}
    for task in tasks_doc:
        for field in expected_fields:
            self.assertIn(field, task)
        missing.discard(task["task_id"])
        if not missing:
            break

    # fail if both not found
    self.assertTrue(not missing)
def start_globus_transfer(source_file_id, destination_file_id, dry_run=False):
    """Start a globus file transfer between two file record UUIDs.

    Both file records are fetched from Alyx; their data repositories give
    the Globus endpoint ids and the base paths the relative paths are
    joined to.

    Args:
        source_file_id: UUID of the source file record.
        destination_file_id: UUID of the destination file record.
        dry_run: When True, everything is prepared and logged but the
            transfer is not submitted.

    Returns:
        The submission response, or None on a dry run.

    Raises:
        Exception: If the source or the destination repository has no
            Globus endpoint id.
    """
    c = AlyxClient()
    source_file_record = c.get('/files/' + source_file_id)
    destination_file_record = c.get('/files/' + destination_file_id)

    source_repo = source_file_record['data_repository']
    destination_repo = destination_file_record['data_repository']
    source_repo_obj = _get_data_repository(c, source_file_record)
    destination_repo_obj = _get_data_repository(c, destination_file_record)

    source_id = source_repo_obj['globus_endpoint_id']
    destination_id = destination_repo_obj['globus_endpoint_id']

    # Both ids are required: a transfer cannot be built with only one.
    if not source_id or not destination_id:
        raise Exception(
            "The Globus endpoint ids of source and destination must be set.")

    source_path = op.join(source_repo_obj['path'],
                          source_file_record['relative_path'])
    destination_path = op.join(destination_repo_obj['path'],
                               destination_file_record['relative_path'])

    label = 'Transfer %s %s to %s %s' % (
        source_repo,
        _escape_label(source_path),
        destination_repo,
        _escape_label(destination_path),
    )
    tc = globus_transfer_client()
    tdata = globus_sdk.TransferData(
        tc,
        source_id,
        destination_id,
        verify_checksum=True,
        sync_level='checksum',
        label=label,
    )
    tdata.add_item(source_path, destination_path)

    logger.info("Transfer from %s <%s> to %s <%s>%s.", source_repo,
                source_path, destination_repo, destination_path,
                ' (dry)' if dry_run else '')

    if dry_run:
        return

    response = tc.submit_transfer(tdata)
    task_id = response.get('task_id', None)
    message = response.get('message', None)
    logger.info("%s (task UUID: %s)", message, task_id)
    return response
def _thread_transfer_job_batch(self, jobs: List[Job]) -> None:
    """Thread function: transfer a batch of files as one Globus task.

    Every job's ``file_path`` is sent to ``<transfer_dir>/<file_id>``. The
    task is polled every 5 seconds while its status is ``ACTIVE``; the
    final status is then recorded in ``self.id_status``.

    Parameters
    ----------
    jobs : list(Job)
        List of Job objects to transfer as a batch.

    Returns
    -------
    None
    """
    try:
        tdata = globus_sdk.TransferData(self.tc, self.globus_source_eid,
                                        self.globus_dest_eid)
    except globus_sdk.exc.TransferAPIError as e:
        logger.error(f"Prefetcher caught {e}")
        # mark the ids already mapped to these files as failed
        with self.lock:
            for job in jobs:
                failed_id = self.file_id_mapping[job.file_path]
                self.id_status[failed_id] = PrefetcherStatuses.FAILED
        return

    for job in jobs:
        tdata.add_item(job.file_path, f"{self.transfer_dir}/{job.file_id}")

    task_id = self.tc.submit_transfer(tdata)["task_id"]

    with self.lock:
        for job in jobs:
            self.file_id_mapping[job.file_path] = task_id
            self.id_status[task_id] = PrefetcherStatuses.ACTIVE
    for job in jobs:
        logger.info(f"{job.file_path}: ACTIVE")

    # poll until the task leaves the ACTIVE state
    task_data = self.tc.get_task(task_id).data
    while task_data["status"] == "ACTIVE":
        time.sleep(5)
        task_data = self.tc.get_task(task_id).data

    final_status = task_data["status"]
    with self.lock:
        self.id_status[task_id] = PrefetcherStatuses[final_status]
        self.num_current_transfers -= 1
    for job in jobs:
        logger.info(f"{job.file_path}: {final_status}")
def download_file(tc, endpoint_id, globus_path, file_name, local_path):
    """Transfer one file from *endpoint_id* to LOCAL_ID and block until done.

    The source is ``globus_path + file_name`` and the destination is
    ``local_path + file_name`` (plain string concatenation).
    """
    # print("downloading file {}".format(globus_path + file_name))
    request = globus_sdk.TransferData(tc, endpoint_id, LOCAL_ID)
    request.add_item(globus_path + file_name, local_path + file_name)
    task_id = tc.submit_transfer(request)["task_id"]

    # keep waiting in 60 s windows (polling every second) until task_wait
    # reports completion
    finished = False
    while not finished:
        finished = tc.task_wait(task_id, polling_interval=1, timeout=60)
def transfer_file(cls, src_ep, dst_ep, src_path, dst_path):
    """Transfer one file between two endpoints and block until it ends.

    Args:
        src_ep: Source endpoint.
        dst_ep: Destination endpoint.
        src_path: Path on the source endpoint.
        dst_path: Path on the destination endpoint.

    Raises:
        Exception: If the submission fails (the original error is chained
            as the cause), or if the task ends with a status other than
            ``SUCCEEDED``.
    """
    tc = globus_sdk.TransferClient(authorizer=cls.authorizer)
    td = globus_sdk.TransferData(tc, src_ep, dst_ep)
    td.add_item(src_path, dst_path)
    try:
        task = tc.submit_transfer(td)
    except Exception as e:
        raise Exception(
            'Globus transfer from {}{} to {}{} failed due to error: {}'.
            format(src_ep, src_path, dst_ep, dst_path, e)) from e

    last_event_time = None
    # A Globus transfer job (task) can be in one of the three states:
    # ACTIVE, SUCCEEDED, FAILED. Parsl every 15 seconds polls a status of
    # the transfer job (task) from the Globus Transfer service, with 60
    # second timeout limit. If the task is ACTIVE after time runs out
    # 'task_wait' returns False, and True otherwise.
    while not tc.task_wait(task['task_id'], 60, 15):
        task = tc.get_task(task['task_id'])
        # Get the last error Globus event
        events = tc.task_event_list(task['task_id'],
                                    num_results=1,
                                    filter='is_error:1')
        try:
            event = next(events)
        # No error reported, the transfer is still running
        except StopIteration:
            continue
        # Print the error event to stderr and Parsl file log if it was not
        # yet printed
        if event['time'] != last_event_time:
            last_event_time = event['time']
            logger.warning(
                'Non-critical Globus Transfer error event for globus://{}{}: "{}" at {}. Retrying...'
                .format(src_ep, src_path, event['description'],
                        event['time']))
            logger.debug('Globus Transfer error details: {}'.format(
                event['details']))

    # The Globus transfer job (task) has been terminated (is not ACTIVE).
    # Check if the transfer SUCCEEDED or FAILED.
    task = tc.get_task(task['task_id'])
    if task['status'] == 'SUCCEEDED':
        logger.debug(
            'Globus transfer {}, from {}{} to {}{} succeeded'.format(
                task['task_id'], src_ep, src_path, dst_ep, dst_path))
    else:
        logger.debug('Globus Transfer task: {}'.format(task))
        events = tc.task_event_list(task['task_id'],
                                    num_results=1,
                                    filter='is_error:1')
        event = events.data[0]
        raise Exception(
            'Globus transfer {}, from {}{} to {}{} failed due to error: "{}"'
            .format(task['task_id'], src_ep, src_path, dst_ep, dst_path,
                    event['details']))
def request_transfer(self):
    """Request a recursive Ivy -> DTN transfer when files are waiting.

    The file count reported by ``get_file_count_from_globus`` is logged;
    nothing is submitted unless it is greater than zero.
    """
    file_count = self.get_file_count_from_globus()
    app.logger.info(f"There are {file_count} files ready for transfer from Globus")
    if not (file_count > 0):
        return

    app.logger.info("Located file(s) in Globus, requesting a transfer.")
    client = self.get_transfer_client()
    request = globus_sdk.TransferData(client,
                                      self.GLOBUS_IVY_ENDPOINT,
                                      self.GLOBUS_DTN_ENDPOINT,
                                      label="Transfer",
                                      sync_level="checksum")
    request.add_item(self.GLOBUS_IVY_PATH, self.GLOBUS_DTN_PATH,
                     recursive=True)
    client.submit_transfer(request)
def executeTransfer(self):
    """
    executeTransfer(self)

    DESCRIPTION:
        Splits the entries of self._dataList -- the files to transfer --
        round-robin over self._parallelism Globus transfers and submits
        each of them. The TransferData objects created here are appended
        to self._transferData, and the response returned by each
        submit_transfer call is appended to self._transferResult and
        printed.

    ARGUMENTS:
        self

    EFFECTS:
        self._parallelism is capped at len(self._dataList).
        One Globus transfer is submitted per TransferData object.

    RETURN:
        None

    RAISES:
        AssertionError if self._dataList is empty or self._parallelism is
        not positive.
    """
    # If the _dataList is empty, throw an error
    assert len(self._dataList) > 0
    assert self._parallelism > 0

    # If our parallelism is greater than the number of entries to be
    # transferred, then cap the parallelism at the number of entries
    if self._parallelism > len(self._dataList):
        self._parallelism = len(self._dataList)

    # One TransferData object per independent Globus transfer call. The
    # objects of THIS call are kept in a local list so that anything
    # already stored in self._transferData is never re-filled or
    # re-submitted.
    batches = [
        globus_sdk.TransferData(self._transClient, self._sourceEndpoint,
                                self._destEndpoint)
        for _ in range(self._parallelism)
    ]
    self._transferData.extend(batches)

    # Deal the entries out round-robin: entry k goes to batch k mod N.
    for index, entry in enumerate(self._dataList):
        batches[index % self._parallelism].add_item(entry["source"],
                                                    entry["dest"])

    # Every batch is now ready to pass to the Globus API
    for batch in batches:
        result = self._transClient.submit_transfer(batch)
        self._transferResult.append(result)
        print(result)
def quick_transfer(transfer_client, source_ep, dest_ep, path_list,
                   timeout=None):
    """Perform a Globus Transfer and monitor for success.

    Arguments:
        transfer_client (TransferClient): An authenticated Transfer client.
        source_ep (str): The source Globus Endpoint ID.
        dest_ep (str): The destination Globus Endpoint ID.
        path_list (list of tuple of 2 str): A list of tuples containing the
            paths to transfer as (source, destination). Directory paths
            must end in a slash, and file paths must not.
            Example: [("/source/files/file.dat", "/dest/mydocs/doc.dat"),
                      ("/source/all_reports/", "/dest/reports/")]
        timeout (int): Number of polling rounds (each up to 10 seconds) to
            wait for the transfer to complete before cancelling it and
            erroring. Default None, which waits until the transfer
            succeeds or fails; 0 behaves the same way. If this argument is
            negative, the transfer is submitted but not waited on at all,
            and there is then no error checking.

    Returns:
        str: ID of the Globus Transfer.

    Raises:
        globus_sdk.GlobusError: If a path pair mixes a file with a
            directory, the submission is not accepted, an error event is
            reported while waiting, or the wait times out.
    """
    INTERVAL_SEC = 10
    tdata = globus_sdk.TransferData(transfer_client, source_ep, dest_ep,
                                    verify_checksum=True)
    for item in path_list:
        # endswith() also copes with an empty path string
        source_is_dir = item[0].endswith("/")
        dest_is_dir = item[1].endswith("/")
        # Malformed: one side is a directory and the other is a file
        if source_is_dir != dest_is_dir:
            raise globus_sdk.GlobusError(
                "Cannot transfer file to directory or vice-versa: "
                + str(item))
        # Directories are transferred recursively, files are not
        tdata.add_item(item[0], item[1], recursive=source_is_dir)

    res = transfer_client.submit_transfer(tdata)
    if res["code"] != "Accepted":
        raise globus_sdk.GlobusError("Failed to transfer files: Transfer "
                                     + res["code"])

    # A negative timeout means "submit only": skip monitoring entirely.
    if timeout is not None and timeout < 0:
        return res["task_id"]

    iterations = 0
    while not transfer_client.task_wait(res["task_id"],
                                        timeout=INTERVAL_SEC,
                                        polling_interval=INTERVAL_SEC):
        # Any error event aborts the transfer
        for event in transfer_client.task_event_list(res["task_id"]):
            if event["is_error"]:
                transfer_client.cancel_task(res["task_id"])
                raise globus_sdk.GlobusError("Error transferring data: "
                                             + event["description"])
        # None and 0 are both falsy here, so they never time out
        if timeout and iterations >= timeout:
            transfer_client.cancel_task(res["task_id"])
            raise globus_sdk.GlobusError(
                "Transfer timed out after "
                + str(iterations * INTERVAL_SEC) + " seconds.")
        iterations += 1
    return res["task_id"]
def request_transfer(self):
    """Submit a recursive, checksum-synced transfer of the fixed Ivy
    directory to ``project/covid-vpr/`` on the DTN endpoint.
    """
    client = self.get_transfer_client()
    request = globus_sdk.TransferData(client,
                                      self.GLOBUS_IVY_ENDPOINT,
                                      self.GLOBUS_DTN_ENDPOINT,
                                      label="Transfer",
                                      sync_level="checksum")
    request.add_item("/ics/ics343/ivy-hip-vprcv", "project/covid-vpr/",
                     recursive=True)
    # the submission response is not used
    client.submit_transfer(request)
def launch_transfers(self, local_servers=False):
    """
    patcher.launch_transfers()
    Launches the globus transfer and delete from the local patch computer to the flatiron.
    Each submitted task is waited on until it completes; the pending transfer / delete
    objects are then re-initialized empty with the same endpoints and label.
    :param: local_servers (False): if True, sync the local servers after the main transfer
    :return: None
    :raises ConnectionError: if a submitted task completes with a fatal error
    """
    gtc = self.transfer_client

    def _wait_for_task(resp):
        # Blocks until the task referenced by resp['task_id'] has completed.
        # Relevant fields of the task document returned by gtc.get_task(), as
        # observed on real tasks:
        #   running, healthy:   status 'ACTIVE', is_ok True, nice_status 'OK',
        #                       completion_time None, fatal_error None
        #   running, checksum   status 'ACTIVE', is_ok False,
        #   verification error: nice_status 'VERIFY_CHECKSUM',
        #                       completion_time None, fatal_error None
        #   finished:           status 'SUCCEEDED', is_ok None, nice_status None,
        #                       completion_time set, fatal_error None
        #   errored:            status 'FAILED', is_ok None, nice_status None,
        #                       completion_time set,
        #                       fatal_error {'code': 'CANCELED', 'description': 'canceled'}
        while True:
            # keep the whole task document: both 'completion_time' and
            # 'fatal_error' are read from it
            tinfo = gtc.get_task(task_id=resp['task_id'])
            if tinfo['completion_time'] is not None:
                break
            gtc.task_wait(task_id=resp['task_id'], timeout=30)
        if tinfo['fatal_error'] is not None:
            raise ConnectionError(f"Globus transfer failed \n {tinfo}")

    # handles the transfers first
    if self.globus_transfer['DATA']:
        # launch the transfer
        _wait_for_task(gtc.submit_transfer(self.globus_transfer))
        # re-initialize the globus_transfer property
        self.globus_transfer = globus_sdk.TransferData(
            gtc,
            self.globus_transfer['source_endpoint'],
            self.globus_transfer['destination_endpoint'],
            label=self.globus_transfer['label'],
            verify_checksum=True, sync_level='checksum')
    # do the same for deletes
    if self.globus_delete['DATA']:
        _wait_for_task(gtc.submit_delete(self.globus_delete))
        # NOTE(review): verify_checksum / sync_level are transfer options; confirm
        # the installed globus_sdk DeleteData accepts them as extra keywords.
        self.globus_delete = globus_sdk.DeleteData(
            gtc,
            endpoint=self.globus_delete['endpoint'],
            label=self.globus_delete['label'],
            verify_checksum=True, sync_level='checksum')
    # launch the local transfers and local deletes
    if local_servers:
        self.launch_transfers_secondary()
def submit_transfer(self, for_real=False):
    '''
    Build, and optionally submit, the transfers for this series: one
    recursive transfer for the FFT data, and one sub_tab transfer for each
    run in the series.

    Nothing is submitted unless for_real is True. In both cases the
    prepared objects are returned as (tdata_fft, tdata_sub), where
    tdata_sub is a list with one TransferData per run.
    '''
    fft_label = "Indra_{}_FFT".format(self.series_num)
    fft_transfer = globus_sdk.TransferData(
        self.tc,
        self.jhu_endpoint,
        self.jhu_endpoint,
        label=fft_label,
        sync_level="checksum",
    )
    for source, destination in ((task[0], task[1]) for task in self.ffttasks):
        # recursive=True so that whole folders are transferred
        fft_transfer.add_item(source, destination, recursive=True)

    sub_transfers = []
    for offset, run_tasks in enumerate(self.tabtasks):
        X, Y, Z = get_xyz(self.runfirst + offset)
        sub_label = "Indra_{}_{}_{}_sub_tab".format(X, Y, Z)
        run_transfer = globus_sdk.TransferData(
            self.tc,
            self.jhu_endpoint,
            self.jhu_endpoint,
            label=sub_label,
            sync_level="checksum",
        )
        for task in run_tasks:
            run_transfer.add_item(task[0], task[1])
        sub_transfers.append(run_transfer)

    if not for_real:
        print(
            "Transfers not submitted (set for_real=True to submit). Returning (tdata_fft, tdata_sub)."
        )
        return fft_transfer, sub_transfers

    self.tc.submit_transfer(fft_transfer)
    print("Transfer {} submitted!".format(fft_label))
    for run_transfer in sub_transfers:
        self.tc.submit_transfer(run_transfer)
        print("Transfer {} submitted!".format(run_transfer['label']))
    return fft_transfer, sub_transfers
def globus_download_files(client: globus_sdk.TransferClient, endpoint_id: str, files: tuple) -> None:
    """Downloads the listed remote files that are not already available locally
    Arguments:
        client: the Globus transfer client to use
        endpoint_id: the ID of the endpoint to access
        files: the list of files to fetch
    Return:
        Nothing is returned; each file is saved under LOCAL_SAVE_PATH using its base name
    Exceptions:
        RuntimeError is raised after all files have been attempted if any of them
        could not be downloaded
    Notes:
        Files whose local copy already exists are skipped. Each remaining file is
        fetched with its own transfer, waiting up to 600 seconds for it to finish
    """
    # Map each remote path to its local destination, skipping files already present
    file_transfers = {}
    for one_file in files:
        globus_save_path = os.path.join(LOCAL_SAVE_PATH, os.path.basename(one_file))
        if not os.path.exists(globus_save_path):
            file_transfers[one_file] = globus_save_path

    have_exception = False
    for cnt, (remote_path, save_path) in enumerate(file_transfers.items(), start=1):
        try:
            logging.info("Trying transfer %s: %s", cnt, remote_path)
            transfer_setup = globus_sdk.TransferData(
                client, endpoint_id, GLOBUS_LOCAL_ENDPOINT_ID,
                label="Get image file", sync_level="checksum")
            transfer_setup.add_item(remote_path, save_path)
            transfer_request = client.submit_transfer(transfer_setup)
            # task_wait gives a falsy result when the task has not finished in time
            task_result = client.task_wait(transfer_request['task_id'],
                                           timeout=600, polling_interval=5)
            if not task_result:
                raise RuntimeError("Timed out waiting for transfer of: %s" % remote_path)
            if not os.path.exists(save_path):
                raise RuntimeError("Unable to find downloaded file at: %s" % save_path)
        except RuntimeError as ex:
            # Keep going so the remaining files still get a chance to download
            have_exception = True
            logging.warning("Failed to get image: %s", ex)

    if have_exception:
        raise RuntimeError("Unable to retrieve all files individually")
def go(self):
    """Submit a recursive, checksum-synced transfer of in_dir to out_dir.

    The transfer goes from self.endpoints[0] to self.endpoints[1] and is
    labelled with self.baselabel plus the current local timestamp. The
    resulting task ID and label are printed.
    """
    client = get_token(self.failifnotoken)
    self.activate_client(client)

    stamp = datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    transfer_label = self.baselabel + "_" + stamp

    source_endpoint, dest_endpoint = self.endpoints[0], self.endpoints[1]
    request = globus_sdk.TransferData(
        client,
        source_endpoint,
        dest_endpoint,
        label=transfer_label,
        preserve_timestamp=self.preserve,
        sync_level="checksum",
    )
    request.add_item(self.in_dir, self.out_dir, recursive=True)

    response = client.submit_transfer(request)
    task_id = response["task_id"]
    print("Transfer ID is", task_id, "label is", transfer_label)