def copy_directory(self, ori, destiny, tc):
    """
    Copy a directory between the configured endpoints using Globus transfer.

    :param ori: path where the data is in the source machine
    :type ori: str
    :param destiny: path where the data will be put on the destiny machine
    :type destiny: str
    :param tc: globus transfer client
    :type tc: :class:`globus_sdk.TransferClient`
    :return: status of the transference
    :rtype: str
    """
    # Build a recursive (directory) transfer between the stored endpoints.
    tdata = TransferData(tc, self._from, self._to,
                         label="SDK example", sync_level="checksum")
    tdata.add_item(ori, destiny, recursive=True)
    submission = tc.submit_transfer(tdata)
    task_id = submission["task_id"]
    # Poll once per second until the task leaves the ACTIVE state.
    while not tc.task_wait(task_id, timeout=1):
        task = tc.get_task(task_id)
        # NOT_A_DIRECTORY means the source path is not a directory:
        # cancel the task and surface that status to the caller.
        if task['nice_status'] == "NOT_A_DIRECTORY":
            tc.cancel_task(task["task_id"])
            return task['nice_status']
    return "OK"
def transfer_data(self, src_endpoint: str, src_path: Union[str, Path, PathLike],
                  dest_endpoint: str, dest_path: Union[str, Path, PathLike]):
    """Submit a recursive, encrypted Globus transfer and block until it completes.

    Resolves both endpoint names to ids, submits the transfer, polls hourly
    (printing a progress dot per poll) and finally lists the transferred files.

    :param src_endpoint: display name of the source endpoint
    :param src_path: path on the source endpoint to copy from
    :param dest_endpoint: display name of the destination endpoint
    :param dest_path: path on the destination endpoint to copy to
    """
    self._src_endpoint = src_endpoint
    self._dest_endpoint = dest_endpoint
    src_endpoint_id = self.get_endpoint_id(src_endpoint)
    if not src_endpoint_id:
        print(f'ERROR: Unable to find source endpoint id for: "{self._src_endpoint}"')
        return
    dest_endpoint_id = self.get_endpoint_id(dest_endpoint)
    if not dest_endpoint_id:
        print(f'ERROR: Unable to find destination endpoint id for: "{self._dest_endpoint}"')
        return
    transfer_data = TransferData(self._transfer_client, src_endpoint_id,
                                 dest_endpoint_id, encrypt_data=True)
    transfer_data.add_item(src_path, dest_path, recursive=True)
    try:
        print(f'Submitting a transfer task from {self._src_endpoint}:{src_path} '
              f'to {self._dest_endpoint}:{dest_path}')
        task = self._transfer_client.submit_transfer(transfer_data)
    except TransferAPIError as e:
        print(str(e))
        sys.exit(1)
    task_id = task['task_id']
    print(f'\tWaiting for transfer to complete with task_id: {task_id}')
    while not self._transfer_client.task_wait(task_id=task_id, timeout=3600,
                                              polling_interval=60):
        # BUGFIX: without flush=True the dots sit in the stdout buffer (no
        # newline is emitted) and never show while the transfer is running.
        print('.', end='', flush=True)
    # BUGFIX: terminate the progress-dot line so the summary starts cleanly.
    print()
    print('Transferred files:')
    for info in self._transfer_client.task_successful_transfers(task_id=task_id,
                                                                num_results=None):
        print("\t{} -> {}".format(info["source_path"], info["destination_path"]))
def copy_file(self, ori, destiny, tc):
    """
    Copy a single file between the configured endpoints using Globus.

    :param ori: path where the data is in the source machine
    :type ori: str
    :param destiny: path where the data will be put on the destiny machine
    :type destiny: str
    :param tc: globus transfer client
    :type tc: :class:`globus_sdk.TransferClient`
    :return: status of the transference
    :rtype: str
    """
    tdata = TransferData(tc, self._from, self._to,
                         label="SDK example", sync_level="checksum")
    # Single file: no recursive flag.
    tdata.add_item(ori, destiny)
    submission = tc.submit_transfer(tdata)
    # Block, polling once per second, until the transfer ends.
    while not tc.task_wait(submission["task_id"], timeout=1):
        continue
    return "OK"
def transfer(self):
    """Endlessly mirror the source folder to the destination folder.

    Auto-activates both endpoints once, then loops forever: verify both
    endpoint paths, submit a checksum-synced recursive transfer, wait for
    it, and sleep ``poll_time`` seconds before the next round.
    """
    self.transfer_client = transfer_client = globus_sdk.TransferClient(
        authorizer=self.authorizer)
    try:
        transfer_client.endpoint_autoactivate(self.source_id)
        transfer_client.endpoint_autoactivate(self.dest_id)
    except GlobusAPIError as ex:
        if ex.http_status == 401:
            # Stored refresh token is no longer valid: tell the user how to reset.
            sys.exit('Refresh token has expired. '
                     'Please delete the `tokens` object from '
                     '{} and try again.'.format(self.refresh_token_file))
        raise ex
    while True:
        self._check_end_point(self.source_id, self.source_folder)
        self._check_end_point(self.dest_id, self.dest_folder)
        tdata = TransferData(self.transfer_client,
                             self.source_id,
                             self.dest_id,
                             label=self.transfer_label,
                             sync_level="checksum")
        tdata.add_item(self.source_folder, self.dest_folder, recursive=True)
        task = transfer_client.submit_transfer(tdata)
        task_id = task['task_id']
        self.logger.info("Task id {} submitted".format(task_id))
        transfer_client.task_wait(task_id=task_id,
                                  timeout=self.poll_time,
                                  polling_interval=5)
        td = transfer_client.get_task(task_id)
        self.logger.info("Task id {} complete".format(task_id))
        self.logger.debug(td)
        time.sleep(self.poll_time)
def submit_xfer(source_endpoint_id, destination_endpoint_id, source_path,
                dest_path, job_label, recursive=False, logger=logging.log):
    """Submit a single Globus transfer and return its task id.

    Auto-activates both endpoints first; the transfer uses checksum-based
    syncing and suppresses success/failure e-mail notifications.
    """
    tc = get_transfer_client(logger=logger)
    # Both endpoints are expected to be Globus Server endpoints, so send
    # auto-activate commands for each of them before submitting.
    auto_activate_endpoint(tc, source_endpoint_id, logger=logger)
    auto_activate_endpoint(tc, destination_endpoint_id, logger=logger)
    # sync_level="checksum": Globus compares source/destination checksums
    # before transferring and only moves files whose checksums differ.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum",
                         notify_on_succeeded=False,
                         notify_on_failed=False)
    tdata.add_item(source_path, dest_path, recursive=recursive)
    return tc.submit_transfer(tdata)["task_id"]
def main():
    """Authenticate with Globus, then start (or skip) the configured transfer.

    Loads cached tokens when possible, otherwise runs the Native App login
    flow; skips this run when the previous task is in a state listed in
    PREVIOUS_TASK_RUN_CASES; finally submits the transfer and prints a link
    to the Globus web app.
    """
    tokens = None
    client = NativeClient(client_id=CLIENT_ID, app_name=APP_NAME)
    try:
        # if we already have tokens, load and use them
        tokens = client.load_tokens(requested_scope=SCOPES)
    except Exception:
        # BUGFIX: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt; the intent is best-effort cache loading only.
        pass
    if not tokens:
        # if we need to get tokens, start the Native App authentication process;
        # need to specify that we want refresh tokens
        tokens = client.login(requested_scopes=SCOPES, refresh_tokens=True)
        try:
            client.save_tokens(tokens)
        except Exception:
            # BUGFIX: narrowed from bare `except:`; failing to cache tokens
            # should not abort the run, but must not hide interrupts either.
            pass
    transfer = setup_transfer_client(tokens['transfer.api.globus.org'])
    try:
        task_data = load_data_from_file(DATA_FILE)['task']
        task = transfer.get_task(task_data['task_id'])
        if task['status'] not in PREVIOUS_TASK_RUN_CASES:
            print('The last transfer status is {}, skipping run...'.format(
                task['status']))
            sys.exit(1)
    except KeyError:
        # Ignore if there is no previous task
        pass
    check_endpoint_path(transfer, SOURCE_ENDPOINT, SOURCE_PATH)
    if CREATE_DESTINATION_FOLDER:
        create_destination_directory(transfer, DESTINATION_ENDPOINT,
                                     DESTINATION_PATH)
    else:
        check_endpoint_path(transfer, DESTINATION_ENDPOINT, DESTINATION_PATH)
    tdata = TransferData(transfer, SOURCE_ENDPOINT, DESTINATION_ENDPOINT,
                         label=TRANSFER_LABEL, sync_level="checksum")
    tdata.add_item(SOURCE_PATH, DESTINATION_PATH, recursive=True)
    task = transfer.submit_transfer(tdata)
    # Persist the task id so the next run can detect an in-flight transfer.
    save_data_to_file(DATA_FILE, 'task', task.data)
    print('Transfer has been started from\n {}:{}\nto\n {}:{}'.format(
        SOURCE_ENDPOINT, SOURCE_PATH, DESTINATION_ENDPOINT, DESTINATION_PATH))
    url_string = 'https://globus.org/app/transfer?' + \
        six.moves.urllib.parse.urlencode({
            'origin_id': SOURCE_ENDPOINT,
            'origin_path': SOURCE_PATH,
            'destination_id': DESTINATION_ENDPOINT,
            'destination_path': DESTINATION_PATH
        })
    print('Visit the link below to see the changes:\n{}'.format(url_string))
def transfer_file(self, src_file, dst_file):
    """Submit a single-file transfer between the configured endpoints.

    Returns the submission response, after logging its task id and code.
    """
    tdata = TransferData(self.transferClient,
                         self.src_endpoint['id'],
                         self.dst_endpoint['id'])
    tdata.add_item(src_file, dst_file)
    result = self.transferClient.submit_transfer(tdata)
    self.logger.info('task_id [{0}] {1}'.format(result['task_id'],
                                                result['code']))
    return result
def transfer(client, remote_uuid, local_uuid, file_list, event=None):
    """
    Setup a file transfer between two endpoints

    Parameters:
        remote_uuid (str): the globus uuid of the source endpoint
        local_uuid (str): the globus uuid of the destination endpoint
        file_list (list): a list of dictionaries with keys remote_path, local_path
        event (Threadding.Event): a kill event for running inside a thread
    """
    # Build the transfer description.
    try:
        xfer = TransferData(client,
                            remote_uuid,
                            local_uuid,
                            sync_level='checksum',
                            label='Processflow auto transfer')
    except Exception as e:
        logging.error('Error creating transfer task')
        logging.error(format_debug(e))
        return
    # Queue every requested file; these are individual files, not directories.
    for item in file_list:
        xfer.add_item(source_path=item['remote_path'],
                      destination_path=item['local_path'],
                      recursive=False)
    # Submit the transfer.
    task_id = None
    submission = None
    try:
        submission = client.submit_transfer(xfer)
        task_id = submission["task_id"]
        logging.info('starting transfer with task id %s', task_id)
    except Exception as e:
        if submission:
            logging.error("result: %s", str(submission))
        logging.error("Could not submit the transfer")
        logging.error(format_debug(e))
        return
    # Poll every 10s until the task reaches a terminal state, honouring the
    # kill event when running inside a thread.
    while True:
        status = client.get_task(task_id)
        if status['status'] == 'SUCCEEDED':
            return True, None
        elif status['status'] == 'FAILED':
            return False, status.get('nice_status_details')
        if event and event.is_set():
            client.cancel_task(task_id)
            return None, None
        sleep(10)
def bulk_submit_xfer(submitjob, recursive=False):
    """Submit one Globus transfer task covering every file in *submitjob*.

    Endpoint ids come from the first job entry's metadata. Both endpoints
    must report 'AlreadyActivated'; otherwise None is returned. On success
    returns the Globus task id.
    """
    cfg = load_config()
    app_cfg = cfg['globus']['apps'][GLOBUS_AUTH_APP]
    auth_client = NativeAppAuthClient(app_cfg['client_id'])
    authorizer = RefreshTokenAuthorizer(refresh_token=app_cfg['refresh_token'],
                                        auth_client=auth_client)
    metadata = submitjob[0].get('metadata')
    source_endpoint_id = metadata.get('source_globus_endpoint_id')
    destination_endpoint_id = metadata.get('dest_globus_endpoint_id')
    tc = TransferClient(authorizer=authorizer)
    # Both endpoints are expected to be Globus Server endpoints: send
    # auto-activate commands and bail out unless both are already active.
    a = auto_activate_endpoint(tc, source_endpoint_id)
    logging.debug('a: %s' % a)
    if a != 'AlreadyActivated':
        return None
    b = auto_activate_endpoint(tc, destination_endpoint_id)
    logging.debug('b: %s' % b)
    if b != 'AlreadyActivated':
        return None
    # Label the task with a timestamp.
    # NOTE(review): '%s' (lowercase) is a platform-specific epoch-seconds
    # directive; '%S' may have been intended — confirm before changing.
    job_label = datetime.now().strftime('%Y%m%d%H%M%s')
    # sync_level="checksum": only files whose checksums differ are moved.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum")
    for file in submitjob:
        source_path = file.get('sources')[0]
        dest_path = file.get('destinations')[0]
        filesize = file['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter(
            'daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize',
            filesize)
    transfer_result = tc.submit_transfer(tdata)
    return transfer_result["task_id"]
def transfer_directory(**kwargs):
    """
    Transfer all the contents from source_endpoint:src_path to destination_endpoint:dst_path

    parameters:
        source_endpoint (str) the globus UUID for the source files
        destination_endpoint (str) the globus UUID for the destination
        src_path (str) the path to the source directory to copy
        dst_path (str) the path on the destination directory
        event_list (EventList) queue user-facing status messages are pushed into
        event (threading.Event) optional kill switch when run inside a thread

    returns:
        True on success, False on failure or submission error,
        None when cancelled via the kill event.
    """
    source_endpoint = kwargs['source_endpoint']
    destination_endpoint = kwargs['destination_endpoint']
    src_path = kwargs['src_path']
    dst_path = kwargs['dst_path']
    event_list = kwargs['event_list']
    event = kwargs['event']
    client = get_client()
    transfer = TransferData(client,
                            source_endpoint,
                            destination_endpoint,
                            sync_level='checksum')
    transfer.add_item(source_path=src_path,
                      destination_path=dst_path,
                      recursive=True)
    try:
        result = client.submit_transfer(transfer)
        task_id = result['task_id']
    except Exception:
        # BUGFIX: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt; submission failure is still reported as False.
        return False
    head, directory_name = os.path.split(src_path)
    msg = '{dir} transfer starting'.format(dir=directory_name)
    event_list.push(message=msg)
    retcode = 0
    # Poll every 5s until the task reaches a terminal state.
    while True:
        if event and event.is_set():
            client.cancel_task(task_id)
            return
        status = client.get_task(task_id).get('status')
        if status == 'SUCCEEDED':
            msg = '{dir} transfer complete'.format(dir=directory_name)
            retcode = True
            break
        elif status == 'FAILED':
            msg = '{dir} transfer FAILED'.format(dir=directory_name)
            retcode = False
            break
        else:
            sleep(5)
    event_list.push(message=msg)
    return retcode
def transfer_directory(src_uuid, dst_uuid, src_path, dst_path,
                       event_list=None, killevent=None):
    """
    Transfer all the contents from source_endpoint:src_path to destination_endpoint:dst_path

    parameters:
        src_uuid (str): the globus UUID for the source files
        dst_uuid (str) the globus UUID for the destination
        src_path (str) the path to the source directory to copy
        dst_path (str) the path on the destination directory
        event_list (EventList): an eventlist to push user notifications into
        killevent (Threadding.Event): an event to listen for if running
            inside a thread to terminate
    """
    client = get_client()
    transfer = TransferData(client, src_uuid, dst_uuid, sync_level='checksum')
    transfer.add_item(source_path=src_path,
                      destination_path=dst_path,
                      recursive=True)
    try:
        msg = 'Starting globus directory transfer from {src} to {dst}'.format(
            src=src_path, dst=dst_path)
        print_line(msg, event_list)
        logging.info(msg)
        result = client.submit_transfer(transfer)
        task_id = result['task_id']
    except Exception:
        # BUGFIX: the placeholder {dst_pathj} did not match the dst_path
        # keyword, so this .format() raised KeyError inside the handler and
        # masked the original error; also fixed the "tp" typo and narrowed
        # the bare `except:` to Exception.
        msg = 'Transfer setup for {src_uuid}:{src_path} to {dst_uuid}:{dst_path} failed'.format(
            src_uuid=src_uuid, src_path=src_path,
            dst_uuid=dst_uuid, dst_path=dst_path)
        logging.error(msg)
        return False
    # Poll every 10s until the task reaches a terminal state.
    while True:
        status = client.get_task(task_id).get('status')
        if status == 'SUCCEEDED':
            return True
        elif status == 'FAILED':
            return False
        else:
            msg = 'Unexpected globus code: {}'.format(status)
            print_line(msg, event_list)
            # BUGFIX: referenced the undefined name `event`; the parameter
            # is named killevent, so this branch raised NameError.
            if killevent and killevent.is_set():
                client.cancel_task(task_id)
                return False
            sleep(10)
def __init__(self, source_id, dest_id, label=None, sync_level=None):
    """Validate both endpoint UUIDs and prepare an empty transfer.

    :param source_id: UUID of the source endpoint
    :param dest_id: UUID of the destination endpoint
    :param label: optional human-readable label for the task
    :param sync_level: optional Globus sync level (e.g. "checksum")
    """
    self.source_id = auth.verify_uuid(source_id)
    self.dest_id = auth.verify_uuid(dest_id)
    self.transfer_client = get_transfer_client()
    # BUGFIX: pass the verified UUIDs rather than the raw arguments so any
    # normalization performed by auth.verify_uuid is not discarded.
    # (Assumes verify_uuid returns the canonical UUID — confirm.)
    self.transfer_data = TransferData(self.transfer_client,
                                      self.source_id,
                                      self.dest_id,
                                      label=label,
                                      sync_level=sync_level)
def submit_transfer():
    """
    - Take the data returned by the Browse Endpoint helper page and make
      a Globus transfer request.
    - Send the user to the transfer status page with the task id from
      the transfer.
    """
    browse_endpoint_form = request.form
    selected = session['form']['datasets']
    filtered_datasets = [ds for ds in datasets if ds['id'] in selected]
    transfer_tokens = session['tokens']['transfer.api.globus.org']
    authorizer = RefreshTokenAuthorizer(
        transfer_tokens['refresh_token'],
        load_portal_client(),
        access_token=transfer_tokens['access_token'],
        expires_at=transfer_tokens['expires_at_seconds'])
    transfer = TransferClient(authorizer=authorizer)
    source_endpoint_id = app.config['DATASET_ENDPOINT_ID']
    source_endpoint_base = app.config['DATASET_ENDPOINT_BASE']
    destination_endpoint_id = browse_endpoint_form['endpoint_id']
    destination_folder = browse_endpoint_form.get('folder[0]')
    transfer_data = TransferData(transfer_client=transfer,
                                 source_endpoint=source_endpoint_id,
                                 destination_endpoint=destination_endpoint_id,
                                 label=browse_endpoint_form.get('label'))
    for ds in filtered_datasets:
        # Destination layout: <path>[<chosen folder>/]<dataset name>/
        dest_path = browse_endpoint_form['path']
        if destination_folder:
            dest_path += destination_folder + '/'
        dest_path += ds['name'] + '/'
        transfer_data.add_item(source_path=source_endpoint_base + ds['path'],
                               destination_path=dest_path,
                               recursive=True)
    transfer.endpoint_autoactivate(source_endpoint_id)
    transfer.endpoint_autoactivate(destination_endpoint_id)
    task_id = transfer.submit_transfer(transfer_data)['task_id']
    flash('Transfer request submitted successfully. Task ID: ' + task_id)
    return redirect(url_for('transfer_status', task_id=task_id))
def bulk_submit_xfer(submitjob, recursive=False, logger=logging.log):
    """Submit one deadline-bounded Globus transfer task for all files in *submitjob*.

    Endpoint ids come from the first job entry's metadata. The task deadline
    is 'conveyor/globus_task_deadline' minutes from now (default 2880, i.e.
    48 hours). Returns the Globus task id.
    """
    cfg = load_config(logger=logger)
    app_cfg = cfg['globus']['apps'][GLOBUS_AUTH_APP]
    auth_client = NativeAppAuthClient(app_cfg['client_id'])
    authorizer = RefreshTokenAuthorizer(refresh_token=app_cfg['refresh_token'],
                                        auth_client=auth_client)
    metadata = submitjob[0].get('metadata')
    source_endpoint_id = metadata.get('source_globus_endpoint_id')
    destination_endpoint_id = metadata.get('dest_globus_endpoint_id')
    tc = TransferClient(authorizer=authorizer)
    # Label the task with a timestamp.
    now = datetime.datetime.now()
    job_label = now.strftime('%Y%m%d%H%M%s')
    # Enforce a time window for the whole task to complete; default is
    # 2880 minutes (48 hours).
    globus_task_deadline = config_get_int('conveyor', 'globus_task_deadline',
                                          False, 2880)
    deadline = now + datetime.timedelta(minutes=globus_task_deadline)
    # sync_level="checksum": only files whose checksums differ are moved.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum",
                         deadline=str(deadline))
    for file in submitjob:
        source_path = file.get('sources')[0]
        dest_path = file.get('destinations')[0]
        filesize = file['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter(
            'daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize',
            filesize)
    transfer_result = tc.submit_transfer(tdata)
    logger(logging.INFO, "transfer_result: %s" % transfer_result)
    return transfer_result["task_id"]
def sync(source_endpoint, source_path, destination_endpoint, destination_path,
         synctype, sync_data_file, transfer_label):
    """One-way sync of a directory between two distinct Globus endpoints.

    Skips the run when the previously recorded task is still in flight,
    otherwise creates the destination directory, submits a recursive
    transfer with the requested sync level and records the new task.
    """
    global DATA_FILE
    if source_endpoint == destination_endpoint:
        raise click.UsageError(
            'Source and destination endpoints must be different.')
    DATA_FILE = sync_data_file
    tokens = get_tokens()
    transfer = setup_transfer_client(tokens['transfer.api.globus.org'],
                                     source_endpoint, destination_endpoint)
    # Refuse to start while the previous task is still running.
    try:
        previous = load_data_from_file(DATA_FILE)['task']
        task = transfer.get_task(previous['task_id'])
        if task['status'] not in ['SUCCEEDED', 'FAILED']:
            print('The last transfer status is {}, skipping run...'.format(
                task['status']))
            sys.exit(1)
    except KeyError:
        # Ignore if there is no previous task
        pass
    check_endpoint_path(transfer, source_endpoint, source_path)
    create_destination_directory(transfer, destination_endpoint,
                                 destination_path)
    tdata = TransferData(transfer, source_endpoint, destination_endpoint,
                         label=transfer_label, sync_level=synctype)
    tdata.add_item(source_path, destination_path, recursive=True)
    task = transfer.submit_transfer(tdata)
    save_data_to_file(DATA_FILE, 'task', task.data)
    print('Transfer has been started from\n {}:{}\nto\n {}:{}'.format(
        source_endpoint, source_path, destination_endpoint, destination_path))
    url_string = 'https://globus.org/app/transfer?' + \
        six.moves.urllib.parse.urlencode({
            'origin_id': source_endpoint,
            'origin_path': source_path,
            'destination_id': destination_endpoint,
            'destination_path': destination_path
        })
    print('Visit the link below to see the changes:\n{}'.format(url_string))
def make_transfer(self, night, project, ndir):
    """ Function to create the data transfer object """
    self.active_transfer_count += 1
    source = os.path.join(self.config['noao_root'], ndir)
    destination = os.path.join(self.config['transfer_dir'], ndir)
    tdata = TransferData(self.client,
                         self.config['src_ep'],
                         self.config['dest_ep'],
                         label='transfer %s_%s' % (night, project),
                         sync_level="checksum",
                         verify_checksum=True,
                         preserve_timestamp=True)
    tdata.add_item(source, destination, recursive=True)
    return tdata
def cp(self, src_ep_path, dst_ep_path, recursive=False):
    '''
    copy file/path
    todo: support label, sync_level, etc?
    sync_level: ["exists", "size", "mtime", "checksum"]
    '''
    client = self.xfer_client
    # Split "endpoint:path" style arguments into their components.
    src_ep, src_path = self.ep_parts(src_ep_path)
    dst_ep, dst_path = self.ep_parts(dst_ep_path)
    tdata = TransferData(client, src_ep, dst_ep)
    tdata.add_item(src_path, dst_path, recursive=recursive)
    submission = client.submit_transfer(tdata)
    return self._wait(submission['task_id'])
def test_create_job(timer_client, start, interval):
    """create_job accepts a TimerJob built from TransferData or a plain dict,
    and serializes start/interval/callback_url as expected."""
    meta = load_response(timer_client.create_job).metadata
    transfer_client = TransferClient()
    # Stub submission-id acquisition so no network round trip is needed.
    transfer_client.get_submission_id = lambda *_0, **_1: {"value": "mock"}
    transfer_data = TransferData(transfer_client, GO_EP1_ID, GO_EP2_ID)
    # Both the TransferData object and its dict form must be accepted.
    for payload in (transfer_data, dict(transfer_data)):
        timer_job = TimerJob.from_transfer_data(payload, start, interval)
        response = timer_client.create_job(timer_job)
        assert response.http_status == 201
        assert response.data["job_id"] == meta["job_id"]
    req_body = json.loads(get_last_request().body)
    expected_start = start.isoformat() if isinstance(start, datetime) else start
    assert req_body["start"] == expected_start
    expected_interval = (interval.total_seconds()
                         if isinstance(interval, timedelta) else interval)
    assert req_body["interval"] == expected_interval
    assert req_body["callback_url"] == slash_join(get_service_url("actions"),
                                                  "/transfer/transfer/run")
def getTransferData():
    """Build a checksum-synced, verify-on-arrival TransferData between the
    tutorial app's configured win10 and sdccfed endpoints."""
    cfg = load_config()
    tutorial_cfg = cfg['globus']['apps']['SDK Tutorial App']
    auth_client = NativeAppAuthClient(tutorial_cfg['client_id'])
    authorizer = RefreshTokenAuthorizer(
        refresh_token=tutorial_cfg['refresh_token'],
        auth_client=auth_client)
    source_endpoint_id = tutorial_cfg['win10_endpoint_id']
    destination_endpoint_id = tutorial_cfg['sdccfed_endpoint_id']
    tc = TransferClient(authorizer=authorizer)
    # Both endpoints are expected to be Globus Server endpoints, so send
    # auto-activate commands for each of them.
    auto_activate_endpoint(tc, source_endpoint_id)
    auto_activate_endpoint(tc, destination_endpoint_id)
    # Label the task with a timestamp.
    job_label = datetime.now().strftime('%Y%m%d%H%M%s')
    # sync_level="checksum": only files whose checksums differ are moved;
    # verify_checksum=True re-checks each file after it lands and redoes
    # the transfer on mismatch.
    return TransferData(tc,
                        source_endpoint_id,
                        destination_endpoint_id,
                        label=job_label,
                        sync_level="checksum",
                        verify_checksum=True)
def main():
    """Start (or skip) the configured Globus transfer using cached tokens.

    Skips this run when the previously recorded task is in a state listed
    in PREVIOUS_TASK_RUN_CASES; otherwise verifies both endpoint paths,
    submits a recursive checksum-synced transfer, records the new task and
    prints a link to the Globus web app.
    """
    tokens = get_tokens()
    transfer = setup_transfer_client(tokens['transfer.api.globus.org'])
    try:
        previous = load_data_from_file(DATA_FILE)['task']
        task = transfer.get_task(previous['task_id'])
        if task['status'] not in PREVIOUS_TASK_RUN_CASES:
            print('The last transfer status is {}, skipping run...'.format(
                task['status']))
            sys.exit(1)
    except KeyError:
        # Ignore if there is no previous task
        pass
    check_endpoint_path(transfer, SOURCE_ENDPOINT, SOURCE_PATH)
    if CREATE_DESTINATION_FOLDER:
        create_destination_directory(transfer, DESTINATION_ENDPOINT,
                                     DESTINATION_PATH)
    else:
        check_endpoint_path(transfer, DESTINATION_ENDPOINT, DESTINATION_PATH)
    tdata = TransferData(transfer, SOURCE_ENDPOINT, DESTINATION_ENDPOINT,
                         label=TRANSFER_LABEL, sync_level="checksum")
    tdata.add_item(SOURCE_PATH, DESTINATION_PATH, recursive=True)
    task = transfer.submit_transfer(tdata)
    save_data_to_file(DATA_FILE, 'task', task.data)
    print('Transfer has been started from\n {}:{}\nto\n {}:{}'.format(
        SOURCE_ENDPOINT, SOURCE_PATH, DESTINATION_ENDPOINT, DESTINATION_PATH))
    query = six.moves.urllib.parse.urlencode({
        'origin_id': SOURCE_ENDPOINT,
        'origin_path': SOURCE_PATH,
        'destination_id': DESTINATION_ENDPOINT,
        'destination_path': DESTINATION_PATH
    })
    url_string = 'https://globus.org/app/transfer?' + query
    print('Visit the link below to see the changes:\n{}'.format(url_string))
def make_transfer(self, path):
    """ Function to create the data transfer object """
    self.active_transfer_count += 1
    return TransferData(self.client,
                        self.config['src_ep'],
                        self.config['dest_ep'],
                        label='transfer %s' % (path),
                        sync_level="checksum",
                        verify_checksum=True,
                        preserve_timestamp=True)
class Transfer:
    '''
    This class describes a Globus transfer task.
    '''

    def __init__(self, source_id, dest_id, label=None, sync_level=None):
        """Validate both endpoint UUIDs and prepare an empty transfer.

        :param source_id: UUID of the source endpoint
        :param dest_id: UUID of the destination endpoint
        :param label: optional human-readable label for the task
        :param sync_level: optional Globus sync level (e.g. "checksum")
        """
        self.source_id = auth.verify_uuid(source_id)
        self.dest_id = auth.verify_uuid(dest_id)
        self.transfer_client = get_transfer_client()
        # BUGFIX: pass the verified UUIDs rather than the raw arguments so
        # any normalization performed by auth.verify_uuid is not discarded.
        # (Assumes verify_uuid returns the canonical UUID — confirm.)
        self.transfer_data = TransferData(self.transfer_client,
                                          self.source_id,
                                          self.dest_id,
                                          label=label,
                                          sync_level=sync_level)

    def add_item(self, src_path, dst_path, recursive=False):
        """Queue one file (or a directory when recursive=True) for transfer."""
        self.transfer_data.add_item(src_path, dst_path, recursive=recursive)

    def submit(self, **kwargs):
        """Submit the accumulated items; kwargs are forwarded to submit_transfer."""
        self.transfer_client.submit_transfer(self.transfer_data, **kwargs)
def run(self):
    """Execute one dataset transfer end-to-end and record its outcome.

    Submits a recursive transfer of self.dataset from source to destination,
    polls until the task finishes, a deadline expires, or the task is paused,
    then persists a TransferModel row describing the result and releases
    this worker's slot.
    """
    logger.info(f"{self} - started")
    source = self.source
    destination = self.destination
    src_path = os.path.join(source.src_path, self.dataset)
    dst_path = destination.dst_path
    tc = GlobusTransfer.transfer_client
    td = TransferData(tc, source.uuid, destination.uuid)
    td.add_item(src_path, dst_path, recursive=True)
    try:
        task = tc.submit_transfer(td)
        request_time = datetime.utcnow()
        task_id = task.get("task_id")
        """
        A Globus transfer job (task) can be in one of the three states:
        ACTIVE, SUCCEEDED, FAILED. The script every 60 seconds polls a
        status of the transfer job (task) from the Globus Transfer service,
        with 60 second timeout limit. If the task is ACTIVE after time runs
        out 'task_wait' returns False, and True otherwise.
        """
        while not tc.task_wait(task_id, 60, 60):
            # Stop polling once the class-level deadline has elapsed;
            # the still-ACTIVE task is handled (and cancelled) below.
            if datetime.utcnow() - request_time >= timedelta(
                    seconds=GlobusTransfer.deadline):
                break
            task = tc.get_task(task_id)
            # A paused task will not make progress; stop waiting for it.
            if task.get("is_paused"):
                break
        """
        The Globus transfer job (task) has been finished (SUCCEEDED or
        FAILED), or is still active (ACTIVE). Check if the transfer
        SUCCEEDED or FAILED.
        """
        task = tc.get_task(task_id)
        if task["status"] == "SUCCEEDED":
            bps = task.get("effective_bytes_per_second")
            rate = GlobusTransfer.convert_bps(bps)
            logger.info(
                f"Globus transfer {task_id}, from {source.uuid}{src_path} to {destination.uuid}{dst_path} succeeded"
            )
            logger.info(
                "{} - files transferred: {}, bytes transferred: {}, effective transfer rate: {}, faults: {}"
                .format(self, task.get("files_transferred"),
                        task.get("bytes_transferred"), rate,
                        task.get("faults")))
            faults = task.get("faults")
            message = None
            # Success with faults still records the error events for review.
            if faults > 0:
                message = self.get_error_events(tc, task_id)
            t = TransferModel(uuid=task_id,
                              set=self.set,
                              source=source.name,
                              destination=destination.name,
                              dataset=self.dataset,
                              status=SUCCEEDED,
                              rate=bps,
                              message=message,
                              faults=faults)
        elif task.get("status") == "ACTIVE":
            # Still ACTIVE here means we broke out of the wait loop:
            # either the task was paused or the deadline expired.
            if task.get("is_paused"):
                pause_info = tc.task_pause_info(task_id)
                paused_rules = pause_info.get("pause_rules")
                reason = paused_rules[0].get("message")
                message = f"The task was paused. Reason: {reason}"
                status = PAUSED
                logger.info("{} - {}".format(self, message))
            else:
                message = f"The task reached a {GlobusTransfer.deadline} second deadline\n"
                # NOTE(review): `events` is assigned but never used; the
                # error details come from get_error_events below — confirm
                # whether this call can be removed.
                events = tc.task_event_list(task_id, num_results=5,
                                            filter="is_error:1")
                message += self.get_error_events(tc, task_id)
                status = DEADLINE
                logger.warning("{} - faults: {}, error: {}".format(
                    self, task.get("faults"), message))
            # In both cases the task cannot proceed: cancel it.
            tc.cancel_task(task_id)
            t = TransferModel(uuid=task_id,
                              set=self.set,
                              source=source.name,
                              destination=destination.name,
                              dataset=self.dataset,
                              status=status,
                              message=message,
                              faults=task.get("faults"))
        else:
            # Terminal non-success state (FAILED).
            t = TransferModel(uuid=task_id,
                              set=self.set,
                              source=source.name,
                              destination=destination.name,
                              dataset=self.dataset,
                              status=FAILED)
    except Exception as e:
        logger.error(f"{self} - exception: {e}")
        t = TransferModel(set=self.set,
                          source=source.name,
                          destination=destination.name,
                          dataset=self.dataset,
                          status=EXCEPTION,
                          message=f"Globus SDK Exception: {e}")
    # Bookkeeping: mark this transfer done and persist the outcome row.
    self.done = True
    GlobusTransfer.transfers2do -= 1
    session = DBSession()
    session.add(t)
    session.commit()
    DBSession.remove()
    self.release()
    logger.info(f"{self} - finished")
def trigger_preparation(self, jobspec):
    """Stage the job's input files via a Globus transfer.

    Builds a TransferData with one item per input file and submits it,
    recording the resulting task id on the jobspec for later status
    lookups. Returns a (bool, message) tuple: (True, '') on success or
    when the work is already queued / there is nothing to transfer,
    (False, error-message) otherwise.
    """
    # get logger
    tmpLog = core_utils.make_logger(_logger,
                                    'PandaID={0}'.format(jobspec.PandaID),
                                    method_name='trigger_preparation')
    tmpLog.debug('start')
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    else:
        tmpLog.debug('jobspec.computingSite : {0}'.format(
            jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # get label
    label = self.make_label(jobspec)
    tmpLog.debug('label={0}'.format(label))
    # get transfer tasks
    tmpStat, transferTasks = globus_utils.get_transfer_tasks(
        tmpLog, self.tc, label)
    if not tmpStat:
        errStr = 'failed to get transfer tasks'
        tmpLog.error(errStr)
        return False, errStr
    # check if already queued
    if label in transferTasks:
        tmpLog.debug('skip since already queued with {0}'.format(
            str(transferTasks[label])))
        return True, ''
    # set the Globus destination Endpoint id and path will get them from Agis eventually
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
    self.srcEndpoint = queueConfig.preparator['srcEndpoint']
    self.Globus_dstPath = self.basePath
    #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
    self.dstEndpoint = queueConfig.preparator['dstEndpoint']
    # get input files
    files = []
    lfns = []
    inFiles = jobspec.get_input_file_attributes(skip_ready=True)
    for inLFN, inFile in iteritems(inFiles):
        # set path to each file
        inFile['path'] = mover_utils.construct_file_path(
            self.basePath, inFile['scope'], inLFN)
        dstpath = inFile['path']
        # check if path exists if not create it.
        if not os.access(self.basePath, os.F_OK):
            os.makedirs(self.basePath)
        # create the file paths for the Globus source and destination endpoints
        Globus_srcpath = mover_utils.construct_file_path(
            self.Globus_srcPath, inFile['scope'], inLFN)
        Globus_dstpath = mover_utils.construct_file_path(
            self.Globus_dstPath, inFile['scope'], inLFN)
        files.append({
            'scope': inFile['scope'],
            'name': inLFN,
            'Globus_dstPath': Globus_dstpath,
            'Globus_srcPath': Globus_srcpath
        })
        lfns.append(inLFN)
    tmpLog.debug('files[] {0}'.format(files))
    try:
        # Test endpoints for activation
        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
            tmpLog, self.tc, self.srcEndpoint)
        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
            tmpLog, self.tc, self.dstEndpoint)
        if tmpStatsrc and tmpStatdst:
            errStr = 'source Endpoint and destination Endpoint activated'
            tmpLog.debug(errStr)
        else:
            errStr = ''
            if not tmpStatsrc:
                errStr += ' source Endpoint not activated '
            if not tmpStatdst:
                errStr += ' destination Endpoint not activated '
            tmpLog.error(errStr)
            return False, errStr
        # both endpoints activated now prepare to transfer data
        if len(files) > 0:
            tdata = TransferData(self.tc,
                                 self.srcEndpoint,
                                 self.dstEndpoint,
                                 label=label,
                                 sync_level="checksum")
            # loop over all input files and add
            for myfile in files:
                tdata.add_item(myfile['Globus_srcPath'],
                               myfile['Globus_dstPath'])
            # submit
            transfer_result = self.tc.submit_transfer(tdata)
            # check status code and message
            tmpLog.debug(str(transfer_result))
            if transfer_result['code'] == "Accepted":
                # succeeded
                # set transfer ID which are used for later lookup
                transferID = transfer_result['task_id']
                jobspec.set_groups_to_files(
                    {transferID: {
                        'lfns': lfns,
                        'groupStatus': 'active'
                    }})
                tmpLog.debug('done')
                return True, ''
            else:
                return False, transfer_result['message']
        # if no files to transfer return True
        return True, 'No files to transfer'
    except:
        # NOTE(review): bare except; globus_utils.handle_globus_exception
        # presumably logs the details — consider narrowing to Exception.
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        # NOTE(review): errMsg is discarded and an empty dict is returned
        # where other paths return a string — confirm whether this should
        # be `return errStat, errMsg`.
        return errStat, {}
def run_agent():
    """Build a training CSV from MDF Search records and publish it via Globus.

    End-to-end pipeline:
      1. log in to Globus Search and Globus Transfer;
      2. resolve a local (source) endpoint, prompting the user if several
         Globus Connect endpoints are active;
      3. page through Search for all records of ``dataset_name``, aggregate
         them into a pandas DataFrame and write ``temp_train.csv``;
      4. transfer the CSV to the hard-coded destination endpoint;
      5. re-ingest the dataset entry with a ``training_set`` annotation and
         read it back for verification.

    Raises GlobusError on endpoint-resolution or transfer failures and
    ValueError if the dataset entry lookup is ambiguous.
    """
    # In[2]: authenticate the Search and Transfer clients
    search_client = globus_auth.login("https://search.api.globus.org/", "globus_search")
    transfer_client = transfer_auth.login()
    # In[3]: configuration; local_ep/dest_ep are hard-coded endpoint UUIDs,
    # so the endpoint-discovery branch below only runs if local_ep is blanked
    dataset_name = "pppdb"
    local_ep = "0bc1cb98-d2af-11e6-9cb1-22000a1e3b52"
    dest_ep = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    dest_path = "/sample_data/" + dataset_name + "_train.csv"
    timeout = False          # when False the transfer-wait loop never times out
    timeout_intervals = 10
    interval_time = 10       # seconds per task_wait poll
    verbose = True
    # In[4]: discover the local endpoint if one was not hard-coded
    if not local_ep:
        pgr_res = transfer_client.endpoint_search(filter_scope="my-endpoints")
        ep_candidates = pgr_res.data
        if len(ep_candidates) < 1:
            # Nothing found
            raise GlobusError("Error: No local endpoints found")
        elif len(ep_candidates) == 1:
            # Exactly one candidate
            if ep_candidates[0]["gcp_connected"] == False:
                # Is GCP, is not on
                raise GlobusError("Error: Globus Connect is not running")
            else:
                # Is GCServer or GCP and connected
                local_ep = ep_candidates[0]["id"]
        else:
            # >1 found; filter out disconnected GCP
            # (gcp_connected is None for GCServer, hence "is not False")
            ep_connections = [candidate for candidate in ep_candidates
                              if candidate["gcp_connected"] is not False]
            # Recheck list
            if len(ep_connections) < 1:
                # Nothing found
                raise GlobusError("Error: No local endpoints running")
            elif len(ep_connections) == 1:
                # Exactly one candidate
                if ep_connections[0]["gcp_connected"] == False:
                    # Is GCP, is not on
                    raise GlobusError("Error: Globus Connect is not active")
                else:
                    # Is GCServer or GCP and connected
                    local_ep = ep_connections[0]["id"]
            else:
                # >1 found: prompt the user to pick one
                print("Multiple endpoints found:")
                count = 0
                for ep in ep_connections:
                    count += 1
                    print(count, ": ", ep["display_name"], "\t", ep["id"])
                print("\nPlease choose the endpoint on this machine")
                ep_num = 0
                while ep_num == 0:
                    usr_choice = input("Enter the number of the correct endpoint (-1 to cancel): ")
                    try:
                        ep_choice = int(usr_choice)
                        if ep_choice == -1:
                            # User wants to quit
                            ep_num = -1  # Will break out of while to exit program
                        elif ep_choice in range(1, count + 1):
                            # Valid selection
                            ep_num = ep_choice  # Break out of while, return valid ID
                        else:
                            # Invalid number
                            print("Invalid selection")
                    except:
                        print("Invalid input")
                if ep_num == -1:
                    print("Cancelling")
                    sys.exit()
                local_ep = ep_connections[ep_num - 1]["id"]
    # Fetch and aggregate records into training set
    # In[5]: page through Search in windows of 10000 using globus_scroll_id
    count = 0
    num_processed = 0
    data_list = []
    while True:
        query = {
            "q": ("mdf_source_name:" + dataset_name + " AND mdf_node_type:record AND "
                  "globus_scroll_id:(>=" + str(count) + " AND <" + str(count + 10000) + ")"),
            "advanced": True,
            "limit": 10000
        }
        raw_res = search_client.structured_search(query)
        search_res = gmeta_pop(raw_res, True)
        for res in search_res:
            data_dict = json.loads(res["data"]["raw"])
            data_list.append(data_dict)
        num_ret = len(search_res)
        if num_ret:
            num_processed += num_ret
            count += 10000
        else:
            # empty window: no more records
            break
    if verbose:
        print("Processed:", len(data_list), "/", num_processed, "|",
              len(data_list) - num_processed)
    # In[6]: write the aggregated records to a temporary CSV in the CWD
    df = pd.DataFrame(data_list)
    df.to_csv(os.path.join(os.getcwd(), "temp_train.csv"))
    # Upload to NCSA endpoint
    # In[7]: transfer the CSV; the temp file is always removed in finally
    try:
        tdata = TransferData(transfer_client, local_ep, dest_ep,
                             verify_checksum=True, notify_on_succeeded=False,
                             notify_on_failed=False, notify_on_inactive=False)
        tdata.add_item(os.path.join(os.getcwd(), "temp_train.csv"), dest_path)
        res = transfer_client.submit_transfer(tdata)
        if res["code"] != "Accepted":
            raise GlobusError("Failed to transfer files: Transfer " + res["code"])
        else:
            intervals = 0
            # poll until the task completes; cancel on any error event or
            # (if timeout is enabled) after timeout_intervals polls
            while not transfer_client.task_wait(res["task_id"], timeout=interval_time,
                                                polling_interval=interval_time):
                for event in transfer_client.task_event_list(res["task_id"]):
                    if event["is_error"]:
                        transfer_client.cancel_task(res["task_id"])
                        raise GlobusError("Error: " + event["description"])
                if timeout and intervals >= timeout_intervals:
                    transfer_client.cancel_task(res["task_id"])
                    raise GlobusError("Transfer timed out.")
                intervals += 1
    except Exception as e:
        # re-raise unchanged; present only so the finally still runs cleanup
        raise
    finally:
        os.remove(os.path.join(os.getcwd(), "temp_train.csv"))
    # Update dataset entry
    # In[8]: fetch the (single) dataset entry and annotate it
    query = {
        "q": "mdf_source_name:" + dataset_name + " AND mdf_node_type:dataset",
        "advanced": True
    }
    raw_res = search_client.structured_search(query)
    search_res = gmeta_pop(raw_res)
    if len(search_res) != 1:
        raise ValueError("Incorrect number of results: " + str(len(search_res)))
    ingest = search_res[0]
    ingest["globus_subject"] = raw_res["gmeta"][0]["subject"]
    ingest["acl"] = ["public"]
    ingest["http://materialsdatafacility.org/#training_set"] = {
        "http://materialsdatafacility.org/#endpoint": dest_ep,
        "http://materialsdatafacility.org/#path": dest_path,
        "http://materialsdatafacility.org/#https": "https://data.materialsdatafacility.org" + dest_path
    }
    gmeta = format_gmeta([format_gmeta(ingest)])
    # rewrite the community key into its expanded namespaced form
    gmeta = json.loads(json.dumps(gmeta).replace(
        "mdf-publish.publication.community",
        "http://globus.org/publish-terms/#publication/community"))
    # In[9]: push the updated entry back into Search
    search_client.ingest(gmeta)
    # Check ingest
    # In[10]: read the entry back to confirm the annotation landed
    query = {
        "q": "mdf_source_name:" + dataset_name + " AND mdf_node_type:dataset",
        "advanced": True
    }
    raw_res = search_client.structured_search(query)
    search_res = gmeta_pop(raw_res, True)
    # In[11]:
    if verbose:
        # NOTE(review): accesses "training_set" while the ingest used the full
        # namespaced key - presumably gmeta_pop strips the namespace; verify
        print("Verification:\n", json.dumps(search_res[0]["training_set"],
                                            sort_keys=True, indent=4,
                                            separators=(',', ': ')))
on_refresh=update_tokens_file_on_refresh) try: tc = TransferClient(authorizer=authorizer) except: print( "ERROR: TransferClient() call failed! Unable to call the Globus transfer interface with the provided auth info!" ) sys.exit(-1) # print(transfer) # Now we should have auth, try setting up a transfer. tdata = TransferData(tc, source_endpoint_id, destination_endpoint_id, label="DCDE Relion transfer", sync_level="size") tdata.add_item(source_dir, dest_dir, recursive=True) transfer_result = tc.submit_transfer(tdata) print("task_id =", transfer_result["task_id"]) while not tc.task_wait( transfer_result['task_id'], timeout=1200, polling_interval=10): print(".", end="") print("\n{} completed!".format(transfer_result['task_id'])) os.listdir(path=dest_dir)
tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,tc,dstEndpoint) tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,tc,srcEndpoint) if tmpStatsrc and tmpStatdst: errStr = 'source Endpoint and destination Endpoint activated' tmpLog.debug(errStr) else: errStr = '' if not tmpStatsrc : errStr += ' source Endpoint not activated ' if not tmpStatdst : errStr += ' destination Endpoint not activated ' tmpLog.error(errStr) sys.exit(2) # both endpoints activated now prepare to transfer data # We are sending test files from our destination machine to the source machine tdata = TransferData(tc,dstEndpoint,srcEndpoint,sync_level="checksum") except: errStat, errMsg = globus_utils.handle_globus_exception(tmpLog) sys.exit(1) # loop over the job id's creating various JobSpecs jobSpec_list = [] for job_id in range(begin_job_id,end_job_id+1): jobSpec = JobSpec() jobSpec.jobParams = { 'scopeLog': 'panda', 'logFile': 'log', } jobSpec.computingSite = queueName jobSpec.PandaID = job_id jobSpec.modificationTime = datetime.datetime.now()
def check_status(self, jobspec):
    """Check/drive the stage-out of a job's output files via Globus.

    Two phases:
      1. For any "dummy" (placeholder) transfer ID group, take a 120 s DB
         lock, and - once at least 10 files are pooled or the group is older
         than 10 minutes - submit a real Globus transfer for all pooled
         files, replacing the dummy ID with the real task ID.
      2. For real (valid UUID) transfer IDs, look up the Globus task and map
         its status to the harvester convention: (True, '') on SUCCEEDED,
         (False, msg) on FAILED, (None, msg) for in-progress/temporary
         conditions that should be retried later.

    :param jobspec: job specification with output file groups
    :return: (status, message) tuple as described above
    """
    # make logger
    tmpLog = self.make_logger(_logger, 'PandaID={0} ThreadID={1}'.format(
        jobspec.PandaID, threading.current_thread().ident), method_name='check_status')
    tmpLog.debug('start')
    # show the dummy transfer id and set to a value with the PandaID if needed.
    tmpLog.debug('self.dummy_transfer_id = {}'.format(self.dummy_transfer_id))
    if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base, 'XXXX'):
        old_dummy_transfer_id = self.dummy_transfer_id
        self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base, jobspec.PandaID)
        tmpLog.debug('Change self.dummy_transfer_id from {0} to {1}'.format(
            old_dummy_transfer_id, self.dummy_transfer_id))
    # default return
    tmpRetVal = (True, '')
    # set flag if have db lock
    have_db_lock = False
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    else:
        tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # get the queueConfig and corresponding objStoreID_ES
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    # check queueConfig stager section to see if jobtype is set
    if 'jobtype' in queueConfig.stager:
        if queueConfig.stager['jobtype'] == "Yoda":
            self.Yodajob = True
    # set the location of the files in fileSpec.objstoreID
    # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json
    self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
    if self.Yodajob:
        self.pathConvention = int(queueConfig.stager['pathConvention'])
        tmpLog.debug('Yoda Job - PandaID = {0} objstoreID = {1} pathConvention ={2}'.format(
            jobspec.PandaID, self.objstoreID, self.pathConvention))
    else:
        self.pathConvention = None
        tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(jobspec.PandaID, self.objstoreID))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # set transferID to None
    transferID = None
    # get the scope of the log files
    outfileattrib = jobspec.get_output_file_attributes()
    scopeLog = 'xxxx'
    for key in outfileattrib.keys():
        if "log.tgz" in key:
            scopeLog = outfileattrib[key]['scope']
    # get transfer groups
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
    # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
    for dummy_transferID in groups:
        # skip if valid transfer ID not dummy one
        if validate_transferid(dummy_transferID):
            continue
        # lock for 120 sec
        tmpLog.debug('attempt to set DB lock for self.id - {0} dummy_transferID - {1}'.format(
            self.id, dummy_transferID))
        have_db_lock = self.dbInterface.get_object_lock(dummy_transferID, lock_interval=120)
        if not have_db_lock:
            # escape since locked by another thread
            msgStr = 'escape since locked by another thread'
            tmpLog.debug(msgStr)
            return None, msgStr
        # refresh group information since that could have been updated by another thread before getting the lock
        tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
        self.dbInterface.refresh_file_group_info(jobspec)
        # get transfer groups again with refreshed info
        tmpLog.debug('After db refresh call groups=jobspec.get_groups_of_output_files()')
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # the dummy transfer ID is still there
        if dummy_transferID in groups:
            groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
            # get files with the dummy transfer ID across jobs
            fileSpecs = self.dbInterface.get_files_with_group_id(dummy_transferID)
            # submit transfer if there are more than 10 files or the group was made before more than 10 min
            msgStr = 'dummy_transferID = {0} number of files = {1}'.format(
                dummy_transferID, len(fileSpecs))
            tmpLog.debug(msgStr)
            if len(fileSpecs) >= 10 or \
                    groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                tmpLog.debug('prepare to transfer files')
                # submit transfer and get a real transfer ID
                # set the Globus destination Endpoint id and path will get them from Agis eventually
                #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                self.srcEndpoint = queueConfig.stager['srcEndpoint']
                self.Globus_srcPath = self.basePath
                self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                self.dstEndpoint = queueConfig.stager['dstEndpoint']
                # Test the endpoints and create the transfer data class
                errMsg = None
                try:
                    # Test endpoints for activation
                    tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
                        tmpLog, self.tc, self.srcEndpoint)
                    tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
                        tmpLog, self.tc, self.dstEndpoint)
                    if tmpStatsrc and tmpStatdst:
                        errStr = 'source Endpoint and destination Endpoint activated'
                        tmpLog.debug(errStr)
                    else:
                        errMsg = ''
                        if not tmpStatsrc:
                            errMsg += ' source Endpoint not activated '
                        if not tmpStatdst:
                            errMsg += ' destination Endpoint not activated '
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                            self.id, dummy_transferID))
                        # NOTE(review): writes self.have_db_lock here while every
                        # other release path uses a local variable - confirm intended
                        self.have_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not self.have_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (None, errMsg)
                        return tmpRetVal
                    # both endpoints activated now prepare to transfer data
                    tdata = None
                    tdata = TransferData(self.tc, self.srcEndpoint, self.dstEndpoint,
                                         sync_level="checksum")
                except:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    # release process lock
                    tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                        self.id, dummy_transferID))
                    release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                    if not release_db_lock:
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(errMsg)
                    tmpRetVal = (errStat, errMsg)
                    return tmpRetVal
                # loop over all files
                ifile = 0
                for fileSpec in fileSpecs:
                    logfile = False
                    scope = 'panda'
                    if fileSpec.scope is not None:
                        scope = fileSpec.scope
                    # for Yoda job set the scope to transient for non log files
                    if self.Yodajob:
                        scope = 'transient'
                    if fileSpec.fileType == "log":
                        logfile = True
                        scope = scopeLog
                    # only print to log file first 25 files
                    if ifile < 25:
                        msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(
                            fileSpec.lfn, fileSpec.scope)
                        tmpLog.debug(msgStr)
                    if ifile == 25:
                        msgStr = "printed first 25 files skipping the rest".format(
                            fileSpec.lfn, fileSpec.scope)
                        tmpLog.debug(msgStr)
                    # rucio-style destination path: <scope>/<md5[0:2]>/<md5[2:4]>/<lfn>
                    # NOTE(review): hash.update() with a str is Python-2 only;
                    # Python 3 requires bytes - confirm runtime
                    hash = hashlib.md5()
                    hash.update('%s:%s' % (scope, fileSpec.lfn))
                    hash_hex = hash.hexdigest()
                    correctedscope = "/".join(scope.split('.'))
                    srcURL = fileSpec.path
                    dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
                        endPoint=self.Globus_dstPath, scope=correctedscope,
                        hash1=hash_hex[0:2], hash2=hash_hex[2:4], lfn=fileSpec.lfn)
                    if logfile:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    if ifile < 25:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    # add files to transfer object - tdata
                    if os.access(srcURL, os.R_OK):
                        if ifile < 25:
                            tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
                        tdata.add_item(srcURL, dstURL)
                    else:
                        errMsg = "source file {} does not exist".format(srcURL)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                            self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (False, errMsg)
                        return tmpRetVal
                    ifile += 1
                # submit transfer
                tmpLog.debug('Number of files to transfer - {}'.format(len(tdata['DATA'])))
                try:
                    transfer_result = self.tc.submit_transfer(tdata)
                    # check status code and message
                    tmpLog.debug(str(transfer_result))
                    if transfer_result['code'] == "Accepted":
                        # succeeded
                        # set transfer ID which are used for later lookup
                        transferID = transfer_result['task_id']
                        tmpLog.debug('successfully submitted id={0}'.format(transferID))
                        # set status for files
                        self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                        msgStr = 'submitted transfer with ID={0}'.format(transferID)
                        tmpLog.debug(msgStr)
                    else:
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                            self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg = 'Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                        tmpRetVal = (None, transfer_result['message'])
                        return tmpRetVal
                except Exception as e:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    # release process lock
                    tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                        self.id, dummy_transferID))
                    release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                    if not release_db_lock:
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(errMsg)
                    return errStat, errMsg
            else:
                msgStr = 'wait until enough files are pooled'
                tmpLog.debug(msgStr)
            # release the lock
            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                self.id, dummy_transferID))
            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
            if release_db_lock:
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(
                    self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
            # return None to retry later
            return None, msgStr
        # release the db lock if needed
        if have_db_lock:
            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                self.id, dummy_transferID))
            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
            if release_db_lock:
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(
                    self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
            return None, msgStr
    # check transfer with real transfer IDs
    # get transfer groups
    tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
    tmpLog.debug('transfer groups any state - {0}'.format(groups))
    if len(groups) == 0:
        tmpLog.debug("jobspec.get_groups_of_output_files(skip_done=True) returned no files ")
        tmpLog.debug("check_status return status - True ")
        return True, ''
    for transferID in groups:
        # allow only valid UUID
        if validate_transferid(transferID):
            # get transfer task
            tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog, self.tc, transferID)
            # return a temporary error when failed to get task
            if not tmpStat:
                errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (
                    str(self.tc), str(transferID))
                tmpLog.error(errStr)
                return None, errStr
            # return a temporary error when task is missing
            if transferID not in transferTasks:
                errStr = 'transfer task ID - {} is missing'.format(transferID)
                tmpLog.error(errStr)
                return None, errStr
            # succeeded in finding a transfer task by tranferID
            if transferTasks[transferID]['status'] == 'SUCCEEDED':
                tmpLog.debug('transfer task {} succeeded'.format(transferID))
                self.set_FileSpec_objstoreID(jobspec, self.objstoreID, self.pathConvention)
                if self.changeFileStatusOnSuccess:
                    self.set_FileSpec_status(jobspec, 'finished')
                return True, ''
            # failed
            if transferTasks[transferID]['status'] == 'FAILED':
                errStr = 'transfer task {} failed'.format(transferID)
                tmpLog.error(errStr)
                self.set_FileSpec_status(jobspec, 'failed')
                return False, errStr
            # another status - transfer is still in progress; retry later
            tmpStr = 'transfer task {0} status: {1}'.format(
                transferID, transferTasks[transferID]['status'])
            tmpLog.debug(tmpStr)
            return None, ''
    # end of loop over transfer groups
    tmpLog.debug('End of loop over transfers groups - ending check_status function')
    return None, 'no valid transfer id found'
def trigger_preparation(self, jobspec):
    """Submit a single Globus transfer staging in the job's input files.

    Constructs Globus source/destination paths for every not-yet-ready
    input file of *jobspec* and submits one transfer task
    (sync_level="checksum") labelled via self.make_label(jobspec).
    Skips submission when a transfer with the same label is already queued.

    :param jobspec: job specification; must have computingSite defined
    :return: (status, message) tuple - (True, '') on successful submission,
             (True, 'No files to transfer') when there is nothing to do,
             (False, errMsg) on a definite error, or the status/message pair
             from globus_utils.handle_globus_exception on a Globus failure
    """
    # get logger
    tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                              method_name='trigger_preparation')
    tmpLog.debug('start')
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # get label used to identify this job's transfer task
    label = self.make_label(jobspec)
    tmpLog.debug('label={0}'.format(label))
    # get transfer tasks
    tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog, self.tc, label)
    if not tmpStat:
        errStr = 'failed to get transfer tasks'
        tmpLog.error(errStr)
        return False, errStr
    # skip if a transfer with this label is already queued
    if label in transferTasks:
        tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
        return True, ''
    # set the Globus destination Endpoint id and path; will get them from AGIS eventually
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
    self.srcEndpoint = queueConfig.preparator['srcEndpoint']
    self.Globus_dstPath = self.basePath
    #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
    self.dstEndpoint = queueConfig.preparator['dstEndpoint']
    # collect input files that still need to be staged in
    files = []
    lfns = []
    inFiles = jobspec.get_input_file_attributes(skip_ready=True)
    for inLFN, inFile in iteritems(inFiles):
        # set the local path for each file
        inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
        # create the base path if it does not exist yet
        if not os.access(self.basePath, os.F_OK):
            os.makedirs(self.basePath)
        # file paths as seen by the Globus source and destination endpoints
        Globus_srcpath = mover_utils.construct_file_path(self.Globus_srcPath, inFile['scope'], inLFN)
        Globus_dstpath = mover_utils.construct_file_path(self.Globus_dstPath, inFile['scope'], inLFN)
        files.append({'scope': inFile['scope'],
                      'name': inLFN,
                      'Globus_dstPath': Globus_dstpath,
                      'Globus_srcPath': Globus_srcpath})
        lfns.append(inLFN)
    tmpLog.debug('files[] {0}'.format(files))
    try:
        # test both endpoints for activation
        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
        if tmpStatsrc and tmpStatdst:
            tmpLog.debug('source Endpoint and destination Endpoint activated')
        else:
            errStr = ''
            if not tmpStatsrc:
                errStr += ' source Endpoint not activated '
            if not tmpStatdst:
                errStr += ' destination Endpoint not activated '
            tmpLog.error(errStr)
            return False, errStr
        # both endpoints activated, now prepare to transfer data
        if len(files) > 0:
            tdata = TransferData(self.tc, self.srcEndpoint, self.dstEndpoint,
                                 label=label, sync_level="checksum")
            # add every input file to the single transfer task
            for myfile in files:
                tdata.add_item(myfile['Globus_srcPath'], myfile['Globus_dstPath'])
            # submit
            transfer_result = self.tc.submit_transfer(tdata)
            # check status code and message
            tmpLog.debug(str(transfer_result))
            if transfer_result['code'] == "Accepted":
                # remember the transfer ID, used for later status lookup
                transferID = transfer_result['task_id']
                jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
                tmpLog.debug('done')
                return True, ''
            return False, transfer_result['message']
        # if no files to transfer return True
        return True, 'No files to transfer'
    except Exception:
        # return the diagnostic message; previously an empty dict was
        # returned here, silently discarding the error text
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        return errStat, errMsg
def check_stage_out_status(self, jobspec):
    """Check/drive the stage-out of a job's output files via Globus.

    Two phases:
      1. For any "dummy" (placeholder) transfer ID group, take a 120 s DB
         lock, and - once at least 10 files are pooled or the group is older
         than 10 minutes - submit a real Globus transfer for all pooled
         files, replacing the dummy ID with the real task ID.
      2. For real (valid UUID) transfer IDs, look up the Globus task and map
         its status to the harvester convention: (True, '') on SUCCEEDED,
         (False, msg) on FAILED, (None, msg) for in-progress/temporary
         conditions that should be retried later.

    :param jobspec: job specification with output file groups
    :return: (status, message) tuple as described above
    """
    # make logger
    tmpLog = self.make_logger(_logger, 'PandaID={0} ThreadID={1}'.format(
        jobspec.PandaID, threading.current_thread().ident),
        method_name='check_stage_out_status')
    tmpLog.debug('start')
    # show the dummy transfer id and set to a value with the PandaID if needed.
    tmpLog.debug('self.dummy_transfer_id = {}'.format(self.dummy_transfer_id))
    if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base, 'XXXX'):
        old_dummy_transfer_id = self.dummy_transfer_id
        self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base, jobspec.PandaID)
        tmpLog.debug('Change self.dummy_transfer_id from {0} to {1}'.format(
            old_dummy_transfer_id, self.dummy_transfer_id))
    # default return
    tmpRetVal = (True, '')
    # set flag if have db lock
    have_db_lock = False
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    else:
        tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # get the queueConfig and corresponding objStoreID_ES
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    # check queueConfig stager section to see if jobtype is set
    if 'jobtype' in queueConfig.stager:
        if queueConfig.stager['jobtype'] == "Yoda":
            self.Yodajob = True
    # set the location of the files in fileSpec.objstoreID
    # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json
    self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
    if self.Yodajob:
        self.pathConvention = int(queueConfig.stager['pathConvention'])
        tmpLog.debug('Yoda Job - PandaID = {0} objstoreID = {1} pathConvention ={2}'.format(
            jobspec.PandaID, self.objstoreID, self.pathConvention))
    else:
        self.pathConvention = None
        tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(jobspec.PandaID, self.objstoreID))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # set transferID to None
    transferID = None
    # get the scope of the log files
    outfileattrib = jobspec.get_output_file_attributes()
    scopeLog = 'xxxx'
    for key in outfileattrib.keys():
        if "log.tgz" in key:
            scopeLog = outfileattrib[key]['scope']
    # get transfer groups
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
    # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
    for dummy_transferID in groups:
        # skip if valid transfer ID not dummy one
        if validate_transferid(dummy_transferID):
            continue
        # lock for 120 sec
        tmpLog.debug('attempt to set DB lock for self.id - {0} dummy_transferID - {1}'.format(
            self.id, dummy_transferID))
        have_db_lock = self.dbInterface.get_object_lock(dummy_transferID, lock_interval=120)
        if not have_db_lock:
            # escape since locked by another thread
            msgStr = 'escape since locked by another thread'
            tmpLog.debug(msgStr)
            return None, msgStr
        # refresh group information since that could have been updated by another thread before getting the lock
        tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
        self.dbInterface.refresh_file_group_info(jobspec)
        # get transfer groups again with refreshed info
        tmpLog.debug('After db refresh call groups=jobspec.get_groups_of_output_files()')
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # the dummy transfer ID is still there
        if dummy_transferID in groups:
            groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
            # get files with the dummy transfer ID across jobs
            fileSpecs = self.dbInterface.get_files_with_group_id(dummy_transferID)
            # submit transfer if there are more than 10 files or the group was made before more than 10 min
            msgStr = 'dummy_transferID = {0} number of files = {1}'.format(
                dummy_transferID, len(fileSpecs))
            tmpLog.debug(msgStr)
            if len(fileSpecs) >= 10 or \
                    groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                tmpLog.debug('prepare to transfer files')
                # submit transfer and get a real transfer ID
                # set the Globus destination Endpoint id and path will get them from Agis eventually
                #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                self.srcEndpoint = queueConfig.stager['srcEndpoint']
                self.Globus_srcPath = self.basePath
                self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                self.dstEndpoint = queueConfig.stager['dstEndpoint']
                # Test the endpoints and create the transfer data class
                errMsg = None
                try:
                    # Test endpoints for activation
                    tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
                        tmpLog, self.tc, self.srcEndpoint)
                    tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
                        tmpLog, self.tc, self.dstEndpoint)
                    if tmpStatsrc and tmpStatdst:
                        errStr = 'source Endpoint and destination Endpoint activated'
                        tmpLog.debug(errStr)
                    else:
                        errMsg = ''
                        if not tmpStatsrc:
                            errMsg += ' source Endpoint not activated '
                        if not tmpStatdst:
                            errMsg += ' destination Endpoint not activated '
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                            self.id, dummy_transferID))
                        # NOTE(review): writes self.have_db_lock here while every
                        # other release path uses a local variable - confirm intended
                        self.have_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not self.have_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (None, errMsg)
                        return tmpRetVal
                    # both endpoints activated now prepare to transfer data
                    tdata = None
                    tdata = TransferData(self.tc, self.srcEndpoint, self.dstEndpoint,
                                         sync_level="checksum")
                except:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    # release process lock
                    tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                        self.id, dummy_transferID))
                    release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                    if not release_db_lock:
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(errMsg)
                    tmpRetVal = (errStat, errMsg)
                    return tmpRetVal
                # loop over all files
                ifile = 0
                for fileSpec in fileSpecs:
                    logfile = False
                    scope = 'panda'
                    if fileSpec.scope is not None:
                        scope = fileSpec.scope
                    # for Yoda job set the scope to transient for non log files
                    if self.Yodajob:
                        scope = 'transient'
                    if fileSpec.fileType == "log":
                        logfile = True
                        scope = scopeLog
                    # only print to log file first 25 files
                    if ifile < 25:
                        msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(
                            fileSpec.lfn, fileSpec.scope)
                        tmpLog.debug(msgStr)
                    if ifile == 25:
                        msgStr = "printed first 25 files skipping the rest".format(
                            fileSpec.lfn, fileSpec.scope)
                        tmpLog.debug(msgStr)
                    # rucio-style destination path: <scope>/<md5[0:2]>/<md5[2:4]>/<lfn>
                    # NOTE(review): hash.update() with a str is Python-2 only;
                    # Python 3 requires bytes - confirm runtime
                    hash = hashlib.md5()
                    hash.update('%s:%s' % (scope, fileSpec.lfn))
                    hash_hex = hash.hexdigest()
                    correctedscope = "/".join(scope.split('.'))
                    srcURL = fileSpec.path
                    dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
                        endPoint=self.Globus_dstPath, scope=correctedscope,
                        hash1=hash_hex[0:2], hash2=hash_hex[2:4], lfn=fileSpec.lfn)
                    if logfile:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    if ifile < 25:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    # add files to transfer object - tdata
                    if os.access(srcURL, os.R_OK):
                        if ifile < 25:
                            tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
                        tdata.add_item(srcURL, dstURL)
                    else:
                        errMsg = "source file {} does not exist".format(srcURL)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                            self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (False, errMsg)
                        return tmpRetVal
                    ifile += 1
                # submit transfer
                tmpLog.debug('Number of files to transfer - {}'.format(len(tdata['DATA'])))
                try:
                    transfer_result = self.tc.submit_transfer(tdata)
                    # check status code and message
                    tmpLog.debug(str(transfer_result))
                    if transfer_result['code'] == "Accepted":
                        # succeeded
                        # set transfer ID which are used for later lookup
                        transferID = transfer_result['task_id']
                        tmpLog.debug('successfully submitted id={0}'.format(transferID))
                        # set status for files
                        self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                        msgStr = 'submitted transfer with ID={0}'.format(transferID)
                        tmpLog.debug(msgStr)
                    else:
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                            self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg = 'Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                        tmpRetVal = (None, transfer_result['message'])
                        return tmpRetVal
                except Exception as e:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    # release process lock
                    tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                        self.id, dummy_transferID))
                    release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                    if not release_db_lock:
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(errMsg)
                    return errStat, errMsg
            else:
                msgStr = 'wait until enough files are pooled'
                tmpLog.debug(msgStr)
            # release the lock
            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                self.id, dummy_transferID))
            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
            if release_db_lock:
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(
                    self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
            # return None to retry later
            return None, msgStr
        # release the db lock if needed
        if have_db_lock:
            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(
                self.id, dummy_transferID))
            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
            if release_db_lock:
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(
                    self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
            return None, msgStr
    # check transfer with real transfer IDs
    # get transfer groups
    tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
    tmpLog.debug('transfer groups any state - {0}'.format(groups))
    if len(groups) == 0:
        tmpLog.debug("jobspec.get_groups_of_output_files(skip_done=True) returned no files ")
        tmpLog.debug("check_stage_out_status return status - True ")
        return True, ''
    for transferID in groups:
        # allow only valid UUID
        if validate_transferid(transferID):
            # get transfer task
            tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog, self.tc, transferID)
            # return a temporary error when failed to get task
            if not tmpStat:
                errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (
                    str(self.tc), str(transferID))
                tmpLog.error(errStr)
                return None, errStr
            # return a temporary error when task is missing
            if transferID not in transferTasks:
                errStr = 'transfer task ID - {} is missing'.format(transferID)
                tmpLog.error(errStr)
                return None, errStr
            # succeeded in finding a transfer task by tranferID
            if transferTasks[transferID]['status'] == 'SUCCEEDED':
                tmpLog.debug('transfer task {} succeeded'.format(transferID))
                self.set_FileSpec_objstoreID(jobspec, self.objstoreID, self.pathConvention)
                if self.changeFileStatusOnSuccess:
                    self.set_FileSpec_status(jobspec, 'finished')
                return True, ''
            # failed
            if transferTasks[transferID]['status'] == 'FAILED':
                errStr = 'transfer task {} failed'.format(transferID)
                tmpLog.error(errStr)
                self.set_FileSpec_status(jobspec, 'failed')
                return False, errStr
            # another status - transfer is still in progress; retry later
            tmpStr = 'transfer task {0} status: {1}'.format(
                transferID, transferTasks[transferID]['status'])
            tmpLog.debug(tmpStr)
            return None, ''
    # end of loop over transfer groups
    tmpLog.debug('End of loop over transfers groups - ending check_stage_out_status function')
    return None, 'no valid transfer id found'
def stage_upload_files(self, project_id, inbound_endpoint_id, inbound_endpoint_path):
    """
    Stage a user's Globus upload into a one-time project directory.

    Verifies the authenticated transfer client, the project and its
    ownership, confirms the inbound path exists, creates a per-project
    staging directory on the target endpoint, and submits a recursive
    Globus transfer into it.

    :param project_id: id of the Materials Commons project
    :param inbound_endpoint_id: Globus endpoint uuid the user uploads from
    :param inbound_endpoint_path: path on the inbound endpoint to transfer
    :return: dict with "code", "message", "task_id", "submission_id"
        from the submitted transfer result
    :raises AuthenticationException: no authenticated transfer client
    :raises NoSuchItem: project or endpoint not found
    :raises AccessNotAllowedException: current user does not own the project
    :raises TransferAPIError: inbound path listing or target mkdir failed
    """
    if not self.transfer_client:
        error = "Missing authenticated transfer client"
        self.log.info("Error: " + str(error))
        raise AuthenticationException(error)
    conn = DbConnection().connection()
    r = DbConnection().interface()
    proj = r.table('projects').get(project_id).run(conn)
    if not proj:
        error = "Unable to find project, " + project_id
        self.log.info("Error: " + str(error))
        raise NoSuchItem(error)
    if proj['owner'] != self.mc_user_id:
        error = "Current user is not project owner, " + self.mc_user_id + ", " + project_id
        self.log.info("Error: " + str(error))
        raise AccessNotAllowedException(error)
    transfer = self.transfer_client
    self.log.info(
        "Starting upload staging... function: stage_upload_files(inbound_endpoint_id)"
    )
    # FIX: original line was a garbled redaction artifact
    # ('"Materials Commons user = "******"Globus transfer endpoint uuid = "...'
    # is not valid Python); restored as two separate log statements.
    self.log.info("Materials Commons user = " + self.mc_user_id)
    self.log.info("Globus transfer endpoint uuid = " + inbound_endpoint_id)
    # confirm target and inbound endpoints
    target_endpoint = transfer.get_endpoint(self.mc_target_ep_id)
    inbound_endpoint = transfer.get_endpoint(inbound_endpoint_id)
    if not target_endpoint:
        error = "Missing target endpoint, Materials Commons staging"
        self.log.info("Error: " + str(error))
        raise NoSuchItem(error)
    if not inbound_endpoint:
        error = "Missing inbound endpoint, user's input for staging"
        self.log.info("Error: " + str(error))
        raise NoSuchItem(error)
    target_endpoint_id = target_endpoint['id']
    self.log.info("About to confirm inbound path: " + inbound_endpoint_path)
    # confirm inbound path — an ls that raises TransferAPIError means the
    # path is unreadable/absent; re-raise so the caller sees the API error
    try:
        transfer.operation_ls(inbound_endpoint_id, path=inbound_endpoint_path)
    except TransferAPIError as error:
        self.log.info("Error: " + str(error))
        raise error
    self.log.info("Finished confirm of inbound path: " + inbound_endpoint_path)
    # database entries and one-time-directory on target
    dir_name = "transfer-" + project_id
    response = transfer.operation_mkdir(target_endpoint_id, dir_name)
    if not response["code"] == "DirectoryCreated":
        error = "Unable to create directory on target endpoint " + dir_name
        self.log.info("Error: " + str(error))
        # NOTE(review): TransferAPIError normally wraps an HTTP response;
        # raising it with a plain string is preserved for caller compatibility
        raise TransferAPIError(error)
    self.log.info("Found for target endpoint: " + target_endpoint['display_name'])
    self.log.info(" - target endpoint id " + target_endpoint_id)
    self.log.info("Found inbound endpoint: " + inbound_endpoint['display_name'] +
                  " from " + inbound_endpoint["owner_string"])
    self.log.info("Initiating transfer to target directory: " + dir_name)
    # initiate transfer
    # NOTE(review): label concatenation has no separator before
    # "Materials Commons" — possibly a missing " to "; left unchanged.
    transfer_label = "Transfer from " + inbound_endpoint['display_name'] + \
        "Materials Commons"
    transfer_data = TransferData(transfer, inbound_endpoint_id,
                                 target_endpoint_id, label=transfer_label,
                                 sync_level="checksum")
    transfer_data.add_item(inbound_endpoint_path, "/" + dir_name, recursive=True)
    transfer_result = transfer.submit_transfer(transfer_data)
    self.log.info("Finished upload staging: successfully completed")
    # expose only the interesting fields of the transfer result
    return {key: transfer_result[key]
            for key in ("code", "message", "task_id", "submission_id")}
def trigger_stage_out(self, jobspec):
    """
    Submit a Globus transfer for a job's output files.

    Checks the computing site and transfer client, skips if a transfer
    with this label is already queued, activates both endpoints, adds
    every readable output file to a TransferData object using the
    Rucio-style hashed destination path, and submits the transfer.

    :param jobspec: job specification whose outFiles are to be staged out
    :return: (status, message); status is True on success/skip, False on a
             fatal error, or the value from the Globus exception handler
    """
    # make logger
    tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                              method_name='trigger_stage_out')
    tmpLog.debug('start')
    # default return
    tmpRetVal = (True, '')
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # get label
    label = self.make_label(jobspec)
    tmpLog.debug('label={0}'.format(label))
    # get transfer tasks
    tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog, self.tc, label)
    if not tmpStat:
        errStr = 'failed to get transfer tasks'
        tmpLog.error(errStr)
        return False, errStr
    # check if already queued
    if label in transferTasks:
        tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
        return True, ''
    # set the Globus destination Endpoint id and path will get them from Agis eventually
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
    self.srcEndpoint = queueConfig.stager['srcEndpoint']
    self.Globus_srcPath = self.basePath
    self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
    self.dstEndpoint = queueConfig.stager['dstEndpoint']
    # Test the endpoints and create the transfer data class
    errMsg = None
    try:
        # Test endpoints for activation
        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
        if tmpStatsrc and tmpStatdst:
            errStr = 'source Endpoint and destination Endpoint activated'
            tmpLog.debug(errStr)
        else:
            errMsg = ''
            if not tmpStatsrc:
                errMsg += ' source Endpoint not activated '
            if not tmpStatdst:
                errMsg += ' destination Endpoint not activated '
            tmpLog.error(errMsg)
            return False, errMsg
        # both endpoints activated now prepare to transfer data
        tdata = TransferData(self.tc, self.srcEndpoint, self.dstEndpoint,
                             label=label, sync_level="checksum")
    # FIX: was a bare "except:" which also swallows SystemExit/KeyboardInterrupt
    except Exception:
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        return errStat, errMsg
    # loop over all files
    fileAttrs = jobspec.get_output_file_attributes()
    lfns = []
    for fileSpec in jobspec.outFiles:
        scope = fileAttrs[fileSpec.lfn]['scope']
        # md5 of "scope:lfn" provides the two hashed directory levels of the
        # destination path; renamed from "hash" to avoid shadowing the builtin
        hash_obj = hashlib.md5()
        # FIX: hashlib.update() requires bytes on Python 3 — encode the string
        hash_obj.update(('%s:%s' % (scope, fileSpec.lfn)).encode('utf-8'))
        hash_hex = hash_obj.hexdigest()
        correctedscope = "/".join(scope.split('.'))
        srcURL = fileSpec.path
        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
            endPoint=self.Globus_dstPath,
            scope=correctedscope,
            hash1=hash_hex[0:2],
            hash2=hash_hex[2:4],
            lfn=fileSpec.lfn)
        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
        # add files to transfer object - tdata
        if os.access(srcURL, os.R_OK):
            tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
            tdata.add_item(srcURL, dstURL)
            lfns.append(fileSpec.lfn)
        else:
            errMsg = "source file {} does not exist".format(srcURL)
            tmpLog.error(errMsg)
            return False, errMsg
    # submit transfer
    try:
        transfer_result = self.tc.submit_transfer(tdata)
        # check status code and message
        tmpLog.debug(str(transfer_result))
        if transfer_result['code'] == "Accepted":
            # succeeded - set transfer ID which is used for later lookup
            transferID = transfer_result['task_id']
            tmpLog.debug('successfully submitted id={0}'.format(transferID))
            jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
            # record the transfer ID on every output file
            for fileSpec in jobspec.outFiles:
                if fileSpec.fileAttributes is None:
                    fileSpec.fileAttributes = {}
                fileSpec.fileAttributes['transferID'] = transferID
        else:
            tmpRetVal = (False, transfer_result['message'])
    except Exception:
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        if errMsg is None:
            # fall back to the raw exception type/value when the helper
            # could not produce a message
            errtype, errvalue = sys.exc_info()[:2]
            errMsg = "{0} {1}".format(errtype.__name__, errvalue)
        tmpRetVal = (errStat, errMsg)
    # return
    tmpLog.debug('done')
    return tmpRetVal
tmpStatdst, dstStr = globus_utils.check_endpoint_activation( tmpLog, tc, srcEndpoint) if tmpStatsrc and tmpStatdst: errStr = 'source Endpoint and destination Endpoint activated' tmpLog.debug(errStr) else: errStr = '' if not tmpStatsrc: errStr += ' source Endpoint not activated ' if not tmpStatdst: errStr += ' destination Endpoint not activated ' tmpLog.error(errStr) sys.exit(2) # We are sending test files from our destination machine to the source machine # both endpoints activated now prepare to transfer data tdata = TransferData(tc, dstEndpoint, srcEndpoint, sync_level="checksum") except: errStat, errMsg = globus_utils.handle_globus_exception(tmpLog) sys.exit(1) # create JobSpec jobSpec = JobSpec() jobSpec.jobParams = { 'scopeLog': 'panda', 'logFile': 'log', } jobSpec.computingSite = queueName jobSpec.PandaID = job_id jobSpec.modificationTime = datetime.datetime.now() realDataset = 'panda.sgotest.' + uuid.uuid4().hex ddmEndPointIn = 'BNL-OSG2_DATADISK'
def transfer_command(
    batch, sync_level, recursive, destination, source, label,
    preserve_mtime, verify_checksum, encrypt, submission_id, dry_run,
    delete, deadline, skip_activation_check, notify,
    perf_cc, perf_p, perf_pp, perf_udt,
):
    """
    Executor for `globus transfer`
    """
    # the ENDPOINT:PATH CLI arguments arrive pre-split as (endpoint, path)
    src_endpoint, base_src_path = source
    dst_endpoint, base_dst_path = destination

    # --recursive applies per batch line, never to --batch itself
    if batch and recursive:
        raise click.UsageError(
            ("You cannot use --recursive in addition to --batch. "
             "Instead, use --recursive on lines of --batch input "
             "which need it"))

    # without --batch, both paths are mandatory
    if not batch and (base_src_path is None or base_dst_path is None):
        raise click.UsageError(
            ("transfer requires either SOURCE_PATH and DEST_PATH or "
             "--batch"))

    # TransferData cannot take two **-expansions in one call, so fold the
    # optional performance settings (dropping unset ones) and the
    # notification settings into a single keyword dict first
    extra_kwargs = {
        opt_name: opt_value
        for opt_name, opt_value in (
            ("perf_cc", perf_cc),
            ("perf_p", perf_p),
            ("perf_pp", perf_pp),
            ("perf_udt", perf_udt),
        )
        if opt_value is not None
    }
    extra_kwargs.update(notify)

    tclient = get_client()
    tdata = TransferData(
        tclient, src_endpoint, dst_endpoint,
        label=label,
        sync_level=sync_level,
        verify_checksum=verify_checksum,
        preserve_timestamp=preserve_mtime,
        encrypt_data=encrypt,
        submission_id=submission_id,
        delete_destination_extra=delete,
        deadline=deadline,
        skip_activation_check=skip_activation_check,
        **extra_kwargs)

    if batch:
        # read one transfer item per stdin line through a nested click command
        @click.command()
        @click.option("--recursive", "-r", is_flag=True)
        @click.argument("source_path", type=TaskPath(base_dir=base_src_path))
        @click.argument("dest_path", type=TaskPath(base_dir=base_dst_path))
        def process_batch_line(dest_path, source_path, recursive):
            """
            Parse a line of batch input and turn it into a transfer submission
            item.
            """
            tdata.add_item(str(source_path), str(dest_path),
                           recursive=recursive)

        shlex_process_stdin(
            process_batch_line,
            ("Enter transfers, line by line, as\n\n"
             "    [--recursive] SOURCE_PATH DEST_PATH\n"),
        )
    else:
        # single-item mode: one SOURCE_PATH -> DEST_PATH pair
        tdata.add_item(base_src_path, base_dst_path, recursive=recursive)

    if dry_run:
        # show what would be transferred, then exit safely
        formatted_print(
            tdata,
            response_key="DATA",
            fields=(
                ("Source Path", "source_path"),
                ("Dest Path", "destination_path"),
                ("Recursive", "recursive"),
            ),
        )
        return

    # autoactivate after parsing all args and putting things together,
    # unless skip-activation-check was given
    if not skip_activation_check:
        autoactivate(tclient, src_endpoint, if_expires_in=60)
        autoactivate(tclient, dst_endpoint, if_expires_in=60)

    submit_response = tclient.submit_transfer(tdata)
    formatted_print(
        submit_response,
        text_format=FORMAT_TEXT_RECORD,
        fields=(("Message", "message"), ("Task ID", "task_id")),
    )
def transfer_command(
    batch,
    sync_level,
    recursive,
    destination,
    source,
    label,
    preserve_mtime,
    verify_checksum,
    encrypt,
    submission_id,
    dry_run,
    delete,
    deadline,
    skip_activation_check,
    notify,
    perf_cc,
    perf_p,
    perf_pp,
    perf_udt,
):
    """
    Executor for `globus transfer`

    Builds a TransferData from the parsed CLI options, fills it either from
    --batch lines read on stdin or from the single SOURCE_PATH/DEST_PATH
    pair, and submits it (unless --dry-run, which only prints the items).
    """
    # the ENDPOINT:PATH CLI arguments arrive pre-split as (endpoint, path)
    source_endpoint, cmd_source_path = source
    dest_endpoint, cmd_dest_path = destination

    # --recursive belongs on individual batch lines, not alongside --batch
    if recursive and batch:
        raise click.UsageError(
            (
                "You cannot use --recursive in addition to --batch. "
                "Instead, use --recursive on lines of --batch input "
                "which need it"
            )
        )

    # without --batch, both explicit paths are mandatory
    if (cmd_source_path is None or cmd_dest_path is None) and (not batch):
        raise click.UsageError(
            ("transfer requires either SOURCE_PATH and DEST_PATH or " "--batch")
        )

    # because python can't handle multiple **kwargs expansions in a single
    # call, we need to get a little bit clever
    # both the performance options (of which there are a few), and the
    # notification options (also there are a few) have elements which should be
    # omitted in some cases
    # notify comes to us clean, perf opts need more care
    # put them together into a dict before passing to TransferData
    kwargs = {}
    perf_opts = dict(
        (k, v)
        for (k, v) in dict(
            perf_cc=perf_cc, perf_p=perf_p, perf_pp=perf_pp, perf_udt=perf_udt
        ).items()
        if v is not None
    )
    kwargs.update(perf_opts)
    kwargs.update(notify)

    client = get_client()
    transfer_data = TransferData(
        client,
        source_endpoint,
        dest_endpoint,
        label=label,
        sync_level=sync_level,
        verify_checksum=verify_checksum,
        preserve_timestamp=preserve_mtime,
        encrypt_data=encrypt,
        submission_id=submission_id,
        delete_destination_extra=delete,
        deadline=deadline,
        skip_activation_check=skip_activation_check,
        **kwargs
    )

    if batch:
        # --batch mode: each stdin line is parsed by a nested click command
        # (closing over transfer_data) and added as one transfer item

        @click.command()
        @click.option("--recursive", "-r", is_flag=True)
        @click.argument("source_path", type=TaskPath(base_dir=cmd_source_path))
        @click.argument("dest_path", type=TaskPath(base_dir=cmd_dest_path))
        def process_batch_line(dest_path, source_path, recursive):
            """
            Parse a line of batch input and turn it into a transfer submission
            item.
            """
            transfer_data.add_item(
                str(source_path), str(dest_path), recursive=recursive
            )

        shlex_process_stdin(
            process_batch_line,
            (
                "Enter transfers, line by line, as\n\n"
                "    [--recursive] SOURCE_PATH DEST_PATH\n"
            ),
        )
    else:
        # single-item mode: exactly one SOURCE_PATH -> DEST_PATH pair
        transfer_data.add_item(cmd_source_path, cmd_dest_path, recursive=recursive)

    if dry_run:
        # print the assembled items instead of submitting
        formatted_print(
            transfer_data,
            response_key="DATA",
            fields=(
                ("Source Path", "source_path"),
                ("Dest Path", "destination_path"),
                ("Recursive", "recursive"),
            ),
        )
        # exit safely
        return

    # autoactivate after parsing all args and putting things together
    # skip this if skip-activation-check is given
    if not skip_activation_check:
        autoactivate(client, source_endpoint, if_expires_in=60)
        autoactivate(client, dest_endpoint, if_expires_in=60)

    res = client.submit_transfer(transfer_data)
    formatted_print(
        res,
        text_format=FORMAT_TEXT_RECORD,
        fields=(("Message", "message"), ("Task ID", "task_id")),
    )