def globus_get_folders(client: globus_sdk.TransferClient, endpoint_id: str,
                       remote_path: str) -> Optional[tuple]:
    """Returns the sub folders found directly under a path on a Globus endpoint
    Arguments:
        client: the Globus transfer client to use
        endpoint_id: the ID of the endpoint to access
        remote_path: the remote path to search
    Return:
        Returns a tuple with the full remote path of each sub folder found (empty
        when there are none), or None when the listing request raises a
        TransferAPIError
    """
    # Remote Globus paths are '/'-separated regardless of the local platform,
    # so they are built with posixpath instead of the platform-specific os.path
    import posixpath

    base_path = posixpath.join('/-', remote_path)

    try:
        path_contents = client.operation_ls(endpoint_id, path=base_path)
    except globus_sdk.exc.TransferAPIError:
        logging.error("Continuing after TransferAPIError Exception caught for: '%s'", base_path)
        return None

    return_paths = []
    for one_entry in path_contents:
        # Only directories are of interest; files and other entry types are skipped
        if one_entry['type'] == 'dir':
            sub_folder = posixpath.join(base_path, one_entry['name'])
            logging.debug("Globus remote sub folder: %s", sub_folder)
            return_paths.append(sub_folder)

    return tuple(return_paths)
def browse(dataset_id=None, endpoint_id=None, endpoint_path=None):
    """
    Render the browse view for a dataset or for an endpoint ID/path.

    Exactly one of `dataset_id` or the pair (`endpoint_id`, `endpoint_path`)
    must be supplied. An unknown `dataset_id` aborts with a 404.

    The directory is listed through the Globus Transfer API using the
    refresh token stored in the session. If activating the endpoint or
    listing the path raises a TransferAPIError, the error is flashed and
    the user is redirected to the 'transfer' view.

    The target template (browse.jinja2) receives:
    - `endpoint_uri`: HTTPS server of the endpoint joined with the path,
      or None when the endpoint has no HTTPS server
    - `target`: either `"dataset"` or `"endpoint"`
    - `description`: the dataset name or the endpoint display name
    - `file_list`: the listing entries whose type is 'file'
    - `webapp_xfer`: link to the Globus web app for the same location

    To display additional information about each file, extend the entries
    in `file_list` and modify the browse.jinja2 template accordingly.
    """
    assert bool(dataset_id) != bool(endpoint_id and endpoint_path)

    if not dataset_id:
        endpoint_path = '/' + endpoint_path
    else:
        dataset = next((ds for ds in datasets if ds['id'] == dataset_id), None)
        if dataset is None:
            abort(404)

        endpoint_id = app.config['DATASET_ENDPOINT_ID']
        endpoint_path = app.config['DATASET_ENDPOINT_BASE'] + dataset['path']

    refresh_token = session['tokens']['transfer.api.globus.org']['refresh_token']
    authorizer = RefreshTokenAuthorizer(refresh_token, load_portal_client())
    transfer = TransferClient(authorizer=authorizer)

    try:
        transfer.endpoint_autoactivate(endpoint_id)
        listing = transfer.operation_ls(endpoint_id, path=endpoint_path)
    except TransferAPIError as err:
        flash('Error [{}]: {}'.format(err.code, err.message))
        return redirect(url_for('transfer'))

    # Directories are not shown in the browse view
    file_list = []
    for entry in listing:
        if entry['type'] == 'file':
            file_list.append(entry)

    ep = transfer.get_endpoint(endpoint_id)

    https_server = ep['https_server']
    if https_server:
        endpoint_uri = https_server + endpoint_path
    else:
        endpoint_uri = None

    query = urlencode({'origin_id': endpoint_id, 'origin_path': endpoint_path})
    webapp_xfer = 'https://www.globus.org/app/transfer?' + query

    if dataset_id:
        target = "dataset"
        description = dataset['name']
    else:
        target = "endpoint"
        description = ep['display_name']

    return render_template('browse.jinja2',
                           endpoint_uri=endpoint_uri,
                           target=target,
                           description=description,
                           file_list=file_list,
                           webapp_xfer=webapp_xfer)
def query_files(client: globus_sdk.TransferClient, endpoint_id: str, folders: tuple, extensions: tuple,
                exclude_parts: tuple) -> Optional[tuple]:
    """Interactively selects at most one file to download from each remote folder
    Arguments:
        client: the Globus transfer client to use
        endpoint_id: the ID of the endpoint to access
        folders: a list of folders to search within (search is 1 deep)
        extensions: a list of acceptable filename extensions (can be wildcard '*')
        exclude_parts: file name fragments; a file whose name contains any of them is skipped
    Return:
        Returns a tuple with the remote paths of the files the user selected
    Notes:
        Each selected path is also written, one per line, to 'file_list.txt' in LOCAL_SAVE_PATH.
        Folders whose listing raises a TransferAPIError are logged and skipped.
    """
    # Remote Globus paths are '/'-separated regardless of the local platform
    import posixpath

    check_ext = [e.lstrip('.') for e in extensions]
    any_extension = '*' in check_ext
    found_files = []

    # The context manager closes the list file even when the prompt is interrupted
    with open(os.path.join(LOCAL_SAVE_PATH, 'file_list.txt'), 'w') as out_file:
        for one_folder in folders:
            cur_path = posixpath.join('/-', one_folder)
            logging.debug("Globus files path: %s", cur_path)
            try:
                path_contents = client.operation_ls(endpoint_id, path=cur_path)
            except globus_sdk.exc.TransferAPIError:
                logging.error("Continuing after TransferAPIError Exception caught for: '%s'", cur_path)
                continue

            matches = []
            for one_entry in path_contents:
                if one_entry['type'] == 'dir':
                    continue

                entry_name = one_entry['name']
                file_path = posixpath.join(cur_path, entry_name)
                logging.debug("Globus remote file path: %s", file_path)

                # Get the format of the file (aka: its extension) without the leading dot
                file_format = posixpath.splitext(entry_name)[1].lstrip('.')

                # Check if it's included
                if not any_extension and file_format not in check_ext:
                    logging.debug(" remote file doesn't match extension: %s %s",
                                  posixpath.basename(file_path), check_ext)
                    continue

                if exclude_parts and any(part in entry_name for part in exclude_parts):
                    logging.warning(" remote file name includes an excluded term: %s %s",
                                    entry_name, exclude_parts)
                    continue

                matches.append(file_path)

            if not matches:
                continue

            # Keep prompting until the user picks a listed file or skips the folder with 0
            done = False
            while not done:
                print("Remote folder", one_folder)
                print("Please select file to download:")
                print(0, ".", "None")
                for idx, one_match in enumerate(matches, start=1):
                    print(idx, ".", posixpath.basename(one_match))

                sel_file = input('Enter the number associated with file: ').strip()
                try:
                    sel_idx = int(sel_file)
                except ValueError:
                    # Non-numeric input is handled like any other invalid entry
                    sel_idx = -1

                if sel_idx > len(matches):
                    print("Entered value is out of range: %s %d" % (sel_file, sel_idx))
                elif sel_idx > 0:
                    logging.debug(" file index %s selected", sel_idx)
                    found_files.append(matches[sel_idx - 1])
                    out_file.write(matches[sel_idx - 1] + '\n')
                    done = True
                elif sel_idx == 0:
                    print("Skipping folder")
                    done = True
                else:
                    print("Invalid entry")

                if not done:
                    print("Please try again")
                    print("-")
                    print("-")

    print("Done searching for files to download: found", len(found_files), "files")

    return tuple(found_files)
class GlobusStorageManager:
    '''
    Convenience wrapper around a Globus TransferClient.

    Locations are given as 'endpoint:/path' strings (see ep_parts). The
    transfer refresh token is kept in dj.config['custom']['globus.token'];
    when it is present at construction time it is reused, otherwise an
    interactive login is started.
    '''
    # https://globus-sdk-python.readthedocs.io/en/stable/clients/transfer/

    app_id = 'b2fe5703-edb0-4f7f-80a6-2147c8ae35f0'  # map transfer app id

    class GlobusQueue:
        ''' placeholder for globus async helpers '''
        pass

    def __init__(self):
        ''' start the native-app auth flow, then refresh or log in '''
        self.auth_client = NativeAppAuthClient(self.app_id)
        self.auth_client.oauth2_start_flow(refresh_tokens=True)
        self.xfer_client = None

        custom = dj.config.get('custom', None)
        if not custom or 'globus.token' not in custom:
            self.login()
        else:
            self.refresh()

    # authentication methods

    def login(self):
        '''
        interactive login: print the authorize URL, read the resulting
        code from stdin, build the transfer client and store the refresh
        token in dj.config['custom']['globus.token']
        '''
        print('Please login via: {}'.format(
            self.auth_client.oauth2_get_authorize_url()))

        code = input('and enter code:').strip()
        token_response = self.auth_client.oauth2_exchange_code_for_tokens(code)

        transfer_cfg = token_response.by_resource_server[
            'transfer.api.globus.org']
        refresh_token = transfer_cfg['refresh_token']
        access_token = transfer_cfg['access_token']
        expires_at = transfer_cfg['expires_at_seconds']

        authorizer = RefreshTokenAuthorizer(
            refresh_token, self.auth_client,
            access_token=access_token, expires_at=expires_at)
        self.xfer_client = TransferClient(authorizer=authorizer)

        custom = dj.config.get('custom', {})
        custom['globus.token'] = refresh_token
        dj.config['custom'] = custom

    def refresh(self):
        ''' build the transfer client from the stored refresh token '''
        stored_token = dj.config['custom']['globus.token']
        authorizer = RefreshTokenAuthorizer(
            stored_token, self.auth_client,
            access_token=None, expires_at=None)
        self.xfer_client = TransferClient(authorizer=authorizer)

    # endpoint management / utility methods

    @classmethod
    def ep_parts(cls, endpoint_path):
        '''
        split 'endpoint:/path' into (endpoint, path) at the first colon;
        the path is '' when there is no colon
        '''
        endpoint, _, path = endpoint_path.partition(':')
        return endpoint, path

    def activate_endpoint(self, endpoint):
        '''
        auto-activate an endpoint; raises Exception when the response
        code is 'AutoActivationFailed'
        '''
        response = self.xfer_client.endpoint_autoactivate(
            endpoint, if_expires_in=3600)
        code = response['code']

        log.debug('activate_endpoint() code: {}'.format(code))

        if code == 'AutoActivationFailed':
            print('Endpoint({}) Not Active! Error! Source message: {}'
                  .format(endpoint, response['message']))
            raise Exception('globus endpoint activation failure')

        # any other code is tolerated, but an unrecognised one is noted
        if 'AutoActivated' not in code and 'AlreadyActivated' not in code:
            log.debug('activate_endpoint(): not knownok response')

    def _wait(self, task, timeout=10, polling_interval=10):
        ''' transfer client common wait wrapper '''
        client = self.xfer_client
        return client.task_wait(task, timeout, polling_interval)

    def _tasks(self):
        '''
        not implemented; sketch of the intended listing:

            tl = tc.task_list(num_results=25, filter="type:TRANSFER,DELETE")
            _ = [print(t["task_id"], t["type"], t["status"]) for t in tl]
        '''
        pass

    def _task_info(self):
        '''
        not implemented; sketch of the intended event dump:

            for event in tc.task_event_list(task_id):
                print("Event on Task({}) at {}: {}".format(
                    task_id, event["time"], event["description"]))

        or get_task
        '''
        pass

    # transfer methods

    def ls(self, endpoint_path):
        '''
        list the directory at endpoint_path and return the transfer
        client's operation_ls response, e.g.:

            {
              "DATA": [
                {
                  "DATA_TYPE": "file",
                  "group": "staff",
                  "last_modified": "2018-05-22 18:49:19+00:00",
                  "link_group": null,
                  "link_last_modified": null,
                  "link_size": null,
                  "link_target": null,
                  "link_user": null,
                  "name": "map",
                  "permissions": "0755",
                  "size": 102,
                  "type": "dir",
                  "user": "******"
                },
              ],
              "DATA_TYPE": "file_list",
              "absolute_path": null,
              "endpoint": "aa4e5f9c-05f3-11e8-a6ad-0a448319c2f8",
              "length": 2,
              "path": "/~/Globus/",
              "rename_supported": true,
              "symlink_supported": false,
              "total": 2
            }
        '''
        endpoint, path = self.ep_parts(endpoint_path)
        return self.xfer_client.operation_ls(endpoint, path=path)

    def mkdir(self, ep_path):
        ''' create a directory at ep_path '''
        endpoint, path = self.ep_parts(ep_path)
        return self.xfer_client.operation_mkdir(endpoint, path=path)

    def rmdir(self, ep_path, recursive=False):
        '''
        submit a delete task for ep_path and return the result of
        waiting on it (see _wait)
        '''
        client = self.xfer_client
        endpoint, path = self.ep_parts(ep_path)

        delete_request = DeleteData(client, endpoint, recursive=recursive)
        delete_request.add_item(path)

        submitted = client.submit_delete(delete_request)
        return self._wait(submitted['task_id'])

    def cp(self, src_ep_path, dst_ep_path, recursive=False):
        '''
        submit a transfer task from src_ep_path to dst_ep_path and return
        the result of waiting on it (see _wait)

        todo: support label, sync_level, etc?
        sync_level: ["exists", "size", "mtime", "checksum"]
        '''
        client = self.xfer_client
        src_ep, src_path = self.ep_parts(src_ep_path)
        dst_ep, dst_path = self.ep_parts(dst_ep_path)

        transfer_request = TransferData(client, src_ep, dst_ep)
        transfer_request.add_item(src_path, dst_path, recursive=recursive)

        submitted = client.submit_transfer(transfer_request)
        return self._wait(submitted['task_id'])

    def rename(self, src_ep_path, dst_ep_path):
        '''
        rename a file/path within a single endpoint; raises Exception when
        source and destination name different endpoints
        '''
        src_ep, src_path = self.ep_parts(src_ep_path)
        dst_ep, dst_path = self.ep_parts(dst_ep_path)

        if src_ep != dst_ep:
            raise Exception('rename between two different endpoints')

        return self.xfer_client.operation_rename(src_ep, src_path, dst_path)
def query_files(client: globus_sdk.TransferClient, endpoint_id: str, folders: tuple, extensions: tuple,
                include_parts: tuple) -> Optional[tuple]:
    """Returns the first file in each remote folder whose name contains a wanted fragment
    Arguments:
        client: the Globus transfer client to use
        endpoint_id: the ID of the endpoint to access
        folders: a list of folders to search within (search is 1 deep)
        extensions: a list of acceptable filename extensions (can be wildcard '*')
        include_parts: the file name fragments for inclusion
    Return:
        Returns a tuple with the remote paths of the files found
    Notes:
        Each found path is also written, one per line, to 'file_10pct.txt' in LOCAL_SAVE_PATH.
        Folders whose listing raises a TransferAPIError are logged and skipped.
        Nothing is selected when include_parts is empty.
    """
    # Remote Globus paths are '/'-separated regardless of the local platform
    import posixpath

    check_ext = [e.lstrip('.') for e in extensions]
    any_extension = '*' in check_ext
    found_files = []

    # The context manager closes the list file even when a listing raises unexpectedly
    with open(os.path.join(LOCAL_SAVE_PATH, 'file_10pct.txt'), 'w') as out_file:
        for one_folder in folders:
            cur_path = posixpath.join('/-', one_folder)
            logging.debug("Globus files path: %s", cur_path)
            try:
                path_contents = client.operation_ls(endpoint_id, path=cur_path)
            except globus_sdk.exc.TransferAPIError:
                logging.error("Continuing after TransferAPIError Exception caught for: '%s'", cur_path)
                continue

            for one_entry in path_contents:
                if one_entry['type'] == 'dir':
                    continue

                entry_name = one_entry['name']
                file_path = posixpath.join(cur_path, entry_name)
                logging.debug("Globus remote file path: %s", file_path)

                # Get the format of the file (aka: its extension) without the leading dot
                file_format = posixpath.splitext(entry_name)[1].lstrip('.')

                # Check if it's included
                if not any_extension and file_format not in check_ext:
                    logging.debug(" remote file doesn't match extension: %s %s",
                                  posixpath.basename(file_path), check_ext)
                    continue

                if include_parts and any(part in entry_name for part in include_parts):
                    logging.warning("Found wanted image: %s %s", entry_name, include_parts)
                    found_files.append(file_path)
                    out_file.write(file_path + '\n')
                    # NOTE(review): only the first wanted file per folder is kept - confirm
                    # this matches the intended selection
                    break

    print("Done searching for files to download: found", len(found_files), "files")

    return tuple(found_files)
def browse(dataset_id=None, endpoint_id=None, endpoint_path=None):
    """
    Browse view handling both listing (GET) and file selection (POST).

    GET:
    - Exactly one of `dataset_id` or the pair (`endpoint_id`,
      `endpoint_path`) must be supplied; an unknown `dataset_id` aborts
      with a 404.
    - The directory is listed through the Globus Transfer API using the
      transfer tokens stored in the session. If activating the endpoint or
      listing the path raises a TransferAPIError, the error is flashed and
      the user is redirected back to 'browse'.
    - The target template (browse.jinja2) receives `endpoint_uri` (None
      when the endpoint has no HTTPS server), `target` (either `"dataset"`
      or `"endpoint"`), `description`, `mypath` and `myid` (dataset path
      and id, None for a plain endpoint), `file_list` (the listing entries
      whose type is 'file') and `webapp_xfer` (link to the Globus file
      manager for the same location).

    POST:
    - Requires at least one selected 'file' form value, otherwise a
      message is flashed and the user is redirected back to 'browse'.
    - Stores the selection in session['form'] and redirects to the Globus
      file manager, which posts back to the 'submit_transfer' view.

    If you want to display additional information about each file, you
    must add those keys to the entries in `file_list` and modify the
    browse.jinja2 template accordingly.
    """
    if request.method == 'GET':
        assert bool(dataset_id) != bool(endpoint_id and endpoint_path)

        if not dataset_id:
            endpoint_path = '/' + endpoint_path
        else:
            dataset = next((ds for ds in datasets if ds['id'] == dataset_id),
                           None)
            if dataset is None:
                abort(404)

            endpoint_id = app.config['DATASET_ENDPOINT_ID']
            endpoint_path = (app.config['DATASET_ENDPOINT_BASE']
                             + dataset['path'])

        transfer_tokens = session['tokens']['transfer.api.globus.org']
        refresh_token = transfer_tokens['refresh_token']
        portal_client = load_portal_client()
        authorizer = RefreshTokenAuthorizer(
            refresh_token,
            portal_client,
            access_token=transfer_tokens['access_token'],
            expires_at=transfer_tokens['expires_at_seconds'])
        transfer = TransferClient(authorizer=authorizer)

        try:
            transfer.endpoint_autoactivate(endpoint_id)
            listing = transfer.operation_ls(endpoint_id, path=endpoint_path)
        except TransferAPIError as err:
            flash('Error [{}]: {}'.format(err.code, err.message))
            return redirect(url_for('browse'))

        # Directories are not shown in the browse view
        file_list = []
        for entry in listing:
            if entry['type'] == 'file':
                file_list.append(entry)

        ep = transfer.get_endpoint(endpoint_id)

        https_server = ep['https_server']
        if https_server:
            endpoint_uri = https_server + endpoint_path
        else:
            endpoint_uri = None

        query = urlencode({'origin_id': endpoint_id,
                           'origin_path': endpoint_path})
        webapp_xfer = 'https://app.globus.org/file-manager?' + query

        if dataset_id:
            target = "dataset"
            description = dataset['name']
            mypath = dataset['path']
            myid = dataset['id']
        else:
            target = "endpoint"
            description = ep['display_name']
            mypath = None
            myid = None

        return render_template('browse.jinja2',
                               endpoint_uri=endpoint_uri,
                               target=target,
                               description=description,
                               mypath=mypath,
                               myid=myid,
                               file_list=file_list,
                               webapp_xfer=webapp_xfer)

    if request.method == 'POST':
        if not request.form.get('file'):
            flash('Please select at least one file.')
            return redirect(url_for('browse'))

        callback_url = url_for('submit_transfer', _external=True,
                               _scheme='https')
        params = {
            'method': 'POST',
            'action': callback_url,
            'filelimit': 0,
            'folderlimit': 1
        }
        browse_endpoint = 'https://app.globus.org/file-manager?{}' \
            .format(urlencode(params))

        # Remember the selection so the transfer can be submitted after
        # the file manager posts back
        session['form'] = {
            'dirselect': False,
            'datasets': request.form.getlist('file'),
            'path': request.form.getlist('path'),
            'id': request.form.getlist('id')
        }

        return redirect(browse_endpoint)
class GlobusConnection(DecadeFileBase):
    """Globus transfer connection that caches its OAuth tokens in a file.

    Tokens are read from and written to the file named by
    config['token_file']. The source and destination endpoints come from
    config['src_ep'] and config['dest_ep'].
    """
    REDIRECT_URI = 'https://auth.globus.org/v2/web/auth-code'
    SCOPES = ('openid email profile '
              'urn:globus:auth:scope:transfer.api.globus.org:all')
    TRANSLIMIT = 25

    def __init__(self, config, tag):
        DecadeFileBase.__init__(self, config, tag)
        self.client = None
        self.noao_dirs = []
        self.scantime = 0.
        self.starttime = datetime.datetime.now()
        # transfer bookkeeping counters
        self.active_transfer_count = 0
        self.number_started = 0
        self.number_successful = 0
        self.number_failed = 0
        self.number_waiting = 0

    def __del__(self):
        # close() is not defined in this class; presumably inherited from
        # DecadeFileBase - confirm
        self.close()
        logging.shutdown()

    def load_tokens_from_file(self):
        """Load a set of saved tokens."""
        with open(self.config['token_file'], 'r') as _file:
            tokens = json.load(_file)

        return tokens

    def save_tokens_to_file(self, tokens):
        """Save a set of tokens for later use."""
        with open(self.config['token_file'], 'w') as _file:
            json.dump(tokens, _file)

    def update_tokens_file_on_refresh(self, token_response):
        """
        Callback function passed into the RefreshTokenAuthorizer.
        Will be invoked any time a new access token is fetched.
        """
        self.save_tokens_to_file(token_response.by_resource_server)

    def initiate_connection(self):
        """ Initiate the connection

        Reuses saved tokens when they can be loaded; otherwise runs the
        interactive Native App flow (the authorization URL is printed and
        the auth code is read from stdin) and saves the new tokens. The
        transfer client is then created and both configured endpoints are
        auto-activated.

        Exits the process when endpoint activation fails with HTTP status
        401; any other GlobusAPIError is logged and re-raised.
        """
        try:
            # if we already have tokens, load and use them
            tokens = self.load_tokens_from_file()
        except Exception:
            # Best effort: a missing or unreadable token file just means
            # a fresh login is needed
            tokens = None

        if not tokens:
            # Python 2 reads a line with raw_input; fall back to input
            # where raw_input does not exist
            try:
                read_line = raw_input  # noqa: F821
            except NameError:
                read_line = input

            # if we need to get tokens, start the Native App authentication process
            # NOTE(review): CLIENT_ID is not defined in this class
            # (presumably inherited - confirm), while the auth client
            # below uses config['client_id']
            client = NativeAppAuthClient(client_id=self.CLIENT_ID)
            # pass refresh_tokens=True to request refresh tokens
            client.oauth2_start_flow(requested_scopes=self.SCOPES,
                                     redirect_uri=self.REDIRECT_URI,
                                     refresh_tokens=True)

            url = client.oauth2_get_authorize_url()

            print('Native App Authorization URL: \n{}'.format(url))

            auth_code = read_line('Enter the auth code: ').strip()

            token_response = client.oauth2_exchange_code_for_tokens(auth_code)

            # return a set of tokens, organized by resource server name
            tokens = token_response.by_resource_server

            try:
                self.save_tokens_to_file(tokens)
            except Exception as ex:
                # Best effort: the connection still works without the
                # cache, but the failure is worth recording
                self.logger.error('Unable to save tokens to file %s', str(ex))

        transfer_tokens = tokens['transfer.api.globus.org']

        auth_client = NativeAppAuthClient(client_id=self.config['client_id'])

        authorizer = RefreshTokenAuthorizer(
            transfer_tokens['refresh_token'],
            auth_client,
            on_refresh=self.update_tokens_file_on_refresh)

        self.client = TransferClient(authorizer=authorizer)

        try:
            # Activate both endpoints; the activation requirements are
            # requested but their content is not used
            self.client.endpoint_autoactivate(self.config['dest_ep'])
            self.client.endpoint_get_activation_requirements(self.config['dest_ep'])
            self.client.endpoint_autoactivate(self.config['src_ep'])
            self.client.endpoint_get_activation_requirements(self.config['src_ep'])
        except GlobusAPIError as ex:
            self.logger.error('Error in endpoint activation %s', str(ex))

            if ex.http_status == 401:
                sys.exit('Refresh token has expired. '
                         'Please delete refresh-tokens.json and try again.')
            # bare raise keeps the original traceback
            raise

    def get_dirs(self):
        """ List config['noao_root'] on the source endpoint

        Appends the name of every directory entry to self.noao_dirs and
        stores the time the listing took, in seconds, in self.scantime.
        """
        now = time.time()
        full_dirs = self.client.operation_ls(self.config['src_ep'],
                                             path=self.config['noao_root'])
        self.scantime = time.time() - now
        for entry in full_dirs:
            if entry['type'].lower() == 'dir':
                self.noao_dirs.append(str(entry['name']))
class Transfer:
    '''
    Transfers files between a source endpoint shared with the user and a
    destination endpoint owned by the user.

    Modified Transfer, add an option to pass refresh token to avoid the
    web login
    '''

    def __init__(self, src_endpoint_name, dst_endpoint_name,
                 transfer_rt=None, log_lv=logging.INFO):
        '''
        Authenticate and look up both endpoints by display name.

        src_endpoint_name: display name searched in "shared-with-me"
        dst_endpoint_name: display name searched in "my-endpoints"
        transfer_rt: transfer refresh token; when None an interactive
            web login is performed and the auth code is read from stdin
        log_lv: level set on this class's logger

        Raises LookupError when either endpoint cannot be found.
        '''
        log_format = '%(asctime)-15s %(levelname)s:\t  class:%(name)s %(message)s'
        logging.basicConfig(format=log_format)
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(log_lv)
        self.logger.debug('CLIENT_ID: %s', CLIENT_ID)

        self.client = NativeAppAuthClient(CLIENT_ID)
        self.client.oauth2_start_flow(refresh_tokens=True)

        if transfer_rt is not None:
            self.authorizer = RefreshTokenAuthorizer(transfer_rt, self.client)
        else:
            authorize_url = self.client.oauth2_get_authorize_url()
            print('Please go to this URL and login: {0}'.format(authorize_url))

            # Look the name up directly: __builtins__ is a dict in imported
            # modules, so getattr() on it would always fall back to input,
            # which evaluates what is typed on Python 2
            try:
                get_input = raw_input  # noqa: F821
            except NameError:
                get_input = input

            auth_code = get_input(
                'Please enter the code you get after login here: ').strip()
            token_response = self.client.oauth2_exchange_code_for_tokens(
                auth_code)

            self.globus_auth_data = token_response.by_resource_server[
                'auth.globus.org']
            self.globus_transfer_data = token_response.by_resource_server[
                'transfer.api.globus.org']

            transfer_rt = self.globus_transfer_data['refresh_token']
            transfer_at = self.globus_transfer_data['access_token']
            expires_at_s = self.globus_transfer_data['expires_at_seconds']
            self.authorizer = RefreshTokenAuthorizer(transfer_rt, self.client,
                                                     access_token=transfer_at,
                                                     expires_at=expires_at_s)

        self.transferClient = TransferClient(authorizer=self.authorizer)
        self.src_endpoint = None
        self.dst_endpoint = None

        # When several endpoints share the display name the last one wins
        for ep in self.transferClient.endpoint_search(
                filter_scope="shared-with-me"):
            if ep["display_name"] == src_endpoint_name:
                self.src_endpoint = ep
                self.logger.info('Source endpoint: [{0}] {1}'.format(
                    self.src_endpoint['id'],
                    self.src_endpoint['display_name']))
        if self.src_endpoint is None:
            message = 'No endpoint shared with you with name: {0}'.format(
                src_endpoint_name)
            self.logger.error(message)
            raise LookupError(message)

        for ep in self.transferClient.endpoint_search(
                filter_scope="my-endpoints"):
            if ep['display_name'] == dst_endpoint_name:
                self.dst_endpoint = ep
                self.logger.info('Destination endpoint: [{0}] {1}'.format(
                    self.dst_endpoint['id'],
                    self.dst_endpoint['display_name']))
        if self.dst_endpoint is None:
            message = 'You don\'t have endpoint named: {0}'.format(
                dst_endpoint_name)
            self.logger.error(message)
            raise LookupError(message)

    def transfer_dir(self, src_dir, dst_dir):
        ''' submit a recursive transfer of src_dir to dst_dir; returns the submit response '''
        transfer_data = TransferData(self.transferClient,
                                     self.src_endpoint['id'],
                                     self.dst_endpoint['id'])
        transfer_data.add_item(src_dir, dst_dir, recursive=True)
        result = self.transferClient.submit_transfer(transfer_data)
        self.logger.info('task [{0}] {1}'.format(result['task_id'],
                                                 result['code']))
        return result

    def transfer_file(self, src_file, dst_file):
        ''' submit a transfer of a single file; returns the submit response '''
        transfer_data = TransferData(self.transferClient,
                                     self.src_endpoint['id'],
                                     self.dst_endpoint['id'])
        transfer_data.add_item(src_file, dst_file)
        result = self.transferClient.submit_transfer(transfer_data)
        self.logger.info('task_id [{0}] {1}'.format(result['task_id'],
                                                    result['code']))
        return result

    def ls_src_dir(self, path, ls_filter=''):
        '''
        list path on the source endpoint and return the listing response

        every entry is logged at debug level with its 'name' and 'type';
        type defines whether the entry is a file or a folder
        '''
        result = self.transferClient.operation_ls(self.src_endpoint['id'],
                                                  path=path,
                                                  filter=ls_filter)
        for entry in result:
            self.logger.debug('name: {0}\ttype: {1}'.format(
                entry["name"], entry["type"]))
        return result

    def task_list(self, num_results=10):
        ''' log and return the most recent tasks (up to num_results) '''
        result = self.transferClient.task_list(num_results=num_results)
        for task in result:
            self.logger.debug('task_id: [{0}]\t status: {1}'.format(
                task['task_id'], task['status']))

        # Queried a second time so the caller gets a fresh result -
        # presumably because iterating for the log consumes the first one;
        # confirm against the SDK version in use
        result = self.transferClient.task_list(num_results=num_results)
        return result

    def get_task(self, task_id):
        ''' return the transfer client's record for task_id '''
        return self.transferClient.get_task(task_id)