def get_job_details(response):
    logger.debug('Executing get_job_details')
    try:
        skip_user_query = "spark"  # used by the (disabled) user-query lookup below
        json_data_dump = capi.get_response_data(response)
        if json_data_dump['apps'] is None:
            return None
        data_arr = json_data_dump['apps']['app']
        job_count = 0
        data_list = list()
        for key in data_arr:
            logger.debug('complete application response: %s' % key)
            # Only report jobs that have run longer than the configured check interval.
            if key.get('elapsedTime') > int(capi.get_config('check_interval')):
                logger.debug('A job has exceeded the check interval, fetching info. '
                             'Elapsed time: {0}'.format(key.get('elapsedTime')))
                job_count += 1
                data = {
                    "appId": key['id'],
                    "allocatedMB": key['allocatedMB'],
                    "user": key['user'],
                    "jobCounts": job_count,
                    "trackingUrl": key['trackingUrl'],
                    "startedTime": key['startedTime'],
                    "elapsedTime": key['elapsedTime']
                }
                app_landing_url = capi.get_base_url() + '/cluster/app/' + data['appId']
                logger.debug('App landing url is: %s', app_landing_url)
                data["app_landing_url"] = app_landing_url
                # if skip_user_query not in key.get('name').lower():
                #     user_query = get_user_query(key.get('trackingUrl', ''))
                #     if user_query == '' or user_query is None:
                #         user_query = key['name']
                #     data["user_query"] = user_query
                data_list.append(data)
        return data_list
    except Exception as e:
        logger.error('Exception from get_job_details: {0}'.format(e))
        raise
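# Hedged usage sketch (not part of the original module). get_job_details expects
# capi.get_response_data() to yield a dict shaped like the YARN ResourceManager
# /ws/v1/cluster/apps response; the payload below is hypothetical. With
# check_interval configured to 3600000 (one hour, in milliseconds), only the
# first app clears the threshold and would be reported.
_sample_apps_payload = {
    "apps": {
        "app": [
            {"id": "application_1510000000000_0001", "allocatedMB": 4096,
             "user": "etl", "name": "hive-nightly",
             "trackingUrl": "http://rm:8088/proxy/application_1510000000000_0001/",
             "startedTime": 1510000000000, "elapsedTime": 7200000},
            {"id": "application_1510000000000_0002", "allocatedMB": 1024,
             "user": "adhoc", "name": "spark-shell",
             "trackingUrl": "http://rm:8088/proxy/application_1510000000000_0002/",
             "startedTime": 1510000000000, "elapsedTime": 60000},
        ]
    }
}
# for job in get_job_details(response) or []:
#     print(job["appId"], job["elapsedTime"], job["app_landing_url"])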
def get_user_query(job_tracker_url):
    logger.debug('Executing get_user_query with params: {0}'.format(job_tracker_url))
    try:
        user_query = None
        if job_tracker_url == '':
            return user_query
        jtr = requests.get(job_tracker_url)
        jtr_content = BeautifulSoup(jtr.text, 'html.parser')
        for jtr_link in jtr_content.find_all('a'):
            href = str(jtr_link.get('href'))
            if 'job' in href:
                # Rewrite the job page link into its matching configuration page link.
                conf_url = capi.get_base_url() + href.replace('/job/', '/conf/')
                logger.debug('pulling job configs from hdfs-conf link: {0}'.format(conf_url))
                jr = requests.get(conf_url)
                sp = BeautifulSoup(jr.text, "html.parser")
                all_td = sp.find('td', {"class": "content"}).find_all('td')
                logger.debug('td array length from hdfs configs: {0}'.format(len(all_td)))
                for td in all_td:
                    if td.get_text().strip() == "hive.query.string":
                        # The query text sits in the cell following the property name.
                        user_query = td.find_next_sibling().get_text().replace('\n', ' ')
                        logger.debug('Found user query from job tracker: {0}'.format(user_query))
        return user_query
    except requests.ConnectionError as ce:
        logger.error('Connection exception from get_user_query, Error is: {0}'.format(ce))
        raise
    except Exception as e:
        logger.error('Exception from get_user_query, Error is: {0}'.format(e))
        raise
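# Self-contained sketch (not from the original module) of the scraping step that
# get_user_query relies on: in the job-conf page, the query text sits in the cell
# immediately following the cell whose text is "hive.query.string". The HTML
# below is a hypothetical, trimmed stand-in for a real conf page.
from bs4 import BeautifulSoup

_sample_conf_html = """
<table><tr><td class="content"><table>
<tr><td>hive.query.string</td><td>SELECT count(*)
FROM logs</td></tr>
</table></td></tr></table>
"""

def _demo_extract_query(html):
    sp = BeautifulSoup(html, "html.parser")
    for td in sp.find("td", {"class": "content"}).find_all("td"):
        if td.get_text().strip() == "hive.query.string":
            return td.find_next_sibling().get_text().replace("\n", " ")
    return None

# _demo_extract_query(_sample_conf_html) -> 'SELECT count(*) FROM logs'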
def main():
    logger = common.setup_logging()

    # What are we going to do?
    logger.info(__doc__)

    # Determine some arguments we need for api calls
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        return 1

    # Show the token expiry
    token_helper = common.TokenHelper(token)
    logger.info("Token expires {}".format(token_helper.get_expiry_string()))

    # API calls all need an active group to define the 'Context' of the request.
    # We only care about the top-level groups (orgs), so ask for user input.
    selector = common.GroupSelector(logger, base_url, token)
    group_id = selector.get_org()
    if not group_id:
        return 2
    logger.info("Selected {}".format(group_id))

    # Feed this into the class which will query the app_families endpoint
    searcher = AppSearcher(logger, base_url, group_id, token)

    # Fetch the search term
    target = input("-- Enter the app name (or part of) to search for: ")

    # Run the search
    families = searcher.search_families(target)
    if families:
        # Filter out bundled apps; they are legacy and should not be used.
        result = [f for f in families if 'Bundled' not in f['name']]
        if not result:
            logger.info("-- No apps found (bundled apps are ignored)")

        # Pretty-print the output
        for f in result:
            logger.info("{:50} {}".format(f['name'], f['id']))
            if 'apps' not in f:
                logger.error("Missing apps data")
                continue
            apps = f['apps']
            interactive_app = apps.get('interactive')
            compute_app = apps.get('compute')
            if interactive_app:
                for k, v in interactive_app.items():
                    logger.info("-- interactive {:35} {}".format(k, v))
            if compute_app:
                for k, v in compute_app.items():
                    logger.info("-- compute {:35} {}".format(k, v))
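# Hedged sketch of the bundled-app filter above. The 'name', 'id' and 'apps'
# fields come from the loop in main(); the values here are hypothetical.
_sample_families = [
    {"name": "Nuke 12 (Bundled)", "id": "fam-1", "apps": {}},
    {"name": "Blender", "id": "fam-2",
     "apps": {"interactive": {"id": "app-i"}, "compute": {"id": "app-c"}}},
]
# The same comprehension drops the legacy bundled entry:
# [f for f in _sample_families if 'Bundled' not in f['name']] -> only "Blender"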
def main():
    logger = common.setup_logging()

    # What are we going to do?
    logger.info(__doc__)

    # Determine some arguments we need for api calls
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        return 1

    # API calls all need an active group to define the 'Context' of the request.
    # In this case, ask the user for a group, starting with Orgs and walking the context tree.
    selector = common.GroupSelector(logger, base_url, token)
    org_id = selector.get_org()
    if not org_id:
        logger.fatal("Failed to get Org")
        return 2

    leaf_group_id = selector.get_leaf_group(org_id)
    if not leaf_group_id:
        logger.fatal("Failed to get leaf group")
        return 3

    # Check the provided app ID is valid and available for the selected org.
    # The response is based on active entitlements, so we provide the Org ID, not the child group ID.
    if not validate_app(logger, base_url, org_id, token, NUKE_COMPUTE_APP_ID):
        logger.fatal("Validate app failed")
        return 4

    # Check the user has a dropbox mount
    dropbox_driver = check_dropbox_connected(logger, base_url, leaf_group_id, token)
    if not dropbox_driver:
        logger.fatal("Dropbox not connected?")
        return 5
    logger.info("Found a dropbox driver with id {}".format(dropbox_driver['id']))

    # Ready to go!
    # We're launching the compute job in the leaf group's context, not the ancestor Org.
    job_id = launch(logger, base_url, leaf_group_id, token)
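# Entry-point sketch (an assumption; the original snippet ends at launch()).
# main() returns small integers on failure, which map naturally onto process
# exit codes; a successful run falls through and returns None, i.e. exit 0.
if __name__ == "__main__":
    import sys
    sys.exit(main())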
def main():
    logger = common.setup_logging()

    # What are we going to do?
    logger.info(__doc__)

    # Determine some arguments we need for api calls
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        return 1

    # Show the token expiry
    token_helper = common.TokenHelper(token)
    logger.info("Token expires {}".format(token_helper.get_expiry_string()))

    # API calls all need an active group to define the 'Context' of the request.
    # We only care about the top-level groups (orgs), so ask for user input.
    selector = common.GroupSelector(logger, base_url, token)
    group_id = selector.get_org()
    if not group_id:
        return 2

    # Feed the org id into the class which will recursively walk the Context tree
    RecursivePrinter(logger, base_url, group_id, token)()
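# The trailing "()" above constructs a RecursivePrinter and immediately invokes
# it, so the class must implement __call__. A minimal illustration of that
# callable-class pattern (hypothetical; not the real RecursivePrinter):
class _CallableExample(object):
    def __init__(self, logger):
        self.logger = logger

    def __call__(self):
        self.logger.info("invoked via __call__")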
def main():
    logger = common.setup_logging()

    # What are we going to do?
    logger.info(__doc__)

    # Determine some arguments we need for API calls
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        sys.exit(1)

    # Show the token expiry
    token_helper = common.TokenHelper(token)
    logger.info("Token expires {}".format(token_helper.get_expiry_string()))

    # API calls all need an active group to define the 'Context' of the request.
    selector = common.GroupSelector(logger, base_url, token)
    group_id = selector.get_org()
    if not group_id:
        sys.exit(3)
    logger.info("Using group_id {}".format(group_id))

    # Select the destination mount to which the source folder will be uploaded
    selector = common.MountSelector(logger, token)
    mounts, err = selector.get_mounts(group_id)
    if err is not None:
        logger.error("Failed getting mounts: {}".format(err))
        sys.exit(4)
    if len(mounts) < 1:
        logger.info("No mounts found")
        sys.exit(5)
    selected_mount = selector.select_mount(
        group_id,
        "Select the mount to which you would like to upload your folder",
        mounts)
    logger.info("Selected mount_id {} ({})".format(
        selected_mount.id, selected_mount.mount_location))

    source_folder = input(
        "-- Please provide the absolute path of the local source folder\n")
    if not os.path.isdir(source_folder):
        logger.info("'{}' is not a valid folder path".format(source_folder))
        sys.exit(2)

    # Feed this into the class which will query the upload_file endpoint
    folder_syncer = FolderSyncer(logger, base_url, group_id, token)

    # Upload the folder
    remote_folder_name = folder_syncer.upload_folder_to_athera(
        selected_mount.id, source_folder)
    logger.info("Successfully uploaded {} to Athera at the location: {}".format(
        source_folder, selected_mount.mount_location + "/" + remote_folder_name))
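# Note on the error convention: get_mounts returns a Go-style "(value, err)"
# tuple instead of raising. A minimal, hypothetical helper for consuming such
# an API, should callers prefer exceptions:
def _unwrap(value, err):
    if err is not None:
        raise RuntimeError(err)
    return value

# mounts = _unwrap(*selector.get_mounts(group_id))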