Example no. 1
0
def get_job_details(response):
    """Extract details of long-running YARN applications from an RM response.

    Args:
        response: raw response object understood by ``capi.get_response_data``.

    Returns:
        A list of dicts (appId, allocatedMB, user, jobCounts, trackingUrl,
        startedTime, elapsedTime, app_landing_url) for every app whose
        elapsedTime exceeds the configured ``check_interval``, or ``None``
        when the response carries no 'apps' payload.

    Raises:
        Exception: re-raised (with original traceback) after logging.
    """
    logger.debug('Executing get_job_details')
    try:
        json_data_dump = capi.get_response_data(response)
        if json_data_dump['apps'] is None:
            return None
        data_arr = json_data_dump['apps']['app']
        job_count = 0
        data_list = []
        for app in data_arr:
            logger.debug('complete application response %s:  ' % app)
            # Only report apps running longer than the configured threshold.
            if app.get('elapsedTime') > int(capi.get_config('check_interval')):
                logger.debug('A job is running more than an hour, fetching info. Elapsed time: {0}'.format(app.get('elapsedTime')))
                job_count += 1
                data = {
                    "appId": app['id'],
                    "allocatedMB": app['allocatedMB'],
                    "user": app['user'],
                    "jobCounts": job_count,
                    "trackingUrl": app['trackingUrl'],
                    "startedTime": app['startedTime'],
                    "elapsedTime": app['elapsedTime'],
                }

                app_landing_url = capi.get_base_url() + '/cluster/app/' + data.get('appId')
                logger.debug('App landing url is: %s', app_landing_url)
                data["app_landing_url"] = app_landing_url
                # NOTE(review): user-query enrichment is intentionally disabled;
                # restore the block below to re-enable it (skips "spark" apps).
                # if "spark" not in app.get('name').lower():
                #     user_query = get_user_query(app.get('trackingUrl', ''))
                #     if user_query == '' or user_query is None:
                #         user_query = app['name']
                #     data["user_query"] = user_query

                data_list.append(data)
        return data_list
    except Exception as e:
        # BUG FIX: Exception.message does not exist in Python 3 and the
        # original format string had no placeholder, so the error detail was
        # never logged. Bare `raise` preserves the original traceback.
        logger.error('Exception from get_job_details, Error is: {0}'.format(e))
        raise
Example no. 2
0
def get_user_query(job_tracker_url):
    """Scrape the Hive query string ("hive.query.string") from a job tracker page.

    Follows every 'job' link on the tracker page to its /conf/ counterpart and
    searches the rendered configuration table for the query.

    Args:
        job_tracker_url: URL of the job tracker page; '' short-circuits to None.

    Returns:
        The user query with newlines flattened to spaces, or None if the URL
        is empty or the config key is not found.

    Raises:
        requests.ConnectionError: re-raised after logging.
        Exception: any other scraping failure, re-raised after logging.
    """
    logger.debug('Executing get_user_query with params : {0}'.format(job_tracker_url))
    try:
        user_query = None
        if job_tracker_url == '':
            return user_query
        jtr = requests.get(job_tracker_url)
        jtr_content = BeautifulSoup(jtr.text, 'html.parser')
        for jtr_link in jtr_content.find_all('a'):
            href = str(jtr_link.get('href'))
            # Idiom fix: use the `in` operator rather than __contains__.
            if 'job' in href:
                conf_url = capi.get_base_url() + href.replace('/job/', '/conf/')
                logger.debug('pulling job configs from hdfs-conf link : {0}'.format(conf_url))
                jr = requests.get(conf_url)
                sp = BeautifulSoup(jr.text, "html.parser")
                all_td = sp.find('td', {"class": "content"}).find_all('td')
                logger.debug('td array length from hdfs configs : {0}'.format(len(all_td)))
                for td in all_td:
                    if td.get_text().strip() == "hive.query.string":
                        user_query = td.findNextSibling().get_text().replace('\n', ' ')
                        logger.debug('Found user query from job tracker : {0}'.format(user_query))
                        return user_query
    except requests.ConnectionError as ce:
        # BUG FIX: Exception.message was removed in Python 3 — log the
        # exception object itself; bare `raise` keeps the traceback.
        logger.error('Connection exception from get_user_query, Error is: {0}'.format(ce))
        raise
    except Exception as e:
        logger.error('Exception from get_user_query, Error is: {0}'.format(e))
        raise
Example no. 3
0
def main():
    """Interactively search an org's app families and print their apps."""
    logger = common.setup_logging()
    # Announce what this script does.
    logger.info(__doc__)

    # Gather the arguments every API call needs.
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        return 1

    # Report when the supplied token expires.
    helper = common.TokenHelper(token)
    logger.info("Token expires {}".format(helper.get_expiry_string()))

    # Every request needs an active group as its 'Context'; only the
    # top-level groups (orgs) matter here, so prompt the user for one.
    group_id = common.GroupSelector(logger, base_url, token).get_org()
    if not group_id:
        return 2

    logger.info("Selected {}".format(group_id))

    # This object queries the app_families endpoint for us.
    searcher = AppSearcher(logger, base_url, group_id, token)

    # Ask for the search term.
    term = input("-- Enter the app name (or part of) to search for: ")

    # Run the search.
    families = searcher.search_families(term)

    if families:
        # Bundled apps are legacy and should not be used — drop them.
        kept = [family for family in families if 'Bundled' not in family['name']]

        if not kept:
            logger.info("-- No apps found (bundled apps are ignored)")

        # Pretty-print each surviving family and its app variants.
        for family in kept:
            logger.info("{:50} {}".format(family['name'], family['id']))
            if 'apps' not in family:
                logger.error("Missing apps data")
                continue

            apps = family['apps']
            for key, value in (apps.get('interactive') or {}).items():
                logger.info("-- interactive {:35} {}".format(key, value))
            for key, value in (apps.get('compute') or {}).items():
                logger.info("-- compute     {:35} {}".format(key, value))
Example no. 4
0
def get_user_query(job_tracker_url):
    """Scrape the Hive query string ("hive.query.string") from a job tracker page.

    Follows every 'job' link on the tracker page to its /conf/ counterpart
    and searches the rendered configuration table for the query.

    Args:
        job_tracker_url: URL of the job tracker page; '' short-circuits to None.

    Returns:
        The user query with newlines flattened to spaces, or None if the URL
        is empty or the config key is not found.

    Raises:
        requests.ConnectionError: re-raised after logging.
        Exception: any other scraping failure, re-raised after logging.
    """
    logger.debug(
        'Executing get_user_query with params : {0}'.format(job_tracker_url))
    try:
        user_query = None
        if job_tracker_url == '':
            return user_query
        jtr = requests.get(job_tracker_url)
        jtr_content = BeautifulSoup(jtr.text, 'html.parser')
        for jtr_link in jtr_content.find_all('a'):
            href = str(jtr_link.get('href'))
            # Idiom fix: use the `in` operator rather than __contains__.
            if 'job' in href:
                conf_url = capi.get_base_url() + href.replace('/job/', '/conf/')
                logger.debug(
                    'pulling job configs from hdfs-conf link : {0}'.format(
                        conf_url))
                jr = requests.get(conf_url)
                sp = BeautifulSoup(jr.text, "html.parser")
                all_td = sp.find('td', {"class": "content"}).find_all('td')
                logger.debug(
                    'td array length from hdfs configs : {0}'.format(
                        len(all_td)))
                for td in all_td:
                    if td.get_text().strip() == "hive.query.string":
                        user_query = td.findNextSibling().get_text().replace(
                            '\n', ' ')
                        logger.debug(
                            'Found user query from job tracker : {0}'.format(
                                user_query))
                        return user_query
    except requests.ConnectionError as ce:
        # BUG FIX: Exception.message was removed in Python 3 — log the
        # exception object itself; bare `raise` keeps the traceback.
        logger.error(
            'Connection exception from get_user_query, Error is: {0}'.format(
                ce))
        raise
    except Exception as e:
        logger.error('Exception from get_user_query, Error is: {0}'.format(e))
        raise
Example no. 5
0
def main():
    """Validate entitlements and Dropbox, then launch a Nuke compute job."""
    logger = common.setup_logging()
    # Announce what this script does.
    logger.info(__doc__)

    # Gather the arguments every API call needs.
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        return 1

    # Every request needs an active group as its 'Context'. Ask the user,
    # starting at the Orgs and walking down the context tree to a leaf.
    selector = common.GroupSelector(logger, base_url, token)
    org_id = selector.get_org()
    if not org_id:
        logger.fatal("Failed to get Org")
        return 2

    leaf_group_id = selector.get_leaf_group(org_id)
    if not leaf_group_id:
        logger.fatal("Failed to get leaf group")
        return 3

    # The app-validation response is driven by active entitlements, which
    # hang off the Org — so pass the Org ID, not the child group ID.
    if not validate_app(logger, base_url, org_id, token, NUKE_COMPUTE_APP_ID):
        logger.fatal("Validate app failed")
        return 4

    # Make sure the user has a Dropbox mount available.
    dropbox_driver = check_dropbox_connected(
        logger, base_url, leaf_group_id, token)
    if not dropbox_driver:
        logger.fatal("Dropbox not connected?")
        return 5

    logger.info("Found a dropbox driver with id {}".format(
        dropbox_driver['id']))

    # Ready to go! Launch the compute job in the leaf group's context,
    # not the ancestor Org's.
    job_id = launch(logger, base_url, leaf_group_id, token)
Example no. 6
0
def get_job_details(response):
    """Extract details of long-running YARN applications from an RM response.

    Args:
        response: raw response object understood by ``capi.get_response_data``.

    Returns:
        A list of dicts (appId, allocatedMB, user, jobCounts, trackingUrl,
        startedTime, elapsedTime, app_landing_url) for every app whose
        elapsedTime exceeds the configured ``check_interval``, or ``None``
        when the response carries no 'apps' payload.

    Raises:
        Exception: re-raised (with original traceback) after logging.
    """
    logger.debug('Executing get_job_details')
    try:
        json_data_dump = capi.get_response_data(response)
        if json_data_dump['apps'] is None:
            return None
        data_arr = json_data_dump['apps']['app']
        job_count = 0
        data_list = []
        for app in data_arr:
            logger.debug('complete application response %s:  ' % app)
            # Only report apps running longer than the configured threshold.
            if app.get('elapsedTime') > int(capi.get_config('check_interval')):
                logger.debug(
                    'A job is running more than an hour, fetching info. Elapsed time: {0}'
                    .format(app.get('elapsedTime')))
                job_count += 1
                data = {
                    "appId": app['id'],
                    "allocatedMB": app['allocatedMB'],
                    "user": app['user'],
                    "jobCounts": job_count,
                    "trackingUrl": app['trackingUrl'],
                    "startedTime": app['startedTime'],
                    "elapsedTime": app['elapsedTime'],
                }

                app_landing_url = (
                    capi.get_base_url() + '/cluster/app/' + data.get('appId'))
                logger.debug('App landing url is: %s', app_landing_url)
                data["app_landing_url"] = app_landing_url
                # NOTE(review): user-query enrichment is intentionally disabled;
                # restore the block below to re-enable it (skips "spark" apps).
                # if "spark" not in app.get('name').lower():
                #     user_query = get_user_query(app.get('trackingUrl', ''))
                #     if user_query == '' or user_query is None:
                #         user_query = app['name']
                #     data["user_query"] = user_query

                data_list.append(data)
        return data_list
    except Exception as e:
        # BUG FIX: Exception.message does not exist in Python 3 and the
        # original format string had no placeholder, so the error detail was
        # never logged. Bare `raise` preserves the original traceback.
        logger.error('Exception from get_job_details, Error is: {0}'.format(e))
        raise
Example no. 7
0
def main():
    """Pick an org interactively, then recursively print its context tree."""
    logger = common.setup_logging()
    # Announce what this script does.
    logger.info(__doc__)

    # Gather the arguments every API call needs.
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        return 1

    # Report when the supplied token expires.
    helper = common.TokenHelper(token)
    logger.info("Token expires {}".format(helper.get_expiry_string()))

    # Every request needs an active group as its 'Context'; only the
    # top-level groups (orgs) matter here, so prompt the user for one.
    group_id = common.GroupSelector(logger, base_url, token).get_org()
    if not group_id:
        return 2

    # Hand the org id to the callable that walks the Context tree and prints it.
    RecursivePrinter(logger, base_url, group_id, token)()
Example no. 8
0
def main():
    """Upload a local folder to a user-selected Athera mount.

    Exits via sys.exit: 1 missing token, 2 invalid local path,
    3 no org selected, 4 mount listing failed, 5 no mounts found.
    """
    logger = common.setup_logging()
    # What are we going to do?
    logger.info(__doc__)

    # Determine some arguments we need for API calls
    base_url = common.get_base_url()
    token = common.get_token_from_env()
    if not token:
        logger.fatal("ATHERA_API_TOKEN not set in env")
        sys.exit(1)

    # Show the token expiry
    token_helper = common.TokenHelper(token)
    logger.info("Token expires {}".format(token_helper.get_expiry_string()))

    # API calls all need an active group to define the 'Context' of the request.
    selector = common.GroupSelector(logger, base_url, token)
    group_id = selector.get_org()
    if not group_id:
        sys.exit(3)

    logger.info("Using group_id {}".format(group_id))

    # Select the destination mount to which the source folder will be uploaded
    selector = common.MountSelector(logger, token)

    mounts, err = selector.get_mounts(group_id)
    # FIX: compare to None with `is not` (PEP 8 / E711), not `!=`.
    if err is not None:
        logger.error("Failed getting mounts: {}".format(err))
        sys.exit(4)
    if len(mounts) < 1:
        logger.info("No mounts found")
        sys.exit(5)

    selected_mount = selector.select_mount(
        group_id,
        "Select the mount on which you would like to upload your folder to",
        mounts)
    logger.info("Selected mount_id {} ({})".format(
        selected_mount.id, selected_mount.mount_location))

    # FIX: renamed local `destination_folder` -> `source_folder`; this is the
    # local SOURCE path being uploaded, not the destination.
    source_folder = input(
        "-- Please provide the absolute path of the local source folder\n")

    if not os.path.isdir(source_folder):
        logger.info(
            "'{}' is not a valid folder path".format(source_folder))
        sys.exit(2)

    # Feed this into the class which will query the upload_file endpoint
    folder_syncer = FolderSyncer(logger, base_url, group_id, token)

    # Upload the folder
    remote_folder_name = folder_syncer.upload_folder_to_athera(
        selected_mount.id, source_folder)

    logger.info(
        "Successfully uploaded {} to Athera at the location: {}".format(
            source_folder,
            selected_mount.mount_location + "/" + remote_folder_name))