Exemple #1
0
def enable_resource_for_execution(resource):
    """Register the resource in mod_appkernel and switch it on in AKRR.

    Does nothing when the module-level ``dry_run`` flag is set. Otherwise the
    ``resource`` table is searched for a row whose nickname equals
    ``resource['name']``; a missing row is inserted (disabled and hidden)
    first. The row is then marked enabled and visible. Finally
    ``/resources/<name>/on`` is PUT to the AKRR REST API.

    ``MySQLdb.Error`` and ``requests.RequestException`` are logged, not raised.

    :param resource: resource configuration mapping; ``'name'`` is always
        read, ``'info'`` only when a new row has to be inserted.
    """
    if dry_run:
        return

    resource_name = resource['name']

    # --- step 1: make the resource known, enabled and visible in the AK DB
    try:
        con_ak, cur_ak = cfg.getAKDB(True)

        def fetch_records():
            # all rows registered under this nickname
            cur_ak.execute('''SELECT * FROM resource WHERE nickname=%s''',
                           (resource_name, ))
            return cur_ak.fetchall()

        records = fetch_records()
        if not records:
            log.warning(
                "There is no record of %s in mod_appkernel.resource will add one.",
                resource_name)
            new_row = (resource['name'], resource['name'], resource['info'])
            cur_ak.execute(
                '''INSERT INTO resource (resource,nickname,description,enabled,visible)
                        VALUES(%s,%s,%s,0,0);''',
                new_row)
            con_ak.commit()
            records = fetch_records()

        first_record = records[0]
        cur_ak.execute(
            '''UPDATE resource
                        SET enabled=1,visible=1
                        WHERE resource_id=%s;''',
            (first_record['resource_id'], ))
        con_ak.commit()
        log.info(
            "Enabled %s in mod_appkernel.resource for tasks execution and made it visible to XDMoD UI.",
            resource_name)
    except MySQLdb.Error:
        log.error("Can not connect to AK DB\nProbably invalid credential")

    # --- step 2: tell the AKRR daemon to start scheduling on the resource
    try:
        response = akrrrestclient.put('/resources/' + resource_name + '/on')
        if response.status_code != 200:
            log.error(
                "Can not enable resource through AKRR REST API ( %s )\nSee server response below\n%s",
                akrrrestclient.restapi_host,
                json.dumps(response.json(), indent=4))
        else:
            log.info('Successfully enabled ' + resource_name)
    except requests.RequestException:
        log.error(
            "Can not enable resource through AKRR REST API ( %s )\n"
            "Is it still running?\n", akrrrestclient.restapi_host)
Exemple #2
0
def check_if_test_job_already_submitted(resource, app_name="test"):
    """Return the task id of an already submitted test job, or None.

    The task id is read from the first line of the lock file named by
    ``get_test_job_lock_filename(resource, app_name)``. The id is only
    returned when the AKRR REST API answers ``/tasks/<id>`` with status 200;
    in that case a warning is logged and ``log.warning_count`` is incremented.
    The age of the lock file is not checked.

    :param resource: resource configuration passed on to
        ``get_test_job_lock_filename``.
    :param app_name: application kernel name, ``"test"`` by default.
    :return: task id as ``int``, or ``None`` when there is no lock file or the
        task is not known to the REST API.
    :raises ValueError: if the first line of the lock file is not an integer.
    """
    test_job_lock_filename = get_test_job_lock_filename(resource, app_name)
    if not os.path.isfile(test_job_lock_filename):
        return None

    # context manager closes the file even when int() rejects the content
    with open(test_job_lock_filename, "r") as fin:
        task_id = int(fin.readline())

    r = akrrrestclient.get('/tasks/' + str(task_id))
    if r.status_code != 200:
        # lock file refers to a task the server does not report
        return None

    log.warning_count += 1
    log.warning(
        "\nWARNING %d: Seems this is rerun of this script, will monitor task with task_id = %d ",
        log.warning_count, task_id)
    log.warning("To submit new task delete %s\n", test_job_lock_filename)
    return task_id
Exemple #3
0
def check_create_dirs(rsh, resource):
    """Check the directories configured for the resource over the open shell.

    ``akrrData`` and ``appKerDir`` are checked with ``exit_on_fail=True`` and
    ``try_to_create=True``. ``networkScratch`` and ``localScratch`` are only
    probed; a failed probe is reported as a warning and counted in
    ``log.warning_count``.

    :param rsh: remote shell handle passed through to ``check_dir``.
    :param resource: resource configuration mapping; reads
        ``'remoteAccessNode'`` and the four directory keys named above.
    """
    log.info("Checking directory locations\n")

    # directories AKRR itself writes to: create when missing, failure is fatal
    for key in ('akrrData', 'appKerDir'):
        path = resource[key]
        log.info("Checking: %s:%s", resource['remoteAccessNode'], path)
        _, report = check_dir(rsh, path, exit_on_fail=True, try_to_create=True)
        log.info(report)

    # scratch locations: just look, never create, failure is only a warning
    for key, label in (('networkScratch', 'network'),
                       ('localScratch', 'local')):
        path = resource[key]
        log.info("Checking: %s:%s", resource['remoteAccessNode'], path)
        ok, report = check_dir(rsh, path, exit_on_fail=False,
                               try_to_create=False)
        if ok is True:
            log.info(report)
            continue

        log.warning_count += 1
        log.warning(report)
        log.warning(
            "WARNING %d: " + label +
            " scratch might be have a different location on head node, "
            "so if it is by design it is ok", log.warning_count)

    log.empty_line()
Exemple #4
0
 def run_it(_):
     """Placeholder handler: ignores its argument and only logs a warning."""
     log.warning("add_command_install is not implemented")
Exemple #5
0
 def run_it(_):
     """Placeholder handler: ignores its argument and only logs a warning."""
     log.warning("add_command_build is not implemented")
Exemple #6
0
def resource_deploy(args):
    """Validate a resource configuration, deploy files to it and run a test job.

    Steps, in order: validate the resource parameter file, connect to the
    resource, check the shell and directories, copy inputs/sources, check the
    app signature calculator, close the connection, run the test job. When no
    errors were counted, ``append_to_bashrc`` and
    ``enable_resource_for_execution`` are called. A summary of error and
    warning counts is logged at the end.

    :param args: parsed arguments; ``args.resource`` is required, while
        ``dry_run``, ``checking_frequency``, ``appkernel`` (default
        ``"test"``) and ``nodes`` (default ``2``) are used only when present.
        The first two overwrite the module-level globals of the same name.
    """
    global dry_run
    global checking_frequency

    resource_name = args.resource

    if 'dry_run' in args:
        dry_run = args.dry_run

    if "checking_frequency" in args:
        checking_frequency = args.checking_frequency

    app_name = args.appkernel if "appkernel" in args else "test"
    nodes = int(args.nodes) if "nodes" in args else 2

    log.error_count = 0
    log.warning_count = 0

    # validate resource configuration and get config
    resource = validate_resource_parameter_file(resource_name)

    # connect to resource
    rsh = connect_to_resource(resource)
    try:
        # do tests
        check_shell(rsh, resource)
        check_create_dirs(rsh, resource)

        # deploy inputs and sources
        copy_exec_sources_and_inputs(rsh, resource)

        # check that app.signature calculator on headnode
        check_appsig(rsh, resource)
    finally:
        # close the connection even when one of the steps above raised,
        # it is not needed any more either way
        rsh.close(force=True)
        del rsh

    # run test job to queue
    run_test_job(resource, app_name, nodes)

    if log.error_count == 0:
        append_to_bashrc(resource)
        enable_resource_for_execution(resource)
    log.empty_line()

    log.info("Result:")
    if log.error_count > 0:
        log.error("There are %d errors, fix them.", log.error_count)

    if log.warning_count > 0:
        log.warning(
            "There are %d warnings.\nif warnings have sense you can move to next step!\n",
            log.warning_count)
    if log.error_count == 0 and log.warning_count == 0:
        log.info("\nDONE, you can move to next step!\n")
Exemple #7
0
def copy_exec_sources_and_inputs(rsh, resource):
    """Copy the inputs and execs tarballs to the resource and unpack them.

    The remote shell is first moved into ``resource['appKerDir']``. For each
    of ``inputs`` and ``execs``: when the directory is not listed there yet,
    the tarball from ``cfg.appker_repo_dir`` is copied over and unpacked,
    otherwise a warning is counted and the existing directory is kept. The
    tarballs are removed from the resource afterwards.

    With the module-level ``dry_run`` flag set, nothing is copied, unpacked
    or removed; only the log messages are produced.

    :param rsh: remote shell handle used with ``cfg.sshCommand``.
    :param resource: resource configuration mapping; reads ``'appKerDir'``
        and ``'name'``.
    :raises Exception: any failure is logged as critical and re-raised.
    """
    log.info(
        "Preparing to copy application signature calculator,\n"
        "    app. kernel input files and \n"
        "    HPCC, IMB, IOR and Graph500 source code to remote resource\n")

    try:
        # later relative commands (tar, rm) rely on this working directory
        cfg.sshCommand(rsh, "cd %s" % resource['appKerDir'])
        out = cfg.sshCommand(rsh, "ls " + resource['appKerDir'])
        files_in_appker_dir = out.strip().split()

        if not ("inputs" in files_in_appker_dir
                or "inputs/" in files_in_appker_dir):
            log.info("Copying app. kernel input tarball to %s",
                     resource['appKerDir'])
            if not dry_run:
                cfg.scpToResource(resource,
                                  cfg.appker_repo_dir + "/inputs.tar.gz",
                                  resource['appKerDir'])

            log.info("Unpacking app. kernel input files to %s/inputs",
                     resource['appKerDir'])
            if not dry_run:
                out = cfg.sshCommand(
                    rsh, "tar xvfz %s/inputs.tar.gz" % resource['appKerDir'])
                log.debug(out)

                # a missing directory shows up in the command output
                out = cfg.sshCommand(rsh,
                                     "du -h %s/inputs" % resource['appKerDir'])
                log.debug(out)

                if out.count("No such file or directory") == 0:
                    log.info("App. kernel input files are in %s/inputs\n",
                             resource['appKerDir'])
                else:
                    raise Exception("files are not copied!")
        else:
            log.warning_count += 1
            log.warning(
                "WARNING %d: App. kernel inputs directory %s/inputs is present, assume they are correct.\n",
                log.warning_count, resource['appKerDir'])

        if not ("execs" in files_in_appker_dir
                or "execs/" in files_in_appker_dir):
            log.info(
                "Copying app. kernel execs tarball to %s\n"
                "It contains HPCC,IMB,IOR and Graph500 source code and app.signature calculator",
                resource['appKerDir'])
            if not dry_run:
                cfg.scpToResource(resource,
                                  cfg.appker_repo_dir + "/execs.tar.gz",
                                  resource['appKerDir'])
            log.info(
                "Unpacking HPCC,IMB,IOR and Graph500 source code and app.signature calculator files to %s/execs",
                resource['appKerDir'])
            if not dry_run:
                out = cfg.sshCommand(
                    rsh, "tar xvfz %s/execs.tar.gz" % resource['appKerDir'])
                log.debug(out)

                # a missing directory shows up in the command output
                out = cfg.sshCommand(rsh,
                                     "df -h %s/execs" % resource['appKerDir'])
                log.debug(out)

                if out.count("No such file or directory") == 0:
                    log.info(
                        "HPCC,IMB,IOR and Graph500 source code and app.signature calculator are in %s/execs\n",
                        resource['appKerDir'])
                else:
                    raise Exception("files are not copied!")
        else:
            log.warning_count += 1
            log.warning(
                "WARNING %d: App. kernel executables directory %s/execs is present, assume they are correct.",
                log.warning_count, resource['appKerDir'])
            log.warning(
                "It should contain HPCC,IMB,IOR and Graph500 source code and app.signature calculator\n"
            )

        # a dry run must not delete anything on the resource
        if not dry_run:
            cfg.sshCommand(rsh, "rm execs.tar.gz  inputs.tar.gz")
    except Exception:
        log.critical("Can not copy files to %s", resource['name'])
        # bare raise keeps the original traceback intact
        raise
Exemple #8
0
def _remove_log_dir():
    """Pass ``cfg.akrr_log_dir`` to ``_remove_dir``; only warn when it is None."""
    if cfg.akrr_log_dir is not None:
        _remove_dir(cfg.akrr_log_dir)
    else:
        log.warning("akrr_log_dir is None")
Exemple #9
0
def resource_add(config):
    """Interactively collect parameters of a new resource and write its config.

    ``config`` should have the following members:
        dry_run - Dry Run No files will actually be created
        minimalistic - Minimize questions number, configuration files will be edited manually
        no_ping - do not run ping to test headnode name
        verbose

    The user is asked for the XDMoD resource id (when XDMoD reports any
    resources), the AKRR resource name and the queuing system. Unless
    ``minimalistic`` is set, access method, system characteristics and file
    system locations are asked as well. The answers are stored in
    module-level globals and ``generate_resource_config`` is called at the end.
    """
    global verbose
    global dry_run
    global no_ping
    global minimalistic
    global resource_name
    global remoteAccessNode
    global remoteAccessMethod
    global remoteCopyMethod
    global sshUserName
    global sshPassword
    global sshPrivateKeyFile
    global sshPrivateKeyPassword
    global networkScratch
    global localScratch
    global akrrData
    global appKerDir
    global batchScheduler
    global batchJobHeaderTemplate

    verbose = config.verbose

    log.info("Beginning Initiation of New Resource...")
    dry_run = config.dry_run
    resource_deploy.dry_run = config.dry_run
    no_ping = config.no_ping
    minimalistic = config.minimalistic

    log.info("Retrieving Resources from XDMoD Database...")
    # RETRIEVE: the resources from XDMoD
    resources = retrieve_resources()
    log.info("Found following resources from XDMoD Database:\n" +
             "    resource_id  name\n" + "\n".join([
                 "    %11d  %-40s" % (xdmod_id, xdmod_name)
                 for xdmod_name, xdmod_id in resources
             ]) + "\n")

    if len(resources) > 0:
        while True:
            log.log_input(
                'Enter resource_id for import (enter 0 for no match):')
            resource_id = input()
            if validate_resource_id(resource_id, resources):
                break
            log.warning("Incorrect resource_id try again")
        log.empty_line()
        resource_id = int(resource_id)
    else:
        resource_id = 0

    if resource_id <= 0:  # i.e. no match from XDMoD DB
        resource_id = None

    resource_name = ""
    while True:
        if resource_id is None:
            log.log_input('Enter AKRR resource name:')
            resource_name = input()
        else:
            resource_name2 = get_resource_name_by_id(resource_id, resources)
            log.log_input(
                'Enter AKRR resource name, hit enter to use same name as in XDMoD Database [%s]:'
                % (resource_name2, ))
            resource_name = input()
            if resource_name.strip() == "":
                resource_name = resource_name2

        if validate_resource_name(resource_name):
            break
    log.empty_line()

    while True:
        log.log_input('Enter queuing system on resource (slurm or pbs): ')
        queuing_system = input()
        if validate_queuing_system(queuing_system):
            break
        else:
            log.error("Incorrect queuing_system try again")

    batchScheduler = queuing_system
    log.empty_line()

    if minimalistic is False:
        get_remote_access_method()
        get_system_characteristics()
        get_file_system_access_points()

    def _masked(secret):
        # credentials must never reach the log; show only whether one is set
        return None if secret is None else "<hidden>"

    summary = (
        ("resource_name", resource_name),
        ("remoteAccessNode", remoteAccessNode),
        ("remoteAccessMethod", remoteAccessMethod),
        ("remoteCopyMethod", remoteCopyMethod),
        ("sshUserName", sshUserName),
        ("sshPassword", _masked(sshPassword)),
        ("sshPrivateKeyFile", sshPrivateKeyFile),
        ("sshPrivateKeyPassword", _masked(sshPrivateKeyPassword)),
        ("networkScratch", networkScratch),
        ("localScratch", localScratch),
        ("akrrData", akrrData),
        ("appKerDir", appKerDir),
        ("batchScheduler", batchScheduler),
        ("batchJobHeaderTemplate", batchJobHeaderTemplate),
    )
    # one parameter per line so the summary is actually readable
    log.debug("Summary of parameters\n" + "\n".join(
        "    {}: {}".format(key, value) for key, value in summary) + "\n")

    generate_resource_config(resource_id, resource_name, queuing_system)
    log.info("Initiation of new resource is completed.\n"
             "    Edit batchJobHeaderTemplate variable in {}\n"
             "    and move to resource validation and deployment step.\n"
             "    i.e. execute:\n"
             "        akrr resource deploy -r {}".format(
                 resource_cfg_filename, resource_name))
Exemple #10
0
def get_file_system_access_points():
    """Ask the user for the four directory locations used on the resource.

    Prompts, in order, for local scratch, network scratch, the app kernel
    directory and the app kernel working directory. Each answer is checked
    over the module-level ``rsh`` shell and then passed through ``echo`` on
    the resource before being stored in the globals ``localScratch``,
    ``networkScratch``, ``appKerDir`` and ``akrrData``.

    Local and network scratch are accepted even when the check fails (a
    warning is logged); the other two are asked again until the check passes.
    """
    global resource_name
    global networkScratch
    global localScratch
    global akrrData
    global appKerDir

    home_dir = cfg.sshCommand(rsh, "echo $HOME").strip()
    scratch_network_dir = cfg.sshCommand(rsh, "echo $SCRATCH").strip()

    # --- localScratch: asked once, a failed check is only a warning
    local_scratch_default = "/tmp"
    log.log_input(
        "Enter location of local scratch (visible only to single node):")
    localScratch = input("[%s]" % local_scratch_default)
    if not localScratch.strip():
        localScratch = local_scratch_default
    local_ok, local_report = resource_deploy.check_dir_simple(
        rsh, localScratch)
    if local_ok:
        log.info(local_report)
    else:
        log.warning(local_report)
        log.warning(
            'local scratch might be have a different location on head node, so if it is by design it is ok'
        )
    log.empty_line()
    localScratch = cfg.sshCommand(rsh, "echo %s" % (localScratch, )).strip()

    # --- networkScratch: repeated only while the answer is empty
    network_scratch_default = scratch_network_dir
    while True:
        log.log_input(
            "Enter location of network scratch (visible only to all nodes),"
            "used for temporary storage of app kernel input/output:")
        if network_scratch_default != "":
            networkScratch = input("[%s]" % network_scratch_default)
            if not networkScratch.strip():
                networkScratch = network_scratch_default
        else:
            networkScratch = input("")

        if networkScratch != "":
            break
        log.error("Incorrect value for networkScratch, try again")

    network_scratch_visible = False
    network_ok, network_report = resource_deploy.check_dir(
        rsh, networkScratch, exit_on_fail=False, try_to_create=True)
    if network_ok:
        log.info(network_report)
        network_scratch_visible = True
        log.empty_line()
    else:
        log.warning(network_report)
    networkScratch = cfg.sshCommand(rsh,
                                    "echo %s" % (networkScratch, )).strip()

    # --- appKerDir: asked until the directory check passes
    appker_dir_default = os.path.join(home_dir, "appker", resource_name)
    appker_ok = False
    while not appker_ok:
        log.log_input(
            "Enter future location of app kernels input and executable files:")
        appKerDir = input("[%s]" % appker_dir_default)
        if not appKerDir.strip():
            appKerDir = appker_dir_default
        appker_ok, appker_report = resource_deploy.check_dir(
            rsh, appKerDir, exit_on_fail=False, try_to_create=True)
        if appker_ok:
            log.info(appker_report)
            log.empty_line()
        else:
            log.error(appker_report)
    appKerDir = cfg.sshCommand(rsh, "echo %s" % (appKerDir, )).strip()

    # --- akrrData: default moves to network scratch when that one passed
    if network_scratch_visible:
        akrr_data_default = os.path.join(networkScratch, "akrr_data",
                                         resource_name)
    else:
        akrr_data_default = os.path.join(home_dir, "akrr_data", resource_name)
    akrr_data_ok = False
    while not akrr_data_ok:
        log.log_input(
            "Enter future locations for app kernels working directories (can or even should be on scratch space):"
        )
        akrrData = input("[%s]" % akrr_data_default)
        if not akrrData.strip():
            akrrData = akrr_data_default
        akrr_data_ok, akrr_data_report = resource_deploy.check_dir(
            rsh, akrrData, exit_on_fail=False, try_to_create=True)
        if akrr_data_ok:
            log.info(akrr_data_report)
            log.empty_line()
        else:
            log.error(akrr_data_report)
    akrrData = cfg.sshCommand(rsh, "echo %s" % (akrrData, )).strip()
Exemple #11
0
def get_remote_access_method():
    """Interactively establish how AKRR will log in to the resource.

    Asks for the head node name (verified with ``ping`` unless the failure is
    ignored because ``no_ping`` is set) and the user name, then calls
    ``check_connection_to_resource``. While that fails, the user chooses
    between trying again, using an existing private key, generating a new key
    pair (installed with ``ssh-copy-id``) or using a password. Once the
    connection works, a shell is opened and stored in the global ``rsh``.

    Results are stored in the module-level globals declared below.

    :return: ``True``; the function only returns after a successful
        connection check.
    :raises Exception: whatever ``cfg.sshAccess`` raises while opening the
        final shell; captured output is logged at debug level first.
    """
    # local import: external commands are run without a shell so that host
    # names and passphrases are passed verbatim as single arguments
    import subprocess

    global resource_name
    global remoteAccessNode
    global remoteAccessMethod
    global remoteCopyMethod
    global sshUserName
    global sshPassword
    global sshPassword4thisSession
    global sshPrivateKeyFile
    global sshPrivateKeyPassword
    global rsh
    global no_ping

    # set remoteAccessNode
    while True:
        log.log_input(
            "Enter Resource head node (access node) full name (e.g. headnode.somewhere.org):"
        )
        remoteAccessNode = input("[%s] " % resource_name)
        if remoteAccessNode.strip() == "":
            remoteAccessNode = resource_name

        try:
            response = subprocess.call(
                ["ping", "-c", "1", "-w2", remoteAccessNode],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL)
        except OSError:
            # ping itself could not be started; treat like an unreachable host
            response = 1

        if response == 0:
            break
        else:
            if no_ping:
                log.warning("Can not ping %s, but asked to ignore it.",
                            remoteAccessNode)
                break
            log.error("Incorrect head node name (can not ping %s), try again",
                      remoteAccessNode)

    # set sshUserName
    current_user = getpass.getuser()
    ask_for_user_name = True

    while True:
        if ask_for_user_name:
            log.log_input("Enter username for resource access:")
            sshUserName = input("[%s] " % current_user)
            if sshUserName.strip() == "":
                sshUserName = current_user
            current_user = sshUserName

        # check password-less access
        if sshPassword is None:
            log.info("Checking for password-less access")
        else:
            log.info("Checking for resource access")
        successfully_connected = check_connection_to_resource()

        if successfully_connected:
            if sshPassword is None:
                log.info("Can access resource without password")
            else:
                log.info("Can access resource")

        if successfully_connected is False:
            log.info("Can not access resource without password")
            action_list = [(
                "TryAgain",
                "The private and public keys was generated manually, right now. Try again."
            )]
            # check private keys: every "<name>.pub" in ~/.ssh that has a
            # matching "<name>" file next to it
            user_home_dir = os.path.expanduser("~")
            private_keys = [
                os.path.join(user_home_dir, '.ssh', f[:-4])
                for f in os.listdir(os.path.join(user_home_dir, '.ssh'))
                if os.path.isfile(os.path.join(user_home_dir, '.ssh', f))
                and f[-4:] == '.pub'
                and os.path.isfile(os.path.join(user_home_dir, '.ssh', f[:-4]))
            ]

            if len(private_keys) > 0:
                action_list.append(("UseExistingPrivateKey",
                                    "Use existing private and public key."))

            action_list.append(
                ("GenNewKey", "Generate new private and public key."))
            action_list.append(("UsePassword", "Use password directly."))
            log.empty_line()

            log.info("Select authentication method:" + "\n".join([
                "%3d  %s" % (i, desc)
                for i, (_, desc) in enumerate(action_list)
            ]))
            while True:
                log.log_input("Select option from list above:")
                try:
                    action = input("[2] ")
                    if action.strip() == "":
                        action = 2
                    else:
                        action = int(action)

                    if action < 0 or action >= len(action_list):
                        raise ValueError()
                    break
                except (ValueError, TypeError):
                    log.error("Incorrect entry, try again.")

            # do the action
            log.empty_line()
            if action_list[action][0] == "TryAgain":
                continue
            if action_list[action][0] == "UsePassword":
                log.log_input("Enter password for %s@%s:" %
                              (sshUserName, remoteAccessNode))
                sshPassword = getpass.getpass("")
                ask_for_user_name = not ask_for_user_name
                continue
            if action_list[action][0] == "UseExistingPrivateKey":
                log.info("Available private keys:" + "\n".join(
                    ["%3d  %s" % (i, p) for i, p in enumerate(private_keys)]))
                while True:
                    log.log_input("Select key number from list above:")
                    try:
                        i_key = input("")
                        i_key = int(i_key)

                        if i_key < 0 or i_key >= len(private_keys):
                            raise ValueError()
                        break
                    except (ValueError, TypeError):
                        log.error("Incorrect entry, try again.")
                sshPrivateKeyFile = private_keys[i_key]
                ask_for_user_name = not ask_for_user_name
                continue
            if action_list[action][0] == "GenNewKey":
                # up to three attempts to get a working session password
                count = 0
                while True:
                    log.log_input(
                        "Enter password for %s@%s (will be used only during this session):"
                        % (sshUserName, remoteAccessNode))
                    sshPassword4thisSession = getpass.getpass("")
                    sshPassword = sshPassword4thisSession

                    if check_connection_to_resource():
                        break
                    count += 1
                    if count >= 3:
                        break
                # the session password is not kept as the configured password
                sshPassword = None
                # generate keys
                log.log_input("Enter private key name:")
                sshPrivateKeyFile = input("[id_rsa_%s]" % resource_name)
                if sshPrivateKeyFile.strip() == "":
                    sshPrivateKeyFile = "id_rsa_%s" % resource_name
                sshPrivateKeyFile = os.path.join(user_home_dir, '.ssh',
                                                 sshPrivateKeyFile)
                log.log_input(
                    "Enter passphrase for new key (leave empty for passwordless access):"
                )
                sshPrivateKeyPassword = getpass.getpass("")
                try:
                    # argument list: the passphrase may contain quotes, "$"
                    # or spaces without being interpreted by a shell
                    subprocess.call([
                        "ssh-keygen", "-t", "rsa", "-N",
                        sshPrivateKeyPassword, "-f", sshPrivateKeyFile
                    ])
                except OSError as e:
                    log.error("Can not run ssh-keygen: %s", e)
                if sshPrivateKeyPassword.strip() == "":
                    sshPrivateKeyPassword = None
                # copy keys
                cfg.sshAccess(remoteAccessNode,
                              ssh='ssh-copy-id',
                              username=sshUserName,
                              password=sshPassword4thisSession,
                              PrivateKeyFile=sshPrivateKeyFile,
                              PrivateKeyPassword=None,
                              logfile=sys.stdout,
                              command='')
                ask_for_user_name = not ask_for_user_name
                continue

        if successfully_connected:
            break
        else:
            log.error("Incorrect resource access credential")

    if successfully_connected:
        log.empty_line()
        log.info("Connecting to " + resource_name)

        # capture everything the ssh layer prints while the shell is opened
        str_io = io.StringIO()
        try:
            sys.stdout = sys.stderr = str_io
            rsh = cfg.sshAccess(remoteAccessNode,
                                ssh=remoteAccessMethod,
                                username=sshUserName,
                                password=sshPassword,
                                PrivateKeyFile=sshPrivateKeyFile,
                                PrivateKeyPassword=sshPrivateKeyPassword,
                                logfile=sys.stdout,
                                command=None)
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
        except Exception as e:
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
            log.debug(str_io.getvalue())
            raise e

        log.info("              Done")
    log.empty_line()
    return successfully_connected
Exemple #12
0
def check_connection_to_resource():
    """Check the connection to the remote resource.

    Runs ``ls`` on ``remoteAccessNode`` through ``cfg.sshAccess`` using the
    credentials held in the module-level globals. On failure the captured
    output is inspected:

    * a passphrase prompt makes the function ask for the key passphrase (up
      to three times; after that the key settings are reset to ``None``);
    * a password prompt while a private key is configured makes the function
      ask for a session password and install the public key with
      ``ssh-copy-id`` (up to three password attempts), then retry.

    Any other failure ends the check.

    :return: ``True`` when the ``ls`` command succeeded, ``False`` otherwise.
    """
    global remoteAccessNode
    global remoteAccessMethod
    global remoteCopyMethod
    global sshUserName
    global sshPassword
    global sshPassword4thisSession
    global sshPrivateKeyFile
    global sshPrivateKeyPassword

    successfully_connected = False
    passphrase_entrance_count = 0
    authorize_key_count = 0
    while True:
        # Try to connect
        str_io = io.StringIO()
        try:
            try:
                sys.stdout = sys.stderr = str_io
                cfg.sshAccess(remoteAccessNode,
                              ssh=remoteAccessMethod,
                              username=sshUserName,
                              password=sshPassword,
                              PrivateKeyFile=sshPrivateKeyFile,
                              PrivateKeyPassword=sshPrivateKeyPassword,
                              logfile=str_io,
                              command='ls')
            finally:
                # always give the real streams back, also on Ctrl-C
                sys.stdout = sys.__stdout__
                sys.stderr = sys.__stderr__

            successfully_connected = True
            break
        except Exception:
            response = str_io.getvalue()

            log.debug(
                "Had attempted to access resource without password and failed, below is resource response"
                + "=" * 80 + str_io.getvalue() + "=" * 80)

            # check if it asking for passphrase
            m = re.search(r"Enter passphrase for key '(.*)':", response)
            if m:
                if passphrase_entrance_count >= 3:
                    sshPrivateKeyPassword = None
                    sshPrivateKeyFile = None
                    break
                if passphrase_entrance_count > 0:
                    log.error("Incorrect passphrase try again")
                sshPrivateKeyFile = m.group(1)
                log.log_input("Enter passphrase for key '%s':" %
                              sshPrivateKeyFile)
                sshPrivateKeyPassword = getpass.getpass("")
                passphrase_entrance_count += 1
                continue
            m2 = re.search(r"[pP]assword:", response)
            if m is None and sshPrivateKeyFile is not None and m2:
                log.warning(
                    "Can not login to head node. "
                    "Probably the public key of private key was not authorized on head node"
                )
                log.info(
                    "Will try to add public key to list of authorized keys on head node"
                )
                key_authorized = False
                while True:
                    try:
                        authorize_key_count += 1
                        log.log_input(
                            "Enter password for %s@%s (will be used only during this session):"
                            % (sshUserName, remoteAccessNode))
                        sshPassword4thisSession = getpass.getpass("")
                        log.empty_line()
                        str_io = io.StringIO()
                        try:
                            sys.stdout = sys.stderr = str_io
                            cfg.sshAccess(remoteAccessNode,
                                          ssh='ssh-copy-id',
                                          username=sshUserName,
                                          password=sshPassword4thisSession,
                                          PrivateKeyFile=sshPrivateKeyFile,
                                          PrivateKeyPassword=None,
                                          logfile=str_io,
                                          command='')
                        finally:
                            sys.stdout = sys.__stdout__
                            sys.stderr = sys.__stderr__

                        # show what ssh-copy-id reported, not the output of
                        # the earlier failed login
                        log.info(str_io.getvalue())

                        log.info(
                            "Have added public key to list of authorized keys on head node, "
                            "will attempt to connect again.")
                        log.empty_line()
                        key_authorized = True
                        break
                    except Exception:
                        if verbose:
                            log.debug(
                                "Had attempted to add public key to list of authorized keys on head node and failed, "
                                + "below is resource response" + "=" * 80 +
                                str_io.getvalue() + "=" * 80)
                        log.error("Incorrect password try again.")
                        if authorize_key_count >= 3:
                            break
                # retry the connection after a successful key installation,
                # including one achieved on the third password attempt; the
                # counter still bounds the total number of rounds
                if key_authorized and authorize_key_count <= 3:
                    continue
            break
    return successfully_connected
Exemple #13
0
    def install_cron_scripts(self):
        """Install or update the AKRR entries in the current user's crontab.

        Does nothing beyond logging when the module-level ``dry_run`` flag is
        set. Otherwise the existing crontab (``crontab -l``) is scanned,
        ignoring commented lines, for a ``MAILTO`` setting and for the AKRR
        ``restart`` and ``checknrestart`` daemon entries. A ``MAILTO`` that
        differs from ``self.cron_email`` is replaced, or commented out when no
        e-mail is configured; missing entries are added. The result is
        installed with ``crontab`` through the temporary file ``.crontmp`` in
        the current directory.

        An empty ``self.cron_email`` is treated like ``None``.
        """
        log.info("Installing cron entries")
        if dry_run:
            return

        cron_email = self.cron_email or None
        mail = "MAILTO = " + cron_email if cron_email else None
        restart = "50 23 * * * " + akrr_bin_dir + "/akrr daemon -cron restart"
        checknrestart = "33 * * * * " + akrr_bin_dir + "/akrr daemon -cron checknrestart"

        try:
            crontab_content = subprocess.check_output("crontab -l", shell=True)
        except (subprocess.CalledProcessError, OSError):
            # crontab -l fails when the user has no crontab
            log.info("Crontab does not have user's crontab yet")
            crontab_content = []
        else:
            # keep line endings so lines can be written back unchanged
            crontab_content = crontab_content.decode("utf-8").splitlines(True)

        mail_updated = False
        mail_there = False
        restart_there = False
        checknrestart_there = False

        for i, line in enumerate(crontab_content):
            stripped = line.strip()
            if len(stripped) <= 1 or stripped[0] == "#":
                continue

            m = re.match(r'^MAILTO\s*=\s*(.*)', stripped)
            if m:
                # compare against the address actually found in the crontab
                existing_email = m.group(1).replace('"', '').strip()
                mail_there = True
                if cron_email != existing_email:
                    if mail:
                        crontab_content[i] = mail + "\n"
                    else:
                        crontab_content[i] = "#" + line
                    mail_updated = True

            if "akrr" in line and "daemon" in line:
                # word boundary: "checknrestart" must not count as "restart"
                if re.search(r"\brestart\b", line):
                    restart_there = True
                if "checknrestart" in line:
                    checknrestart_there = True

        if mail_updated:
            log.info("Cron's MAILTO was updated")

        mail_as_wanted = ((cron_email is not None and mail_there)
                          or (cron_email is None and not mail_there))
        if (mail_as_wanted and restart_there and checknrestart_there
                and not mail_updated):
            log.warning(
                "All AKRR crond entries found. No modifications necessary.")
            return

        # appended entries must not be glued onto an unterminated last line
        if crontab_content and not crontab_content[-1].endswith("\n"):
            crontab_content[-1] += "\n"

        if mail is not None and not mail_there:
            crontab_content.insert(0, mail + "\n")
        if not restart_there:
            crontab_content.append(restart + "\n")
        if not checknrestart_there:
            crontab_content.append(checknrestart + "\n")

        try:
            with open('.crontmp', 'w') as f:
                f.writelines(crontab_content)
            subprocess.call("crontab .crontmp", shell=True)
        finally:
            # do not leave the temporary file behind when something failed
            if os.path.exists('.crontmp'):
                os.remove('.crontmp')
        log.info("Cron Scripts Processed!")