Ejemplo n.º 1
0
# NOTE(review): this snippet was damaged by a secret-masking filter. Every
# `******` run below stands for source text that is no longer available (in
# the signature, apparently the remaining parameters; in the password loop,
# the statements between the three messages). The block is not valid Python
# as it stands -- restore it from the upstream file before using it.
def _read_username_password(prompt="Enter username:"******"user",
                            password_on_default_user=None):
    # Show the prompt text through the project logger.
    log.log_input(prompt)

    # Ask for a username only when none was supplied; an empty answer selects
    # the default username.
    if username is None:
        username = input('[{0}] '.format(default_username))
        if username == '':
            username = default_username
    else:
        log.info("User, " + username + ", already entered.")

    # For the default user, fall back to the pre-set password (if any) so the
    # password prompt below is skipped.
    if username == default_username and password is None and password_on_default_user is not None:
        password = password_on_default_user

    if password is None:
        while True:
            # NOTE(review): masked line -- presumably reads the password twice
            # and repeats until both entries match; confirm against upstream.
            log.log_input("Please specify a password:"******"Please reenter the password:"******"Entered passwords do not match. Please try again.")
    else:
        log.info("Password already entered.")
    return username, password
Ejemplo n.º 2
0
def connect_to_resource(resource):
    """Open a remote shell to the resource described by ``resource``.

    Everything written to stdout/stderr while connecting is captured and only
    shown (as part of the critical message) when the connection fails.

    :param resource: resource dictionary; the keys 'name' and
        'sshPrivateKeyFile' are read here, the whole dictionary is handed to
        ``cfg.sshResource``.
    :return: the object returned by ``cfg.sshResource``.

    Terminates the process with exit status 1 when the private key file is
    configured but missing, or when ``cfg.sshResource`` raises ``akrrError``.
    """
    log.info("Validating resource accessibility. Connecting to %s.",
             resource['name'])
    private_key_file = resource['sshPrivateKeyFile']
    if private_key_file is not None and not os.path.isfile(private_key_file):
        log.error("Can not access ssh private key (%s)"
                  "", private_key_file)
        exit(1)

    str_io = io.StringIO()
    try:
        sys.stdout = sys.stderr = str_io
        try:
            rsh = cfg.sshResource(resource)
        finally:
            # Restore the real streams on every exit path, including
            # exceptions other than akrrError; otherwise all later output of
            # the process would silently go into str_io.
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
    except akrrError:
        log.critical("Can not connect to %s\nMessage:\n%s", resource['name'],
                     str_io.getvalue())
        exit(1)

    log.info("Successfully connected to %s\n", resource['name'])
    log.empty_line()

    return rsh
Ejemplo n.º 3
0
def get_system_characteristics():
    """Ask for the cores-per-node count until an integer is entered.

    The accepted value is stored in the module-level ``ppn``.
    """
    global ppn
    answered = False
    while not answered:
        try:
            log.log_input("Enter processors (cores) per node count:")
            ppn = int(input(""))
        except (ValueError, TypeError):
            log.error("Incorrect entry, try again.")
        else:
            answered = True
Ejemplo n.º 4
0
def check_shell(rsh, resource):
    """Verify that the login shell on the resource head node is bash.

    Runs ``echo $BASH`` through ``cfg.sshCommand`` and exits with status 1
    when the reply does not mention bash.
    """
    log.info("Checking if shell is BASH\n")
    bash_reply = cfg.sshCommand(rsh, "echo $BASH")
    if bash_reply.count("bash") == 0:
        log.error(
            "Shell on headnode of %s is not BASH, change it to bash and try again.\n",
            resource['name'])
        exit(1)
    else:
        log.info("Shell is BASH\n")
Ejemplo n.º 5
0
    def check_previous_installation():
        """Refuse to continue when an AKRR configuration file already exists.

        Logs manual uninstall instructions and exits with status 1 if
        ``akrr_cfg`` is present; does nothing otherwise.
        """
        if not os.path.exists(akrr_cfg):
            return

        pieces = [
            "This is a fresh installation script. ", akrr_home,
            " contains previous AKRR installation. Either uninstall it or see documentation on updates.\n\n",
            "To uninstall AKRR manually:\n\t1)remove ", akrr_cfg,
            "\n\t\trm ", akrr_cfg, "\n",
            "\t2) (optionally for totally fresh start) drop mod_akrr and mod_appkernel database\n",
            "\t\tDROP DATABASE mod_appkernel;\n",
            "\t\tDROP DATABASE mod_akrr;\n\n",
        ]
        log.error("".join(pieces))
        exit(1)
Ejemplo n.º 6
0
    def init_mysql_dbs(self):
        """Create the databases, users and grants that still need setting up.

        Each of the three databases (described by the ``akrr_db_*``,
        ``ak_db_*`` and ``xd_db_*`` attributes) is processed only when its
        ``*_su_user_name`` attribute is not None. The first two get "ALL"
        privileges, the third only "SELECT".

        Any exception is printed with its traceback, logged, and ends the
        process with exit status 1.
        """
        try:

            def _create_db_user_gran_priv_if_needed(con_fun, user, password,
                                                    db, priv):
                """Create ``db``, create ``user`` if absent and grant ``priv``.

                :param con_fun: callable invoked as ``con_fun(True, None)``
                    that returns a ``(connection, cursor)`` pair --
                    presumably a super-user connection with no database
                    selected; confirm against the ``get_*_db`` methods.
                :param user: name of the database user to create/grant to.
                :param password: password used only when the user is created.
                :param db: database name.
                :param priv: privilege list placed after ``GRANT``.
                """
                log.info("Creating %s and user to access it if needed" %
                         (db, ))
                su_con, su_cur = con_fun(True, None)
                # Host part of the account name, as reported by the helper
                # for this connection.
                client_host = get_db_client_host(su_cur)

                # The database name is spliced into the statement text after
                # passing through cv() -- presumably an identifier
                # validator/sanitizer; confirm.
                _cursor_execute(
                    su_cur, "CREATE DATABASE IF NOT EXISTS %s" % (cv(db), ))

                su_cur.execute(
                    "SELECT * FROM mysql.user WHERE User=%s AND Host=%s",
                    (user, client_host))
                if len(su_cur.fetchall()) == 0:
                    # Older versions of MySQL do not support
                    # CREATE USER IF NOT EXISTS, so existence is checked
                    # explicitly above.
                    _cursor_execute(su_cur,
                                    "CREATE USER %s@%s IDENTIFIED BY %s",
                                    (user, client_host, password))
                # Privilege and database are concatenated into the statement
                # (after cv()); user and host are passed as parameters.
                _cursor_execute(
                    su_cur,
                    "GRANT " + cv(priv) + " ON " + cv(db) + ".* TO %s@%s",
                    (user, client_host))

                su_con.commit()

            # During self.read_db_creds the db and user were checked and,
            # if they did not exist or were not good enough, super-user
            # credentials were requested. So a non-None super-user name means
            # that the user, the db or the user's privileges need to be set.
            if self.akrr_db_su_user_name is not None:
                _create_db_user_gran_priv_if_needed(self.get_akrr_db,
                                                    self.akrr_db_user_name,
                                                    self.akrr_db_user_password,
                                                    self.akrr_db_name, "ALL")
            if self.ak_db_su_user_name is not None:
                _create_db_user_gran_priv_if_needed(self.get_ak_db,
                                                    self.ak_db_user_name,
                                                    self.ak_db_user_password,
                                                    self.ak_db_name, "ALL")
            if self.xd_db_su_user_name is not None:
                _create_db_user_gran_priv_if_needed(self.get_xd_db,
                                                    self.xd_db_user_name,
                                                    self.xd_db_user_password,
                                                    self.xd_db_name, "SELECT")

        except Exception as e:
            # Show the full traceback on the console in addition to the
            # one-line log message, then abort the installation.
            import traceback
            traceback.print_exc()
            log.error("Can not execute the sql setup script: " + str(e))
            exit(1)
Ejemplo n.º 7
0
    def run(self):
        """Parse the command line and dispatch to the selected command.

        Logs an error when the parsed arguments carry no ``func`` handler.
        """
        log.info("AKRR Regression Tests")
        parsed = self.root_parser.parse_args()
        self.process_common_args(parsed)

        if not hasattr(parsed, "func"):
            log.error("There is no command specified!")
            return
        parsed.func(parsed)
Ejemplo n.º 8
0
    def check_utils():
        """Check that the external programs ssh and openssl are in PATH.

        Logs one message line per missing program and exits with status 1 if
        any of them cannot be found.
        """
        # shutil.which replaces distutils.spawn.find_executable: distutils is
        # deprecated and no longer part of the standard library in
        # Python 3.12+.
        from shutil import which

        errmsg = "".join(
            "Can not find %s in PATH, please install it.\n" % (tool, )
            for tool in ('ssh', 'openssl') if not which(tool))

        if errmsg != "":
            log.error(errmsg)
            exit(1)
Ejemplo n.º 9
0
def check_connection_to_rest_api():
    """Probe the AKRR REST API by requesting ``/scheduled_tasks``.

    A non-200 reply is logged together with the server response and ends the
    process with status 1. Any exception raised while talking to the API is
    logged as critical and re-raised.
    """
    try:
        response = akrrrestclient.get('/scheduled_tasks')
        if response.status_code == 200:
            return
        log.error(
            "Can not get token for AKRR REST API ( %s )\nSee server response below\n%s",
            akrrrestclient.restapi_host,
            json.dumps(response.json(), indent=4))
        exit(1)
    except Exception as err:
        log.critical(
            "Can not connect to AKRR REST API ( %s )\nIs it running?\nSee full error report below",
            akrrrestclient.restapi_host)
        raise err
Ejemplo n.º 10
0
def check_appsig(rsh, resource):
    """Run the app.signature calculator on the head node and check its output.

    The script ``appsigcheck.sh`` under ``resource['appKerDir']`` is executed
    against ``md5sum``; the output must contain both the signature marker and
    an ``MD5:`` entry. On failure the process exits with status 1, except in
    dry-run mode where the failure is only reported.
    """
    log.info("Testing app.signature calculator on headnode\n")
    command = "%s/execs/bin/appsigcheck.sh `which md5sum`" % (
        resource['appKerDir'], )
    out = cfg.sshCommand(rsh, command)

    if out.count("===ExeBinSignature===") > 0 and out.count("MD5:") > 0:
        log.info("App.signature calculator is working on headnode\n")
        return

    if dry_run:
        log.dry_run("App.signature calculator is not working\n")
        return

    log.error(
        "App.signature calculator is not working\n"
        "See full error report below\n%s", out)
    exit(1)
Ejemplo n.º 11
0
# NOTE(review): this snippet was damaged by a secret-masking filter. The
# `******` run inside the loop stands for source text that is no longer
# available, so the block is not valid Python as it stands -- restore it from
# the upstream file before using it.
def _read_sql_su_credentials(host, port):
    # Keep asking until a connection with the entered credentials succeeds.
    while True:
        log.log_input(
            "Please provide an administrative database user (for {}:{}) "
            "under which the installation sql script should "
            "run (This user must have privileges to create "
            "users and databases).".format(host, port))
        # NOTE(review): masked line -- presumably the username prompt was
        # followed by a separate log_input() call announcing the password
        # prompt; confirm against upstream.
        su_username = input("Username: "******"Please provide the password for the the user which you previously entered:"
        )
        su_password = getpass.getpass()

        try:
            # The connection is opened only to validate the credentials; its
            # result is discarded.
            get_con_to_db(su_username, su_password, host, port)
            return su_username, su_password
        except Exception as e:
            log.error("MySQL error: " + str(e))
            log.error("Entered credential is not valid. Please try again.")
Ejemplo n.º 12
0
def check_rw_db(connection_func, pre_msg, post_msg):
    """
    Verify read/write (including CREATE TABLE) privileges on a database.

    A scratch table ``CREATE_ME`` is created through one connection and
    dropped again through a second one; failures of either step are logged.

    :type connection_func function
    :type pre_msg str
    :type post_msg str

    :param connection_func: callable returning a (connection, cursor) tuple.
    :param pre_msg:         message logged before the checks start.
    :param post_msg:        message logged after the CREATE TABLE attempt; it
                            is formatted with the boolean outcome.
    :return: True if the table could be created, False otherwise.
    """
    log.info(pre_msg)
    success = False

    try:
        connection, cursor = connection_func()

        try:
            with connection:
                created = cursor.execute(
                    "CREATE TABLE CREATE_ME(`id` INT NOT NULL PRIMARY KEY, `name` VARCHAR(48));"
                )
                success = bool(created == 0)

                report = log.info if success else log.error
                report(post_msg, success)

        except MySQLdb.Error as create_err:
            log.error(
                'Unable to create a table w/ the provided username. %s: %s',
                create_err.args[0], create_err.args[1])

        # The clean-up uses a fresh connection.
        connection, cursor = connection_func()
        try:
            with connection:
                cursor.execute("DROP TABLE CREATE_ME;")
        except MySQLdb.Error as drop_err:
            log.error(
                'Unable to drop the table created to check permissions. %s: %s',
                drop_err.args[0], drop_err.args[1])

    except MySQLdb.Error as connect_err:
        log.error('Unable to connect to Database. %s: %s',
                  connect_err.args[0], connect_err.args[1])

    return success
Ejemplo n.º 13
0
def db_check(mod_akrr=True, mod_appkernel=True, modw=True):
    """Check access to the mod_akrr, mod_appkernel and modw databases.

    All three checks are always executed; the flags only select which results
    count towards the overall verdict.

    :param mod_akrr: include the mod_akrr read/write check in the verdict.
    :param mod_appkernel: include the mod_appkernel read/write check.
    :param modw: include the modw read-only check.
    :return: True when every selected check passed, False otherwise.
    """
    from akrr import cfg

    akrr_ok = check_rw_db(cfg.getDB,
                          "Checking 'mod_akrr' Database / User privileges...",
                          "'mod_akrr' Database check complete - Status: %s")

    app_kernel_ok = check_rw_db(
        cfg.getAKDB, "Checking 'mod_appkernel' Database / User privileges...",
        "'mod_appkernel' Database check complete - Status: %s")

    xdmod_ok = check_r_db(cfg.getXDDB,
                          "Checking 'modw' Database / User privileges...",
                          "'modw' Database check complete - Status: %s")

    # (counts towards the verdict?, outcome) for each database
    outcomes = (
        (mod_akrr, akrr_ok),
        (mod_appkernel, app_kernel_ok),
        (modw, xdmod_ok),
    )

    if all(passed for counted, passed in outcomes if counted):
        log.info("All Databases / User privileges check out!")
        return True

    log.error(
        "One or more of the required databases and their required users ran into a problem. Please take note of the previous messages, correct the issue and re-run this script."
    )
    return False
Ejemplo n.º 14
0
 def init_dir(self):
     """Create the AKRR directory tree under ``akrr_home``.

     Directories that already exist are left alone. Any failure is logged
     and ends the process with exit status 1.
     """
     # Path components relative to akrr_home; parents come before children.
     sub_dirs = (
         (),
         ('etc', ),
         ('etc', 'resources'),
         ('log', ),
         ('log', 'data'),
         ('log', 'comptasks'),
         ('log', 'akrrd'),
     )
     try:
         log.info("Creating directories structure.")
         for parts in sub_dirs:
             path = os.path.join(akrr_home, *parts) if parts else akrr_home
             if not os.path.isdir(path):
                 _make_dirs(path)
     except Exception as e:
         log.error("Can not create directories: " + str(e))
         exit(1)
Ejemplo n.º 15
0
def _remove_from_crontab(remove_mailto=False):
    """Remove the AKRR entries from the current user's crontab.

    A crontab line is dropped when it mentions both ``akrr`` and
    ``restart.sh`` (which also covers ``checknrestart.sh``), or, with
    ``remove_mailto`` set, when it contains ``MAILTO``.

    The filtered crontab is installed through a temporary ``.crontmp`` file
    in the current directory. The file is written only when there is
    something to install and is always removed afterwards. In dry-run mode
    nothing is written or installed; the new content is only logged.

    :param remove_mailto: also remove lines containing ``MAILTO``.
    """
    try:
        crontab_content = subprocess.check_output("crontab -l", shell=True)
    except subprocess.CalledProcessError:
        log.error("Can not run crontab -l")
        return

    def _is_removable(line):
        """Tell whether a crontab line has to be dropped."""
        if 'akrr' in line and 'restart.sh' in line:
            return True
        return remove_mailto and 'MAILTO' in line

    lines = crontab_content.decode("utf-8").splitlines(True)
    kept_lines = [line for line in lines if not _is_removable(line)]

    if len(kept_lines) == len(lines):
        log.info("There was no AKRR records detected in crontab list")
        return

    log.info("AKRR Section present in crontab. Cleaning crontab.")
    new_content = "".join(kept_lines)

    if dry_run:
        log.info("DRY RUN: should run `crontab .crontmp`. .crontmp:" + new_content)
        return

    with open(".crontmp", "w") as f:
        f.write(new_content)
    try:
        output = subprocess.check_output("crontab .crontmp", shell=True).decode("utf-8")
        log.debug(output)
    except subprocess.CalledProcessError:
        log.error("Can not run crontab .crontmp")
    finally:
        # Never leave the temporary file behind, whatever happened above.
        os.remove(".crontmp")
Ejemplo n.º 16
0
def check_dir(sh, d, exit_on_fail=True, try_to_create=True):
    """Check that directory ``d`` on the remote machine is usable.

    :param sh: remote shell session; ``sh.remotemachine`` is used in messages.
    :param d: directory path on the remote machine.
    :param exit_on_fail: when False, hand the result back to the caller
        instead of terminating the process on failure.
    :param try_to_create: when True and the directory is missing, try to
        create it with ``mkdir -p`` (in dry-run mode nothing is created and
        the directory is reported as accessible).
    :return: ``(status, msg)`` where ``status`` is None for a missing
        directory, True for an accessible one and anything else for a
        directory that is not readable/writable.

    With ``exit_on_fail`` set, a missing or inaccessible directory is logged
    and the process exits with status 1.
    """
    status, msg = check_dir_simple(sh, d)
    if try_to_create is True and status is None:
        log.info("Directory %s:%s does not exists, will try to create it",
                 sh.remotemachine, d)
        if not dry_run:
            cmd = "mkdir -p \"%s\"" % (d, )
            cfg.sshCommand(sh, cmd)
            status, msg = check_dir_simple(sh, d)
        else:
            status, msg = (True,
                           "Directory exist and accessible for read/write")
    if exit_on_fail is False:
        return status, msg

    if status is True:
        return True, msg

    if status is None:
        log.error("Directory %s:%s does not exists!", sh.remotemachine, d)
    else:
        log.error("Directory %s:%s is NOT accessible for read/write!",
                  sh.remotemachine, d)
    # Report the failure through a non-zero exit status; a bare exit() would
    # signal success to the calling shell.
    exit(1)
Ejemplo n.º 17
0
def resource_deploy(resource_name, test_appkernel=None, test_nodes=None, deploy_timeout=600, **_):
    """Run ``akrr resource deploy`` in a bash session and check that it worked.

    The command output is redirected to the file ``out`` in the current
    directory; deployment counts as successful when that output contains
    "you can move to next step". Otherwise the output is logged and the
    process exits with status 1.

    :param resource_name: value passed with ``-r``.
    :param test_appkernel: value passed with ``-a`` when not None.
    :param test_nodes: when not None the ``-n`` flag is added.
    :param deploy_timeout: timeout handed to ``bash.justExpect`` while
        waiting for the prompt.
    :param _: extra keyword arguments are accepted and ignored.
    """
    bash = get_bash()
    bash.output = ""
    bash.timeoutMessage = 'Unexpected behavior of prep.sh (premature EOF or TIMEOUT)'

    bash.runcmd('which python3', printOutput=True)
    bash.runcmd('which ' + cfg.which_akrr, printOutput=True)

    # now deploy
    # NOTE(review): `-n` is emitted without the value of test_nodes -- confirm
    # against the `akrr resource deploy` command-line definition.
    cmd = "{}{} resource deploy{}{}{}{}".format(
        cfg.which_akrr,
        " -v" if cfg.verbose else "",
        " -r " + resource_name,
        " -a {}".format(test_appkernel) if test_appkernel is not None else "",
        " --dry-run" if cfg.dry_run else "",
        " -n" if test_nodes is not None else "")
    bash.startcmd(cmd + " > out")

    bash.justExpect(bash.prompt, timeout=deploy_timeout)

    # Close the output file deterministically instead of relying on garbage
    # collection.
    with open("out", "rt") as fin:
        out = fin.read()
    if "you can move to next step" not in out:
        log.error("Unsuccessful deployment\n" + out)
        exit(1)
Ejemplo n.º 18
0
def submit_test_job(resource, app_name="test", nodes=2):
    """Submit a test task through the AKRR REST API and record its id.

    :param resource: resource dictionary; ``resource['name']`` is sent as the
        target resource.
    :param app_name: application kernel name to submit.
    :param nodes: node count placed into the ``resource_param`` string.
    :return: the task id taken from the server reply.

    The task id is also written to the lock file named by
    ``get_test_job_lock_filename``. A non-200 reply is logged and ends the
    process with status 1; any other failure is logged as critical and
    re-raised.
    """
    # submit test job
    r = None
    try:
        payload = {
            'resource': resource['name'],
            'app': app_name,
            'resource_param': "{'nnodes':%d}" % nodes,
            'task_param': "{'test_run':True}"
        }
        r = akrrrestclient.post('/scheduled_tasks', data=payload)
        if r.status_code != 200:
            log.error(
                "Can not submit task through AKRR REST API ( %s )\nSee server response below\n%s\n",
                akrrrestclient.restapi_host, json.dumps(r.json(), indent=4))
            exit(1)
        task_id = r.json()['data']['data']['task_id']
    except Exception:
        if r is not None:
            # The failure being handled may be the reply body not being valid
            # JSON; decoding it again unguarded would raise inside this
            # handler and hide the message below.
            try:
                details = r.json()
            except ValueError:
                details = getattr(r, 'text', '<response body is not valid JSON>')
            log.critical(
                "Can not submit task through AKRR REST API ( %s )\n"
                "Is it still running?\nSee full error report below\n%s",
                akrrrestclient.restapi_host, details)
        else:
            log.critical(
                "Can not submit task through AKRR REST API ( %s )\n"
                "Is it still running?\n", akrrrestclient.restapi_host)
        raise

    # write file with task_id
    test_job_lock_filename = get_test_job_lock_filename(resource, app_name)
    with open(test_job_lock_filename, "w") as fout:
        print(task_id, file=fout)

    log.info("\nSubmitted test job to AKRR, task_id is %d\n", task_id)
    return task_id
Ejemplo n.º 19
0
def enable_resource_for_execution(resource):
    """populate mod_appkernel database and allow execution of jobs on this resource

    Two independent steps, each with its own error handling:

    1. Make sure the resource has a row in the ``resource`` table reached
       through ``cfg.getAKDB`` (inserting one if needed) and mark that row
       enabled and visible.
    2. Switch the resource on through the AKRR REST API.

    A failure in either step is only logged; step 2 is attempted even when
    step 1 failed. Nothing is done in dry-run mode.

    :param resource: resource dictionary; 'name' and 'info' are read here.
    """
    if dry_run:
        return
    resource_name = resource['name']
    try:
        # Rows are indexed by column name below, so the cursor presumably
        # returns dict-like rows when called with True -- confirm in cfg.
        con_ak, cur_ak = cfg.getAKDB(True)

        cur_ak.execute('''SELECT * FROM resource WHERE nickname=%s''',
                       (resource_name, ))
        resource_in_ak_db = cur_ak.fetchall()
        if len(resource_in_ak_db) == 0:
            log.warning(
                "There is no record of %s in mod_appkernel.resource will add one.",
                resource_name)
            # New rows start disabled and hidden; they are switched on by the
            # UPDATE below.
            cur_ak.execute(
                '''INSERT INTO resource (resource,nickname,description,enabled,visible)
                        VALUES(%s,%s,%s,0,0);''',
                (resource['name'], resource['name'], resource['info']))
            con_ak.commit()

            # Re-read to obtain the resource_id of the row just inserted.
            cur_ak.execute('''SELECT * FROM resource WHERE nickname=%s''',
                           (resource_name, ))
            resource_in_ak_db = cur_ak.fetchall()
        resource_in_ak_db = resource_in_ak_db[0]
        # enable and make visible
        cur_ak.execute(
            '''UPDATE resource
                        SET enabled=1,visible=1
                        WHERE resource_id=%s;''',
            (resource_in_ak_db['resource_id'], ))
        con_ak.commit()
        log.info(
            "Enabled %s in mod_appkernel.resource for tasks execution and made it visible to XDMoD UI.",
            resource_name)
    except MySQLdb.Error:
        # Any MySQL error raised above (connecting or querying) ends up here
        # and is reported with this one message.
        log.error("Can not connect to AK DB\n" "Probably invalid credential")

    # enabling resource for execution
    try:
        r = akrrrestclient.put('/resources/' + resource_name + '/on')
        if r.status_code == 200:
            log.info('Successfully enabled ' + resource_name)
        else:
            log.error(
                "Can not enable resource through AKRR REST API ( %s )\nSee server response below\n%s",
                akrrrestclient.restapi_host, json.dumps(r.json(), indent=4))
    except requests.RequestException:
        log.error(
            "Can not enable resource through AKRR REST API ( %s )\n"
            "Is it still running?\n", akrrrestclient.restapi_host)
Ejemplo n.º 20
0
def validate_resource_name(m_resource_name):
    """Tell whether ``m_resource_name`` can be used for a new resource.

    The name is rejected (with an error message) when it is blank, when a
    configuration directory of that name already exists under
    ``resources_dir``, or when either the mod_appkernel or the mod_akrr
    database already has a resource of that name.

    :param m_resource_name: candidate resource name.
    :return: True when the name is free to use, False otherwise.
    """
    if m_resource_name.strip() == "":
        log.error("Bad name for resource, try a different name")
        return False

    # a configuration directory of that name must not exist yet
    config_path = os.path.abspath(
        os.path.join(resources_dir, m_resource_name))
    if os.path.exists(config_path):
        dir_exists_msg = (
            "Resource configuration directory (%s) for resource with name %s "
            "already present on file system, try a different name")
        log.error(dir_exists_msg % (config_path, m_resource_name))
        return False

    # the name must be unknown to mod_appkernel
    _, ak_cursor = cfg.getAKDB(True)
    ak_cursor.execute('''SELECT * FROM resource WHERE nickname=%s''',
                      (m_resource_name, ))
    if len(ak_cursor.fetchall()) != 0:
        in_ak_db_msg = "Resource with name %s already present in mod_appkernel DB, try a different name"
        log.error(in_ak_db_msg % (m_resource_name, ))
        return False

    # the name must be unknown to mod_akrr
    _, akrr_cursor = cfg.getDB(True)
    akrr_cursor.execute('''SELECT * FROM resources WHERE name=%s''',
                        (m_resource_name, ))
    if len(akrr_cursor.fetchall()) != 0:
        in_akrr_db_msg = "Resource with name %s already present in mod_akrr DB, try a different name"
        log.error(in_akrr_db_msg % (m_resource_name, ))
        return False

    return True
Ejemplo n.º 21
0
def check_r_db(connection_func, pre_msg, post_msg):
    """
    Verify read privileges on a database by selecting from
    `modw`.`resourcefact` through the connection that 'connection_func'
    provides.

    :type connection_func function
    :type pre_msg str
    :type post_msg str

    :param connection_func: callable returning a (connection, cursor) tuple.
    :param pre_msg:         message logged before the check starts.
    :param post_msg:        message logged after the SELECT; it is formatted
                            with the boolean outcome.
    :return: True if the SELECT could be executed, False otherwise.
    """
    log.info(pre_msg)
    success = False

    try:
        connection, cursor = connection_func()

        try:
            with connection:
                selected = cursor.execute(
                    "SELECT COUNT(*) FROM `modw`.`resourcefact`;")
                success = bool(selected >= 0)

                report = log.info if success else log.error
                report(post_msg, success)

        except MySQLdb.Error as select_err:
            log.error('Unable to select from `modw`.`resourcefact`. %s: %s',
                      select_err.args[0], select_err.args[1])

    except MySQLdb.Error as connect_err:
        log.error('Unable to connect to Database. %s: %s',
                  connect_err.args[0], connect_err.args[1])

    return success
Ejemplo n.º 22
0
from akrr.util.sql import db_check_priv
from akrr.util.sql import get_db_client_host

# Python version
# Compare as a tuple: testing major and minor independently would also reject
# any release whose major version is above 3 but whose minor version is
# below 4.
if sys.version_info < (3, 4):
    log.critical("Python should be of version 3.4+. This one is " +
                 sys.version)
    exit(1)

# openssl must be installed: look it up with `which`, and on failure explain
# how to install it before re-raising the error.
try:
    subprocess.check_output("which openssl", shell=True)
except Exception as openssl_lookup_error:
    log.error(
        "openssl program is not available. Install it!\n"
        "    For example by running\n"
        "    on CentOS\n"
        "        sudo yum install openssl openssh-clients\n"
        "    on Ubuntu:\n"
        "        sudo apt-get install openssl")
    raise openssl_lookup_error

# The AKRR configuration can be in one of three places:
# 1) AKRR_CONF, if the AKRR_CONF environment variable is defined
# 2) ~/akrr/etc/akrr.conf, if initiated from an RPM or a global python install
# 3) <path to AKRR sources>/etc/akrr.conf, for an in-source installation

# Starts as False; the code that follows may change it.
in_src_install = False

# Parent of the directory that contains this source file.
akrr_mod_dir = os.path.dirname(
    os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))))
# Not known yet; determined by the checks that follow.
akrr_bin_dir = None
if os.path.isfile(os.path.join(os.path.dirname(akrr_mod_dir), 'bin', 'akrr')):
Ejemplo n.º 23
0
def get_remote_access_method():
    """Interactively work out how to reach the resource and connect to it.

    Steps, all driven by console prompts and stored in module globals:

    1. Ask for the head node name (default: ``resource_name``) and ping it;
       an unreachable node is accepted only when ``no_ping`` is set.
    2. Ask for the user name (default: the current local user) and test the
       connection with ``check_connection_to_resource()``.
    3. While the connection fails, offer a menu: try again, use an existing
       key pair from ``~/.ssh``, generate and install a new key pair, or use
       a password. The loop ends only once a connection test succeeds.
    4. Open the session with ``cfg.sshAccess`` and keep it in ``rsh``.

    :return: the result of the last connection test; the credential loop is
        left only on success, so this is truthy whenever the function returns.
    """
    global resource_name
    global remoteAccessNode
    global remoteAccessMethod
    global remoteCopyMethod
    global sshUserName
    global sshPassword
    global sshPassword4thisSession
    global sshPrivateKeyFile
    global sshPrivateKeyPassword
    global rsh
    global no_ping

    # set remoteAccessNode
    while True:
        log.log_input(
            "Enter Resource head node (access node) full name (e.g. headnode.somewhere.org):"
        )
        remoteAccessNode = input("[%s] " % resource_name)
        if remoteAccessNode.strip() == "":
            remoteAccessNode = resource_name

        # NOTE(review): the entered host name is spliced into a shell command
        # without quoting or validation.
        response = os.system("ping -c 1 -w2 " + remoteAccessNode +
                             " > /dev/null 2>&1")

        if response == 0:
            break
        else:
            if no_ping:
                log.warning("Can not ping %s, but asked to ignore it.",
                            remoteAccessNode)
                break
            log.error("Incorrect head node name (can not ping %s), try again",
                      remoteAccessNode)

    # set sshUserName
    current_user = getpass.getuser()
    ask_for_user_name = True

    while True:
        if ask_for_user_name:
            log.log_input("Enter username for resource access:")
            sshUserName = input("[%s] " % current_user)
            if sshUserName.strip() == "":
                sshUserName = current_user
            # The entered name becomes the default for the next prompt.
            current_user = sshUserName

        # check password-less access
        if sshPassword is None:
            log.info("Checking for password-less access")
        else:
            log.info("Checking for resource access")
        successfully_connected = check_connection_to_resource()

        if successfully_connected:
            if sshPassword is None:
                log.info("Can access resource without password")
            else:
                log.info("Can access resource")

        if successfully_connected is False:
            log.info("Can not access resource without password")
            action_list = [(
                "TryAgain",
                "The private and public keys was generated manually, right now. Try again."
            )]
            # check private keys
            # A file counts as a private key when ~/.ssh holds both <name>
            # and <name>.pub.
            user_home_dir = os.path.expanduser("~")
            private_keys = [
                os.path.join(user_home_dir, '.ssh', f[:-4])
                for f in os.listdir(os.path.join(user_home_dir, '.ssh'))
                if os.path.isfile(os.path.join(user_home_dir, '.ssh', f))
                and f[-4:] == '.pub'
                and os.path.isfile(os.path.join(user_home_dir, '.ssh', f[:-4]))
            ]

            if len(private_keys) > 0:
                action_list.append(("UseExistingPrivateKey",
                                    "Use existing private and public key."))

            action_list.append(
                ("GenNewKey", "Generate new private and public key."))
            action_list.append(("UsePassword", "Use password directly."))
            log.empty_line()

            # NOTE(review): there is no newline after the heading, so the
            # first option is printed on the same line as the heading.
            log.info("Select authentication method:" + "\n".join([
                "%3d  %s" % (i, desc)
                for i, (_, desc) in enumerate(action_list)
            ]))
            # The default answer is index 2: "GenNewKey" when existing keys
            # were found, "UsePassword" otherwise.
            while True:
                log.log_input("Select option from list above:")
                try:
                    action = input("[2] ")
                    if action.strip() == "":
                        action = 2
                    else:
                        action = int(action)

                    if action < 0 or action >= len(action_list):
                        raise ValueError()
                    break
                except (ValueError, TypeError):
                    log.error("Incorrect entry, try again.")

            # do the action
            # Every branch below ends with `continue`, which restarts the
            # outer loop and re-tests the connection with the new settings.
            # NOTE(review): ask_for_user_name is toggled rather than set to
            # False, so a second method selection re-enables the user name
            # prompt -- confirm this is intended.
            log.empty_line()
            if action_list[action][0] == "TryAgain":
                continue
            if action_list[action][0] == "UsePassword":
                log.log_input("Enter password for %s@%s:" %
                              (sshUserName, remoteAccessNode))
                sshPassword = getpass.getpass("")
                ask_for_user_name = not ask_for_user_name
                continue
            if action_list[action][0] == "UseExistingPrivateKey":
                log.info("Available private keys:" + "\n".join(
                    ["%3d  %s" % (i, p) for i, p in enumerate(private_keys)]))
                while True:
                    log.log_input("Select key number from list above:")
                    try:
                        i_key = input("")
                        i_key = int(i_key)

                        if i_key < 0 or i_key >= len(private_keys):
                            raise ValueError()
                        break
                    except (ValueError, TypeError):
                        log.error("Incorrect entry, try again.")
                sshPrivateKeyFile = private_keys[i_key]
                ask_for_user_name = not ask_for_user_name
                continue
            if action_list[action][0] == "GenNewKey":
                # Up to three attempts to enter a working password; after the
                # third failure the key generation proceeds anyway.
                count = 0
                while True:
                    log.log_input(
                        "Enter password for %s@%s (will be used only during this session):"
                        % (sshUserName, remoteAccessNode))
                    sshPassword4thisSession = getpass.getpass("")
                    sshPassword = sshPassword4thisSession

                    if check_connection_to_resource():
                        break
                    count += 1
                    if count >= 3:
                        break
                # The session password is kept only in
                # sshPassword4thisSession (used for copying the key below).
                sshPassword = None
                # generate keys
                log.log_input("Enter private key name:")
                sshPrivateKeyFile = input("[id_rsa_%s]" % resource_name)
                if sshPrivateKeyFile.strip() == "":
                    sshPrivateKeyFile = "id_rsa_%s" % resource_name
                sshPrivateKeyFile = os.path.join(user_home_dir, '.ssh',
                                                 sshPrivateKeyFile)
                log.log_input(
                    "Enter passphrase for new key (leave empty for passwordless access):"
                )
                sshPrivateKeyPassword = getpass.getpass("")
                # NOTE(review): passphrase and file name are spliced into a
                # shell command line unescaped.
                os.system("ssh-keygen -t rsa -N \"%s\" -f %s" %
                          (sshPrivateKeyPassword, sshPrivateKeyFile))
                if sshPrivateKeyPassword.strip() == "":
                    sshPrivateKeyPassword = None
                # copy keys
                cfg.sshAccess(remoteAccessNode,
                              ssh='ssh-copy-id',
                              username=sshUserName,
                              password=sshPassword4thisSession,
                              PrivateKeyFile=sshPrivateKeyFile,
                              PrivateKeyPassword=None,
                              logfile=sys.stdout,
                              command='')
                ask_for_user_name = not ask_for_user_name
                continue

        if successfully_connected:
            break
        else:
            # Reached when the connection test returned a falsy value other
            # than False; the outer loop then starts over.
            log.error("Incorrect resource access credential")

    if successfully_connected:
        log.empty_line()
        log.info("Connecting to " + resource_name)

        # Capture everything printed while connecting; it is shown (at debug
        # level) only if the connection attempt raises.
        str_io = io.StringIO()
        try:
            sys.stdout = sys.stderr = str_io
            rsh = cfg.sshAccess(remoteAccessNode,
                                ssh=remoteAccessMethod,
                                username=sshUserName,
                                password=sshPassword,
                                PrivateKeyFile=sshPrivateKeyFile,
                                PrivateKeyPassword=sshPrivateKeyPassword,
                                logfile=sys.stdout,
                                command=None)
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
        except Exception as e:
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
            log.debug(str_io.getvalue())
            raise e

        log.info("              Done")
    log.empty_line()
    return successfully_connected
Ejemplo n.º 24
0
def resource_add(config):
    """Interactively collect the parameters of a new resource and generate its configuration.

    config should have the following members:
        dry_run - dry run, no files will actually be created
        minimalistic - minimize the number of questions, configuration files will be edited manually
        no_ping - do not run ping to test the head node name
        verbose

    The answers are stored in module-level globals; the function finishes by
    calling generate_resource_config(resource_id, resource_name, queuing_system).
    """
    global verbose
    global dry_run
    global no_ping
    global minimalistic
    global resource_name
    global remoteAccessNode
    global remoteAccessMethod
    global remoteCopyMethod
    global sshUserName
    global sshPassword
    global sshPrivateKeyFile
    global sshPrivateKeyPassword
    global networkScratch
    global localScratch
    global akrrData
    global appKerDir
    global batchScheduler
    global batchJobHeaderTemplate

    # Copy the command line options into the module-level switches.
    verbose = config.verbose

    log.info("Beginning Initiation of New Resource...")
    dry_run = config.dry_run
    resource_deploy.dry_run = config.dry_run
    no_ping = config.no_ping
    minimalistic = config.minimalistic

    log.info("Retrieving Resources from XDMoD Database...")
    # RETRIEVE: the resources from XDMoD
    resources = retrieve_resources()
    log.info("Found following resources from XDMoD Database:\n" +
             "    resource_id  name\n" + "\n".join([
                 "    %11d  %-40s" % (known_id, known_name)
                 for known_name, known_id in resources
             ]) + "\n")

    if len(resources) > 0:
        while True:
            log.log_input(
                'Enter resource_id for import (enter 0 for no match):')
            resource_id = input()
            if validate_resource_id(resource_id, resources):
                break
            log.warning("Incorrect resource_id try again")
        log.empty_line()
        resource_id = int(resource_id)
    else:
        resource_id = 0

    if resource_id <= 0:  # i.e. no match from XDMoD DB
        resource_id = None

    resource_name = ""
    while True:
        if resource_id is None:
            log.log_input('Enter AKRR resource name:')
            resource_name = input()
        else:
            # offer the XDMoD name as the default answer
            resource_name2 = get_resource_name_by_id(resource_id, resources)
            log.log_input(
                'Enter AKRR resource name, hit enter to use same name as in XDMoD Database [%s]:'
                % (resource_name2, ))
            resource_name = input()
            if resource_name.strip() == "":
                resource_name = resource_name2

        if validate_resource_name(resource_name):
            break
    log.empty_line()

    while True:
        log.log_input('Enter queuing system on resource (slurm or pbs): ')
        queuing_system = input()
        if validate_queuing_system(queuing_system):
            break
        else:
            log.error("Incorrect queuing_system try again")

    batchScheduler = queuing_system
    log.empty_line()

    if minimalistic is False:
        get_remote_access_method()
        get_system_characteristics()
        get_file_system_access_points()

    def _masked(secret):
        """Hide a secret value so that it never reaches the log; None stays None."""
        return None if secret is None else "********"

    summary = [
        ("resource_name", resource_name),
        ("remoteAccessNode", remoteAccessNode),
        ("remoteAccessMethod", remoteAccessMethod),
        ("remoteCopyMethod", remoteCopyMethod),
        ("sshUserName", sshUserName),
        ("sshPassword", _masked(sshPassword)),
        ("sshPrivateKeyFile", sshPrivateKeyFile),
        ("sshPrivateKeyPassword", _masked(sshPrivateKeyPassword)),
        ("networkScratch", networkScratch),
        ("localScratch", localScratch),
        ("akrrData", akrrData),
        ("appKerDir", appKerDir),
        ("batchScheduler", batchScheduler),
        ("batchJobHeaderTemplate", batchJobHeaderTemplate),
    ]
    # one parameter per line, otherwise the summary is an unreadable run-on string
    log.debug("Summary of parameters:\n" + "".join(
        "    {}: {}\n".format(key, value) for key, value in summary))

    generate_resource_config(resource_id, resource_name, queuing_system)
    log.info("Initiation of new resource is completed.\n"
             "    Edit batchJobHeaderTemplate variable in {}\n"
             "    and move to resource validation and deployment step.\n"
             "    i.e. execute:\n"
             "        akrr resource deploy -r {}".format(
                 resource_cfg_filename, resource_name))
Ejemplo n.º 25
0
def _prompt_for_remote_dir(prompt, default):
    """Ask for a directory until resource_deploy.check_dir accepts it.

    An empty answer selects ``default``. Returns the accepted answer as
    echoed back by the remote shell (so shell variables in it get expanded).
    """
    while True:
        log.log_input(prompt)
        answer = input("[%s]" % default)
        if answer.strip() == "":
            answer = default
        status, msg = resource_deploy.check_dir(rsh,
                                                answer,
                                                exit_on_fail=False,
                                                try_to_create=True)
        if status:
            log.info(msg)
            log.empty_line()
            return cfg.sshCommand(rsh, "echo %s" % (answer, )).strip()
        log.error(msg)


def get_file_system_access_points():
    """Ask the user for the directories AKRR will use on the resource.

    Sets the module-level globals localScratch, networkScratch, appKerDir and
    akrrData. Every answer is passed through ``echo`` on the remote side
    (via the module-level ``rsh`` connection) before it is stored.
    """
    global resource_name
    global networkScratch
    global localScratch
    global akrrData
    global appKerDir

    home_dir = cfg.sshCommand(rsh, "echo $HOME").strip()
    scratch_network_dir = cfg.sshCommand(rsh, "echo $SCRATCH").strip()

    # localScratch: asked once, a failed check is only a warning
    local_scratch_default = "/tmp"
    log.log_input(
        "Enter location of local scratch (visible only to single node):")
    localScratch = input("[%s]" % local_scratch_default)
    if localScratch.strip() == "":
        localScratch = local_scratch_default
    status, msg = resource_deploy.check_dir_simple(rsh, localScratch)
    if status:
        log.info(msg)
    else:
        log.warning(msg)
        log.warning(
            'local scratch might be have a different location on head node, so if it is by design it is ok'
        )
    log.empty_line()
    localScratch = cfg.sshCommand(rsh, "echo %s" % (localScratch, )).strip()

    # networkScratch: $SCRATCH of the remote account, if set, is the default
    network_scratch_default = scratch_network_dir
    network_scratch_visible = False
    while True:
        log.log_input(
            "Enter location of network scratch (visible only to all nodes),"
            "used for temporary storage of app kernel input/output:")
        if network_scratch_default != "":
            networkScratch = input("[%s]" % network_scratch_default)
            if networkScratch.strip() == "":
                networkScratch = network_scratch_default
        else:
            networkScratch = input("")

        # strip so that a whitespace-only answer is rejected as well
        if networkScratch.strip() == "":
            log.error("Incorrect value for networkScratch, try again")
            continue

        status, msg = resource_deploy.check_dir(rsh,
                                                networkScratch,
                                                exit_on_fail=False,
                                                try_to_create=True)
        if status:
            log.info(msg)
            network_scratch_visible = True
            log.empty_line()
        else:
            # a failed check is accepted with a warning, the user is not asked again
            log.warning(msg)
        break
    networkScratch = cfg.sshCommand(rsh,
                                    "echo %s" % (networkScratch, )).strip()

    # appKerDir
    appKerDir = _prompt_for_remote_dir(
        "Enter future location of app kernels input and executable files:",
        os.path.join(home_dir, "appker", resource_name))

    # akrrData: defaults to the network scratch when that directory passed its check
    if network_scratch_visible:
        akrr_data_default = os.path.join(networkScratch, "akrr_data",
                                         resource_name)
    else:
        akrr_data_default = os.path.join(home_dir, "akrr_data", resource_name)
    akrrData = _prompt_for_remote_dir(
        "Enter future locations for app kernels working directories (can or even should be on scratch space):",
        akrr_data_default)
Ejemplo n.º 26
0
def resource_deploy(args):
    """Validate the configuration of a resource, deploy to it and run a test job.

    ``args`` must provide ``resource`` and may provide ``dry_run``,
    ``checking_frequency``, ``appkernel`` (default "test") and ``nodes``
    (default 2). The error and warning counters of ``log`` are reset at the
    start and summarised at the end.
    """
    global dry_run
    global checking_frequency

    resource_name = args.resource

    # optional settings override the module-level values only when present
    if 'dry_run' in args:
        dry_run = args.dry_run
    if "checking_frequency" in args:
        checking_frequency = args.checking_frequency

    app_name = args.appkernel if "appkernel" in args else "test"
    nodes = int(args.nodes) if "nodes" in args else 2

    log.error_count = 0
    log.warning_count = 0

    # load and validate the resource configuration
    resource = validate_resource_parameter_file(resource_name)

    # open the connection used by the checks and the deployment below
    rsh = connect_to_resource(resource)

    # shell and directory checks
    check_shell(rsh, resource)
    check_create_dirs(rsh, resource)

    # copy inputs and sources to the resource
    copy_exec_sources_and_inputs(rsh, resource)

    # app.signature calculator check on the head node
    check_appsig(rsh, resource)

    # the connection is not needed from here on
    rsh.close(force=True)
    del rsh

    # submit the test job to the queue
    run_test_job(resource, app_name, nodes)

    # finalise only when nothing was reported as an error
    if log.error_count == 0:
        append_to_bashrc(resource)
        enable_resource_for_execution(resource)
    log.empty_line()

    log.info("Result:")
    if log.error_count > 0:
        log.error("There are %d errors, fix them.", log.error_count)

    if log.warning_count > 0:
        log.warning(
            "There are %d warnings.\nif warnings have sense you can move to next step!\n",
            log.warning_count)

    if log.error_count == 0 and log.warning_count == 0:
        log.info("\nDONE, you can move to next step!\n")
Ejemplo n.º 27
0
def validate_resource_parameter_file(resource_name):
    """Validate the resource parameter file and return the resource configuration.

    The default and the resource specific configuration files are executed to
    detect syntax errors, then the resource is loaded with
    cfg.FindResourceByName and its parameters are checked for presence and
    type. The process exits with status 1 when the file is missing or a
    parameter is invalid; an exception raised while executing the files is
    logged and re-raised.
    """
    default_resource_param_filename = os.path.join(cfg.akrr_mod_dir,
                                                   "default_conf",
                                                   "default.resource.conf")
    resource_param_filename = os.path.join(cfg.cfg_dir, "resources",
                                           resource_name, "resource.conf")

    log.info("Validating %s parameters from %s", resource_name,
             resource_param_filename)

    if not os.path.isfile(resource_param_filename):
        log.error("resource parameters file (%s) does not exist!",
                  resource_param_filename)
        exit(1)

    # check syntax
    # NOTE: the configuration files are executed as Python code, so they must
    # come from a trusted location.
    try:
        tmp = {}
        for filename in (default_resource_param_filename,
                         resource_param_filename):
            # context manager: the file is closed even if reading fails
            with open(filename) as conf_file:
                source = conf_file.read()
            exec(compile(source, filename, 'exec'), tmp)
    except Exception:
        log.critical(
            "Can not load resource from %s.\nProbably invalid syntax.",
            resource_param_filename)
        raise

    # now we can load akrr
    resource = cfg.FindResourceByName(resource_name)

    # check parameters for presence and type
    # format: key, type, can be None, must have parameter
    parameters_types = [
        ['info', str, False, False],
        ['localScratch', str, False, True],
        ['batchJobTemplate', str, False, True],
        ['remoteAccessNode', str, False, True],
        ['name', str, False, False],
        ['akrrCommonCommandsTemplate', str, False, True],
        ['networkScratch', str, False, True],
        ['ppn', int, False, True],
        # ['akrrStartAppKerTemplate',      types.StringType,       False,True],
        ['remoteCopyMethod', str, False, True],
        ['sshUserName', str, False, True],
        ['sshPassword', str, True, False],
        ['sshPrivateKeyFile', str, True, False],
        ['sshPrivateKeyPassword', str, True, False],
        ['batchScheduler', str, False, True],
        ['remoteAccessMethod', str, False, True],
        ['appKerDir', str, False, True],
        ['akrrCommonCleanupTemplate', str, False, True],
        # ['nodeListSetterTemplate',      types.StringType,       False,True],
        ['akrrData', str, False, True],
    ]

    for variable, m_type, can_be_none, must in parameters_types:
        if variable not in resource:
            if must is True:
                log.error("Syntax error in %s\nVariable %s is not set",
                          resource_param_filename, variable)
                exit(1)
            # optional parameter that is absent: nothing to check
            continue

        value = resource[variable]
        if value is None:
            if can_be_none is False:
                log.error("Syntax error in %s\nVariable %s can not be None",
                          resource_param_filename, variable)
                exit(1)
            # None is explicitly allowed for this parameter
            continue

        if not isinstance(value, m_type):
            log.error(
                "Syntax error in %s\nVariable %s should be %s, but it is %s !",
                resource_param_filename, variable, str(m_type), type(value))
            exit(1)

    log.info(
        "Syntax of %s is correct and all necessary parameters are present.",
        resource_param_filename)
    log.empty_line()
    return resource
Ejemplo n.º 28
0
def analyse_test_job_results(task_id, resource, app_name="test"):
    """Analyse the output of a completed test job and report every problem found.

    The task record is fetched from the AKRR REST API (``/tasks/<task_id>``).
    The process exits with status 1 when the record can not be retrieved, when
    the task status contains "ERROR" or when the instance info status is 0
    (the test job lock file is removed in the last two cases). Otherwise the
    parsed report is checked, the compute nodes are pinged from the head node
    and every detected problem is logged and added to ``log.error_count``.

    :param task_id: integer id of the test task
    :param resource: resource configuration; 'name' and 'ppn' are read here
    :param app_name: name of the application kernel used for the test
    :raises Exception: whatever the connection to the resource raises
    """
    # function-scope imports: only needed to decode the two packed parameters
    import base64
    import gzip
    import zlib

    def _unpack(encoded):
        """Decode base64-encoded gzip-compressed text, '' if it can not be decoded."""
        if not encoded:
            return ''
        try:
            return gzip.decompress(
                base64.b64decode(encoded)).decode(errors='replace')
        except (ValueError, OSError, EOFError, zlib.error):
            return ''

    def _stripped_text(element, tag):
        """Return the stripped text of the sub-element ``tag`` or None if it is absent."""
        text = element.findtext(tag)
        return text.strip() if text is not None else None

    # used at the end to tell whether this function reported any problem
    errors_before = log.error_count

    log.info("Test job is completed analyzing output\n")
    test_job_lock_filename = get_test_job_lock_filename(resource, app_name)
    r = akrrrestclient.get('/tasks/%d' % task_id)

    if r.status_code != 200:
        log.error(
            "Can not get information about task\nSee full error report below\nAKRR server response:\n%s\n",
            r.text)
        exit(1)

    task_data = r.json()['data']['data']
    completed_tasks = task_data['completed_tasks']
    akrr_xdmod_instance_info = task_data['akrr_xdmod_instanceinfo']
    akrr_errmsg = task_data['akrr_errmsg']

    results_summary = make_results_summary(resource['name'], app_name,
                                           completed_tasks,
                                           akrr_xdmod_instance_info,
                                           akrr_errmsg)

    if completed_tasks['status'].count("ERROR") > 0:
        # execution was not successful
        if completed_tasks['status'].count(
                "ERROR Can not created batch job script and submit it to remote queue"
        ) > 0:
            log.error(
                "Can not created batch job script and/or submit it to remote queue\nSee full error report below\n%s",
                results_summary)
        else:
            log.error("Status: %s\nSee full error report below\n%s",
                      completed_tasks['status'], results_summary)
        os.remove(test_job_lock_filename)
        exit(1)

    if akrr_xdmod_instance_info['status'] == 0:
        # execution was not successful
        log.error(
            "Task execution was not successful\nSee full error report below\n%s",
            results_summary)
        os.remove(test_job_lock_filename)
        exit(1)

    # see what is in report
    elm_perf = xml.etree.ElementTree.fromstring(
        akrr_xdmod_instance_info['body'])
    elm_parameters = elm_perf.find('benchmark').find('parameters')
    elm_statistics = elm_perf.find('benchmark').find('statistics')

    # defaults mean "not reported"; they are overwritten by the report values
    parameters = {'RunEnv:Nodes': '', 'App:ExeBinSignature': ''}
    statistics = {
        'Wall Clock Time': '0.0',
        'Network scratch directory exists': '0',
        'Network scratch directory accessible': '0',
        'App kernel input exists': '0',
        'Task working directory accessible': '0',
        'local scratch directory accessible': '0',
        'local scratch directory exists': '0',
        'App kernel executable exists': '0',
        'Task working directory exists': '0',
        'Shell is BASH': '0'
    }

    for elm in list(elm_parameters):
        variable = _stripped_text(elm, 'ID')
        value = _stripped_text(elm, 'value')
        units = _stripped_text(elm, 'units')

        if variable == 'App:ExeBinSignature' or variable == 'RunEnv:Nodes':
            # these two values are base64-encoded gzip data; decode them
            # in-process instead of interpolating report content into a shell
            # command line
            value = _unpack(value)

        log.debug2("parameter: {} = {} {}".format(variable, value, units))
        parameters[variable] = value

    for elm in list(elm_statistics):
        variable = _stripped_text(elm, 'ID')
        value = _stripped_text(elm, 'value')
        units = _stripped_text(elm, 'units')

        statistics[variable] = value
        log.debug2("statistic: {} = {} {}".format(variable, value, units))

    files_exists = [
        'Network scratch directory exists', 'App kernel input exists',
        'local scratch directory exists', 'App kernel executable exists',
        'Task working directory exists'
    ]
    dirs_access = [
        'Network scratch directory accessible',
        'Task working directory accessible',
        'local scratch directory accessible'
    ]

    if statistics['Shell is BASH'] == '0':
        log.error(
            "Shell on compute nodes of %s is not BASH, change it to bash and try again.\n",
            resource['name'])
        log.error_count += 1
    for file_exists in files_exists:
        if statistics[file_exists] == '0':
            log.error(file_exists.replace('exists', 'does not exist'))
            log.error_count += 1
    for dir_access in dirs_access:
        if statistics[dir_access] == '0':
            log.error(dir_access.replace('accessible', 'is not accessible'))
            log.error_count += 1

    if parameters['App:ExeBinSignature'] == '':
        log.error(
            "Application signature calculator is not working, you might need to recompile it. "
            "see application output for more hints")
        log.error_count += 1

    # test the nodes, log to headnode and ping them
    if parameters['RunEnv:Nodes'] == '':
        log.error(
            "Nodes are not detected, check batchJobTemplate and setup of AKRR_NODELIST variable"
        )
        log.error_count += 1

    nodes = parameters['RunEnv:Nodes'].split()

    # NOTE(review): eval() executes text received from the REST API; if
    # 'resource_param' is always a plain literal, ast.literal_eval would be a
    # safer choice - confirm the format before changing it.
    requested_nodes = eval(completed_tasks['resource_param'])['nnodes']

    str_io = io.StringIO()
    try:
        # capture everything the ssh session prints while it is in use
        sys.stdout = sys.stderr = str_io
        rsh = cfg.sshResource(resource)

        number_of_unknown_hosts = 0
        for node in set(nodes):
            log.debug2(node)
            out = cfg.sshCommand(rsh, "ping -c 1 %s" % node)
            if out.count("unknown host") > 0:
                number_of_unknown_hosts += 1

        rsh.close(force=True)
        del rsh

        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__

        if number_of_unknown_hosts > 0:
            log.error(
                "ERROR %d: Can not ping compute nodes from head node\n" %
                (log.error_count + 1) +
                "Nodes on which test job was executed detected as " +
                parameters['RunEnv:Nodes'] + "\n" +
                "If these names does not have sense check batchJobTemplate and setup of AKRR_NODELIST "
                "variable in resource configuration file")
            log.error_count += 1
    except Exception:
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
        log.critical(
            "Can not connect to %s\nProbably invalid credential, see full error report:\n%s",
            resource['name'], str_io.getvalue())
        raise

    # check ppn count
    if requested_nodes * resource['ppn'] != len(nodes):
        log.error(
            "ERROR {}: Number of requested processes (processes per node * nodes) "
            "do not match actual processes executed\n"
            "Either\n"
            "    AKRR_NODELIST variable is set incorrectly\n"
            "Or\n"
            "    processes per node (PPN) is wrong\n".format(log.error_count +
                                                             1))
        log.error_count += 1
    log.info("\nTest kernel execution summary:\n%s", results_summary)
    # claim success only when none of the checks above reported a problem
    if log.error_count == errors_before:
        log.info("\nThe output looks good.\n")
Ejemplo n.º 29
0
def check_connection_to_resource():
    """Check the connection to the remote resource.

    Runs ``ls`` on the resource through cfg.sshAccess using the module-level
    access settings. When the attempt fails, the captured output decides what
    happens next:

    * a key passphrase prompt: the passphrase is asked for (at most three
      times) and the connection is retried;
    * a password prompt while a private key file is set: the public key is
      installed with ssh-copy-id (at most three password attempts in total)
      and the connection is retried after a successful installation;
    * anything else: the check is abandoned.

    :return: True if the test command ran without raising, otherwise False
    """
    global remoteAccessNode
    global remoteAccessMethod
    global remoteCopyMethod
    global sshUserName
    global sshPassword
    global sshPassword4thisSession
    global sshPrivateKeyFile
    global sshPrivateKeyPassword

    successfully_connected = False
    passphrase_entrance_count = 0
    authorize_key_count = 0
    while True:
        # Try to connect
        str_io = io.StringIO()
        try:
            # capture everything the ssh session prints
            sys.stdout = sys.stderr = str_io
            cfg.sshAccess(remoteAccessNode,
                          ssh=remoteAccessMethod,
                          username=sshUserName,
                          password=sshPassword,
                          PrivateKeyFile=sshPrivateKeyFile,
                          PrivateKeyPassword=sshPrivateKeyPassword,
                          logfile=str_io,
                          command='ls')

            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__

            successfully_connected = True
            break
        except Exception:
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
            response = str_io.getvalue()

            log.debug(
                "Had attempted to access resource without password and failed, below is resource response"
                + "=" * 80 + str_io.getvalue() + "=" * 80)

            # check if it asking for passphrase
            m = re.search(r"Enter passphrase for key '(.*)':", response)
            if m:
                if passphrase_entrance_count >= 3:
                    # give up on the key after three passphrase attempts
                    sshPrivateKeyPassword = None
                    sshPrivateKeyFile = None
                    break
                if passphrase_entrance_count > 0:
                    log.error("Incorrect passphrase try again")
                sshPrivateKeyFile = m.group(1)
                log.log_input("Enter passphrase for key '%s':" %
                              sshPrivateKeyFile)
                sshPrivateKeyPassword = getpass.getpass("")
                passphrase_entrance_count += 1
                continue
            m2 = re.search(r"[pP]assword:", response)
            # authorize_key_count keeps the total number of ssh-copy-id
            # attempts bounded by three over all iterations of the outer loop
            if (m is None and sshPrivateKeyFile is not None and m2
                    and authorize_key_count < 3):
                log.warning(
                    "Can not login to head node. "
                    "Probably the public key of private key was not authorized on head node"
                )
                log.info(
                    "Will try to add public key to list of authorized keys on head node"
                )
                key_authorized = False
                while True:
                    try:
                        authorize_key_count += 1
                        log.log_input(
                            "Enter password for %s@%s (will be used only during this session):"
                            % (sshUserName, remoteAccessNode))
                        sshPassword4thisSession = getpass.getpass("")
                        log.empty_line()
                        str_io = io.StringIO()
                        sys.stdout = sys.stderr = str_io
                        cfg.sshAccess(remoteAccessNode,
                                      ssh='ssh-copy-id',
                                      username=sshUserName,
                                      password=sshPassword4thisSession,
                                      PrivateKeyFile=sshPrivateKeyFile,
                                      PrivateKeyPassword=None,
                                      logfile=str_io,
                                      command='')

                        sys.stdout = sys.__stdout__
                        sys.stderr = sys.__stderr__
                        # NOTE(review): `response` still holds the output of
                        # the failed login attempt above, not the output of
                        # ssh-copy-id. Left unchanged because the new capture
                        # may contain the typed password - confirm the intent.
                        log.info(response)

                        log.info(
                            "Have added public key to list of authorized keys on head node, "
                            "will attempt to connect again.")
                        log.empty_line()
                        key_authorized = True
                        break
                    except Exception:
                        sys.stdout = sys.__stdout__
                        sys.stderr = sys.__stderr__
                        if verbose:
                            log.debug(
                                "Had attempted to add public key to list of authorized keys on head node and failed, "
                                + "below is resource response" + "=" * 80 +
                                str_io.getvalue() + "=" * 80)
                        log.error("Incorrect password try again.")
                        if authorize_key_count >= 3:
                            break
                # Retry the connection whenever the key was installed, also
                # when that only succeeded on the last allowed attempt.
                if key_authorized:
                    continue
            break
    return successfully_connected
Ejemplo n.º 30
0
def monitor_test_job(task_id):
    """Monitor the progress of the test job and wait until it is done.

    The task is polled through the AKRR REST API every ``checking_frequency``
    seconds and the status is printed whenever it changes. Polling stops when
    the task reaches the completed_tasks queue or after more than three failed
    cycles.

    :param task_id: id of the task to monitor
    :return: tuple (completed_tasks, akrr_xdmod_instance_info, akrr_errmsg);
        all three are None when polling stopped before the task completed
    :raises Exception: the error of the update request once more than ten
        cycles have failed
    """
    completed_tasks = None
    akrr_xdmod_instance_info = None
    akrr_errmsg = None

    msg_body_prev = ""

    bad_cycles = 0
    while True:
        t = datetime.datetime.now()

        r = akrrrestclient.get('/tasks/' + str(task_id))

        if r.status_code == 200:
            # parse the body only for successful responses, an error response
            # is not guaranteed to contain JSON
            response_json = r.json()
            queue = response_json["data"]["queue"]

            msg_body = "Test status:\n"

            if queue == "scheduled_tasks":
                msg_body += "Task is in scheduled_tasks queue.\n"
                msg_body += "It schedule to be started on " + str(
                    response_json["data"]["data"]['time_to_start']) + "\n"
            elif queue == "active_tasks":
                msg_body += "Task is in active_tasks queue.\n"
                msg_body += "Status: " + str(
                    response_json["data"]["data"]['status']) + "\n"
                msg_body += "Status info:\n" + str(
                    response_json["data"]["data"]['statusinfo']) + "\n"
            elif queue == "completed_tasks":
                msg_body += "Task is completed!\n"
                task_data = response_json['data']['data']
                completed_tasks = task_data['completed_tasks']
                akrr_xdmod_instance_info = task_data['akrr_xdmod_instanceinfo']
                akrr_errmsg = task_data['akrr_errmsg']
                if log.verbose:
                    msg_body += "completed_tasks table entry:\n" + pp.pformat(
                        completed_tasks) + "\n"
                    msg_body += "akrr_xdmod_instanceinfo table entry:\n" + pp.pformat(
                        akrr_xdmod_instance_info) + "\n"
                    msg_body += 'output parsing results:\n' + akrr_xdmod_instance_info[
                        'body'] + "\n"
                else:
                    msg_body += "\tstatus: " + str(
                        akrr_xdmod_instance_info['status']) + "\n"
                    if akrr_xdmod_instance_info['status'] == 0:
                        msg_body += "\tstatus2: " + completed_tasks[
                            'status'] + "\n"
                    msg_body += "\tstatusinfo: " + completed_tasks[
                        'statusinfo'] + "\n"
            else:
                msg_body += r.text + "\n"

            tail_msg = "time: " + t.strftime("%Y-%m-%d %H:%M:%S")

            if msg_body != msg_body_prev:
                # status changed: print it in full
                print("\n\n" + msg_body)
                print(tail_msg, end=' ')
                sys.stdout.flush()
            else:
                # status unchanged: only refresh the time stamp in place
                print("\r" + tail_msg, end=' ')
                sys.stdout.flush()

            msg_body_prev = msg_body

            if queue == "completed_tasks":
                break
        else:
            bad_cycles += 1
            if bad_cycles > 3:
                log.error("Something wrong, REST API said: %s", r.text)
                break

        # try to update:
        # presumably an empty next_check_time asks the server to check the
        # task right away - TODO confirm against the REST API
        try:
            payload = {'next_check_time': ''}
            akrrrestclient.put('/active_tasks/' + str(task_id), data=payload)
        except Exception:
            bad_cycles += 1
            if bad_cycles > 10:
                log.error("Something wrong with REST API")
                raise

        time.sleep(checking_frequency)
    return completed_tasks, akrr_xdmod_instance_info, akrr_errmsg